···11+import { debug } from 'debug';
22+import { createFallback } from 'ai-fallback';
33+import { generateText } from 'ai';
44+import { createOpenAI } from '@ai-sdk/openai';
55+import { createOllama } from 'ollama-ai-provider';
66+import * as fs from 'node:fs/promises';
77+import * as path from 'node:path';
88+99+import { makeCacheFileHelper } from './path';
/** Namespaced debug logger for the rewrite step. */
const log = debug('llms-txt-gen.rewrite');

// Rewritten output is cached under `<cwd>/.cache/rewrite`; the directory is
// created eagerly at module load so later reads and writes can assume it exists.
const cacheDir = path.join(process.cwd(), '.cache/rewrite');
await fs.mkdir(cacheDir, { recursive: true });

// Helper from './path', configured with the cache directory and a '.txt' suffix.
// NOTE(review): presumably maps a URL to a file path inside `cacheDir` — confirm in './path'.
const getCacheFile = makeCacheFileHelper(cacheDir, '.txt');

// Fail fast at import time when the OpenAI-compatible endpoint is not configured.
const { OPENAI_API_KEY, OPENAI_API_URL } = process.env;
if (!OPENAI_API_KEY) {
  throw new Error('Missing OPENAI_API_KEY env var');
}
if (!OPENAI_API_URL) {
  throw new Error('Missing OPENAI_API_URL env var');
}
/**
 * System prompt sent with every rewrite request.
 *
 * The literal starts and ends with a newline for readability in source;
 * consumers are expected to `.trim()` it before sending.
 */
const SYSTEM_PROMPT = `
Reformat markdown content you're given into an llms-full.txt file, also in markdown format
- Where the format isn't easily understandable by AI, reformat it faithfully to make it processable
- Reformat for an AI and paraphrase where necessary, but don't add interpretations
- Preserve code snippets and keep them in TypeScript or TypeScript typings format
- Avoid using emphasis or excessive markdown syntax, but keep code snippets where they are
- Don't mention other content, pages, or external content (Remove sentences such as "Refer to", "Read more")
- When encountering a markdown table, ensure that you don't output a separate legend, and keep all relevant information in the table
- Don't use any knowledge you may have on the subject. Only output what you're given.
`;
/** Ollama provider pointed at an Ollama API on localhost, port 11434. */
const ollama = createOllama({
  baseURL: 'http://localhost:11434/api',
});

/**
 * OpenAI-compatible provider; both the key and the base URL come from the
 * `OPENAI_API_KEY` / `OPENAI_API_URL` environment variables.
 */
const ai = createOpenAI({
  apiKey: process.env.OPENAI_API_KEY,
  baseURL: process.env.OPENAI_API_URL,
});
/**
 * Rewrites a page's markdown into llms-full.txt-style markdown via an LLM,
 * caching the result on disk keyed by `url`.
 *
 * A non-empty cache file short-circuits the model call. Otherwise the input is
 * sent with `SYSTEM_PROMPT` to a fallback model built from the Ollama model
 * `gemma:7b` followed by `@hf/google/gemma-7b-it` on the OpenAI-compatible
 * endpoint; per-model errors are logged through `onError`.
 *
 * Caching is best-effort in both directions: an unreadable cache file is
 * treated as a miss, and a failed cache write never discards generated text.
 *
 * @param url - Page URL; used to derive the cache file and for log output.
 * @param input - Markdown content to rewrite.
 * @returns The rewritten markdown, from cache or freshly generated.
 * @throws Whatever `generateText` rejects with when generation fails.
 */
export async function rewriteMarkdown(url: URL, input: string): Promise<string> {
  const cacheFile = await getCacheFile(url);

  try {
    const cached = await fs.readFile(cacheFile, 'utf-8');
    // An empty file is treated as a miss so the content gets regenerated.
    if (cached) {
      log('prompt output from cache', url.pathname);
      return cached;
    }
  } catch (error: unknown) {
    // A missing file is the normal cache-miss case; anything else (permissions,
    // a directory at that path, ...) is worth surfacing in the debug log.
    const isMissing =
      error instanceof Error && 'code' in error && error.code === 'ENOENT';
    if (!isMissing) {
      log('failed to read cache file', cacheFile, error);
    }
  }

  log('prompting to rewrite', url.pathname);
  const { text } = await generateText({
    model: createFallback({
      models: [
        ollama('gemma:7b'),
        ai('@hf/google/gemma-7b-it'),
      ],
      onError(error, modelId) {
        log(`error using model ${modelId}`, error);
      },
    }),
    system: SYSTEM_PROMPT.trim(),
    prompt: input,
  });

  try {
    await fs.writeFile(cacheFile, text, 'utf-8');
  } catch (error: unknown) {
    // The generation already succeeded; losing the cache entry only costs a
    // future re-run, so log and still hand the text back to the caller.
    log('failed to write cache file', cacheFile, error);
  }
  return text;
}
+11-2
src/unified.ts
···5858 parent.children.splice(index, 1);
5959 if (node.children.length > 1 || !child || child.type !== 'text')
6060 return;
6161- switch (child.value.trim()) {
6161+ const value = child.value.trim();
6262+ switch (value) {
6263 case 'Example':
6364 case 'Remarks':
6465 case 'Note':
···7172 } else if (node.type === 'text') {
7273 if (!parent || parent.type !== 'paragraph' || parent.children.length > 1)
7374 return;
7474- switch (node.value.trim()) {
7575+ const value = node.value.trim();
7676+ if (
7777+ value.startsWith('Last updated on ') ||
7878+ value.startsWith('Copyright ')
7979+ ) {
8080+ parent.children.splice(index, 1);
8181+ return;
8282+ }
8383+ switch (value) {
7584 case 'Loading...':
7685 case 'Caution':
7786 case 'tsx':