firefox + llama.cpp == very good prose.
import { API_PARAMS, API_TIMEOUT_MS, DEBUG } from "./config";
import type { ApiErrorResponse, ApiChatCompletionStreamChunk, ApiHealthResponse } from "./types/api";

/** metadata returned by the api after streaming completes. */
export interface StreamResult {
  completionTokens?: number;
}

/** error thrown when the api call fails for any reason (network, http, malformed response). */
export class ApiError extends Error {
  constructor(
    message: string,
    public readonly statusCode?: number,
    public readonly overrideCause?: unknown,
  ) {
    super(message);
    this.name = "ApiError";
  }
}

/**
 * streams a text correction request to the local llama.cpp server.
 *
 * yields each token as it arrives from the sse stream, enabling progressive
 * display in the ui without waiting for the full response.
 *
 * @param text - validated, non-empty input text
 * @param systemPrompt - system prompt to instruct the model
 * @param baseUrl - api server base url (e.g. "http://localhost:8080")
 * @param result - optional object populated with metadata after streaming completes
 * @yields individual content tokens from the model's stream
 * @throws {@link ApiError} on timeout, http errors, or network failures
 */
export async function* streamCorrection(
  text: string,
  systemPrompt: string,
  baseUrl: string,
  result?: StreamResult,
): AsyncGenerator<string, void, undefined> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), API_TIMEOUT_MS);

  let response: Response;

  try {
    response = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        ...API_PARAMS,
        stream: true,
        stream_options: { include_usage: true },
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: text },
        ],
      }),
      signal: controller.signal,
    });
  } catch (err: unknown) {
    clearTimeout(timeout);

    if (err instanceof DOMException && err.name === "AbortError") {
      throw new ApiError(
        `Request timed out after ${API_TIMEOUT_MS / 1_000} seconds.`,
      );
    }

    throw new ApiError(
      `Failed to connect to API at ${baseUrl}. Is llama.cpp server running?`,
      undefined,
      err,
    );
  }

  /* connection established — clear the connect-timeout */
  clearTimeout(timeout);

  /* http status errors */

  if (!response.ok) {
    const status = response.status;

    let detail: string;
    try {
      const body = (await response.json()) as ApiErrorResponse;
      detail = body.error?.message ?? response.statusText;
    } catch {
      detail = response.statusText;
    }

    switch (status) {
      case 404:
        throw new ApiError(
          `API endpoint not found (404). Is llama.cpp server running at ${baseUrl}?`,
          status,
        );
      case 429:
        throw new ApiError("Rate limited by the API (429). Please wait and try again.", status);
      case 502:
      case 503:
        throw new ApiError(
          `Server is unavailable (${status}). Check llama.cpp server logs.`,
          status,
        );
      default:
        if (status >= 500) {
          throw new ApiError(`Server error (${status}): ${detail}`, status);
        }
        throw new ApiError(`HTTP ${status}: ${detail}`, status);
    }
  }

  /* sse stream parsing */

  const body = response.body;
  if (!body) {
    throw new ApiError("Streaming not supported: response body is null.");
  }

  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      const lines = buffer.split("\n");
      buffer = lines.pop()!;

      for (const line of lines) {
        const trimmed = line.trim();

        if (!trimmed || trimmed === "data: [DONE]") {
          continue;
        }

        if (!trimmed.startsWith("data: ")) {
          continue;
        }

        let chunk: ApiChatCompletionStreamChunk;
        try {
          chunk = JSON.parse(trimmed.slice(6));
        } catch {
          continue;
        }

        if (DEBUG) {
          // eslint-disable-next-line no-console
          console.log("[shakespeare]", chunk);
        }

        const token = chunk.choices?.[0]?.delta?.content;
        if (token) {
          yield token;
        }

        if (chunk.usage && result) {
          result.completionTokens = chunk.usage.completion_tokens;
        }
      }
    }
  } finally {
    reader.releaseLock();
  }
}

/**
 * checks whether the llama.cpp server is reachable and healthy.
 *
 * queries the `/v1/health` endpoint and returns `true` if the server
 * responds with `{ "status": "ok" }`. returns `false` on any network
 * error, non-200 status, or unexpected response body.
 *
 * @param baseUrl - api server base url (e.g. "http://localhost:8080")
 */
export async function checkHealth(baseUrl: string): Promise<boolean> {
  try {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 5_000);

    const response = await fetch(`${baseUrl}/v1/health`, {
      signal: controller.signal,
    });

    clearTimeout(timeout);

    if (!response.ok) {
      return false;
    }

    const body = (await response.json()) as ApiHealthResponse;
    return body.status === "ok";
  } catch {
    return false;
  }
}
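
The "./config" and "./types/api" modules are not included in this gist. A minimal sketch of what they might contain, inferred only from how the imports are used above; the specific values and any fields beyond the ones the code reads are assumptions:

// config.ts (sketch) — only the exported names are fixed by the import above.
export const API_TIMEOUT_MS = 30_000; // connect timeout used by streamCorrection (assumed value)
export const DEBUG = false;           // gates the per-chunk console.log (assumed value)
export const API_PARAMS = {           // spread into the chat/completions request body (assumed params)
  temperature: 0.2,
  max_tokens: 512,
};

// types/api.ts (sketch) — shapes inferred from the fields the code reads.
export interface ApiErrorResponse {
  error?: { message?: string };
}

export interface ApiChatCompletionStreamChunk {
  choices?: { delta?: { content?: string } }[];
  usage?: { completion_tokens: number };
}

export interface ApiHealthResponse {
  status?: string;
}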
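
A minimal consumption sketch, assuming a caller elsewhere in the extension: the "./api" module path, BASE_URL, and SYSTEM_PROMPT are placeholders, not names defined in this gist.

// usage sketch — "./api" is a hypothetical path to the file above.
import { streamCorrection, checkHealth, ApiError, type StreamResult } from "./api";

const BASE_URL = "http://localhost:8080"; // assumed default llama.cpp server port
const SYSTEM_PROMPT = "Fix grammar and spelling. Return only the corrected text.";

async function correct(text: string): Promise<string> {
  if (!(await checkHealth(BASE_URL))) {
    throw new Error("llama.cpp server is not reachable.");
  }

  const result: StreamResult = {};
  let output = "";

  try {
    // tokens arrive incrementally; a real caller would render each one as it arrives.
    for await (const token of streamCorrection(text, SYSTEM_PROMPT, BASE_URL, result)) {
      output += token;
    }
  } catch (err) {
    if (err instanceof ApiError) {
      console.error(`API error${err.statusCode ? ` (${err.statusCode})` : ""}: ${err.message}`);
    }
    throw err;
  }

  console.log(`completion tokens: ${result.completionTokens ?? "unknown"}`);
  return output;
}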