firefox + llama.cpp == very good prose.
import { API_PARAMS, API_TIMEOUT_MS, DEBUG } from "./config";
import type { ApiErrorResponse, ApiChatCompletionStreamChunk, ApiHealthResponse } from "./types/api";

/** metadata returned by the api after streaming completes. */
export interface StreamResult {
  completionTokens?: number;
}

/** error thrown when the api call fails for any reason (network, http, malformed response). */
export class ApiError extends Error {
  constructor(
    message: string,
    public readonly statusCode?: number,
    public readonly overrideCause?: unknown,
  ) {
    super(message);
    this.name = "ApiError";
  }
}

/**
 * streams a text correction request to the local llama.cpp server.
 *
 * yields each token as it arrives from the sse stream, enabling progressive
 * display in the ui without waiting for the full response.
 *
 * @param text - validated, non-empty input text
 * @param systemPrompt - system prompt to instruct the model
 * @param baseUrl - api server base url (e.g. "http://localhost:8080")
 * @param result - optional object populated with metadata after streaming completes
 * @yields individual content tokens from the model's stream
 * @throws {@link ApiError} on timeout, http errors, or network failures
 */
export async function* streamCorrection(
  text: string,
  systemPrompt: string,
  baseUrl: string,
  result?: StreamResult,
): AsyncGenerator<string, void, undefined> {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), API_TIMEOUT_MS);

  let response: Response;

  try {
    response = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        ...API_PARAMS,
        stream: true,
        stream_options: { include_usage: true },
        messages: [
          { role: "system", content: systemPrompt },
          { role: "user", content: text },
        ],
      }),
      signal: controller.signal,
    });
  } catch (err: unknown) {
    clearTimeout(timeout);

    if (err instanceof DOMException && err.name === "AbortError") {
      throw new ApiError(
        `Request timed out after ${API_TIMEOUT_MS / 1_000} seconds.`,
      );
    }

    throw new ApiError(
      `Failed to connect to API at ${baseUrl}. Is llama.cpp server running?`,
      undefined,
      err,
    );
  }

  /* connection established; clear the connect-timeout */
  clearTimeout(timeout);

  /* http status errors */

  if (!response.ok) {
    const status = response.status;

    let detail: string;
    try {
      const body = (await response.json()) as ApiErrorResponse;
      detail = body.error?.message ?? response.statusText;
    } catch {
      detail = response.statusText;
    }

    switch (status) {
      case 404:
        throw new ApiError(
          `API endpoint not found (404). Is llama.cpp server running at ${baseUrl}?`,
          status,
        );
      case 429:
        throw new ApiError("Rate limited by the API (429). Please wait and try again.", status);
      case 502:
      case 503:
        throw new ApiError(
          `Server is unavailable (${status}). Check llama.cpp server logs.`,
          status,
        );
      default:
        if (status >= 500) {
          throw new ApiError(`Server error (${status}): ${detail}`, status);
        }
        throw new ApiError(`HTTP ${status}: ${detail}`, status);
    }
  }

  /* sse stream parsing */

  const body = response.body;
  if (!body) {
    throw new ApiError("Streaming not supported: response body is null.");
  }

  const reader = body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      buffer += decoder.decode(value, { stream: true });

      const lines = buffer.split("\n");
      buffer = lines.pop()!;

      for (const line of lines) {
        const trimmed = line.trim();

        if (!trimmed || trimmed === "data: [DONE]") {
          continue;
        }

        if (!trimmed.startsWith("data: ")) {
          continue;
        }

        let chunk: ApiChatCompletionStreamChunk;
        try {
          chunk = JSON.parse(trimmed.slice(6));
        } catch {
          continue;
        }

        if (DEBUG) {
          // eslint-disable-next-line no-console
          console.log("[shakespeare]", chunk);
        }

        const token = chunk.choices?.[0]?.delta?.content;
        if (token) {
          yield token;
        }

        if (chunk.usage && result) {
          result.completionTokens = chunk.usage.completion_tokens;
        }
      }
    }
  } finally {
    reader.releaseLock();
  }
}
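
/*
 * example usage (a minimal sketch, not part of the extension's api surface):
 * drive the generator from caller code and surface tokens through a callback.
 * the prompt text, base url, and `onToken` callback are illustrative
 * assumptions, not values this module defines.
 */
export async function exampleCorrect(
  text: string,
  onToken: (token: string) => void,
): Promise<number | undefined> {
  const result: StreamResult = {};
  try {
    for await (const token of streamCorrection(
      text,
      "Rewrite the following text in clear, grammatical prose.", // assumed prompt
      "http://localhost:8080", // assumed base url
      result,
    )) {
      onToken(token);
    }
    // populated by streamCorrection once the final usage chunk arrives
    return result.completionTokens;
  } catch (err) {
    if (err instanceof ApiError) {
      // statusCode is undefined for timeouts and connection failures
      console.error(`[shakespeare] ${err.statusCode ?? "n/a"}: ${err.message}`);
      return undefined;
    }
    throw err;
  }
}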

/**
 * checks whether the llama.cpp server is reachable and healthy.
 *
 * queries the `/v1/health` endpoint and returns `true` if the server
 * responds with `{ "status": "ok" }`. returns `false` on any network
 * error, non-200 status, or unexpected response body.
 *
 * @param baseUrl - api server base url (e.g. "http://localhost:8080")
 */
export async function checkHealth(baseUrl: string): Promise<boolean> {
  try {
    const controller = new AbortController();
    const timeout = setTimeout(() => controller.abort(), 5_000);

    const response = await fetch(`${baseUrl}/v1/health`, {
      signal: controller.signal,
    });

    clearTimeout(timeout);

    if (!response.ok) {
      return false;
    }

    const body = (await response.json()) as ApiHealthResponse;
    return body.status === "ok";
  } catch {
    return false;
  }
}
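
/*
 * example usage (a minimal sketch): poll checkHealth before enabling the
 * correction ui, retrying a few times while the server starts up. the base
 * url, attempt count, and delay are illustrative assumptions.
 */
export async function exampleWaitForServer(
  baseUrl = "http://localhost:8080",
  attempts = 5,
): Promise<boolean> {
  for (let i = 0; i < attempts; i++) {
    if (await checkHealth(baseUrl)) {
      return true;
    }
    // brief pause between attempts so a starting server isn't hammered
    await new Promise((resolve) => setTimeout(resolve, 1_000));
  }
  return false;
}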