refactor(ai-provider): extract BaseAIProvider, add response schemas and forConfig()

··· 13 13 }, 14 14 "dependencies": { 15 15 "@nestjs/common": "^11.1.3", 16 16 - "@nestjs/config": "^4.0.2" 16 16 + "@nestjs/config": "^4.0.2", 17 17 + "zod": "^4.3.6" 17 18 }, 18 19 "peerDependencies": { 19 20 "@nestjs/common": "^11.0.0",

+17 -2

packages/ai-provider/src/ai-provider.registry.ts

reviewed

··· 6 6 const registry = new Map<string, ProviderFactory>(); 7 7 8 8 /** 9 9 - * Register an AI provider factory. Call this at module load time 10 10 - * (e.g., at the bottom of each provider file). 9 9 + * Register an AI provider factory. Prefer the `@AIProviderRegistration` 10 10 + * class decorator for built-in providers; use this for programmatic registration. 11 11 */ 12 12 export const registerAIProvider = ( 13 13 type: string, ··· 15 15 ): void => { 16 16 registry.set(type, factory); 17 17 }; 18 18 + 19 19 + interface AIProviderStatic { 20 20 + new (...args: any[]): AIProvider; 21 21 + fromConfigService(configService: ConfigService): AIProvider; 22 22 + } 23 23 + 24 24 + /** 25 25 + * Class decorator that auto-registers an AI provider. 26 26 + * The decorated class must have a static `fromConfigService` method. 27 27 + */ 28 28 + export const AIProviderRegistration = (type: string) => 29 29 + <T extends AIProviderStatic>(target: T): T => { 30 30 + registry.set(type, (cs) => target.fromConfigService(cs)); 31 31 + return target; 32 32 + }; 18 33 19 34 /** 20 35 * Resolve a registered AI provider by type.

+22

packages/ai-provider/src/ai.module.ts

reviewed

··· 30 30 exports: [AI_PROVIDER], 31 31 }; 32 32 } 33 33 + 34 34 + /** 35 35 + * Resolve the AI provider type from ConfigService (AI_PROVIDER env var) 36 36 + * at factory time, avoiding raw process.env assertions at the call site. 37 37 + */ 38 38 + static forConfig(): DynamicModule { 39 39 + return { 40 40 + module: AIModule, 41 41 + imports: [ConfigModule], 42 42 + providers: [ 43 43 + { 44 44 + provide: AI_PROVIDER, 45 45 + inject: [ConfigService], 46 46 + useFactory: (configService: ConfigService): AIProvider => { 47 47 + const type = configService.get<AIProviderType>('AI_PROVIDER', 'llama-cpp'); 48 48 + return resolveAIProvider(type, configService); 49 49 + }, 50 50 + }, 51 51 + ], 52 52 + exports: [AI_PROVIDER], 53 53 + }; 54 54 + } 33 55 }

+8 -1

packages/ai-provider/src/index.ts

reviewed

··· 2 2 export type { 3 3 AIProvider, 4 4 AIProviderConfig, 5 5 + AIProviderStatus, 6 6 + AIProviderStatusDetails, 7 7 + LlamaCppStatusDetails, 8 8 + ApiProviderStatusDetails, 5 9 AICompletionRequest, 6 10 AICompletionResponse, 7 11 } from './types'; 8 12 13 13 + // Base class 14 14 + export { BaseAIProvider, type RequestOptions } from './providers/base.provider'; 15 15 + 9 16 // Providers 10 17 export { 11 18 LlamaCppProvider, type LlamaCppConfig, ··· 14 21 } from './providers'; 15 22 16 23 // Registry 17 17 - export { registerAIProvider, resolveAIProvider, registeredProviderTypes } from './ai-provider.registry'; 24 24 + export { registerAIProvider, AIProviderRegistration, resolveAIProvider, registeredProviderTypes } from './ai-provider.registry'; 18 25 19 26 // NestJS Module 20 27 export { AIModule, AI_PROVIDER, type AIModuleOptions, type AIProviderType } from './ai.module';

+67 -112

packages/ai-provider/src/providers/anthropic.provider.ts

reviewed

import type { ConfigService } from "@nestjs/config";
import { AIProviderRegistration } from "../ai-provider.registry";
import { anthropicResponseSchema } from "../response-schemas";
import type {
  AICompletionRequest,
  AICompletionResponse,
  AIProviderConfig,
} from "../types";
import { BaseAIProvider } from "./base.provider";

/** Model used when neither the config object nor `ANTHROPIC_MODEL` names one. */
const DEFAULT_ANTHROPIC_MODEL = "claude-sonnet-4-5-20250929";

/** Value sent in the `anthropic-version` header on every request. */
const ANTHROPIC_API_VERSION = "2023-06-01";

/** Path of the Messages endpoint, relative to the configured base URL. */
const MESSAGES_PATH = "/v1/messages";

/** Timeout, in milliseconds, for the health probe. */
const HEALTH_CHECK_TIMEOUT_MS = 10000;

/**
 * Translation from Anthropic `stop_reason` values to the provider-neutral
 * `finishReason`. A `Map` is used instead of an object literal so that a
 * server-supplied key such as "constructor" cannot resolve to an inherited
 * `Object.prototype` member.
 */
const FINISH_REASON_BY_STOP_REASON = new Map<
  string,
  AICompletionResponse["finishReason"]
>([
  ["end_turn", "stop"],
  ["stop_sequence", "stop"],
  ["max_tokens", "length"],
]);

export interface AnthropicConfig extends AIProviderConfig {}

/**
 * AI provider implementation for the Anthropic Messages API.
 */
@AIProviderRegistration("anthropic")
export class AnthropicProvider extends BaseAIProvider {
  readonly name = "anthropic";

  /**
   * @param config Provider configuration; `model` falls back to
   *   `DEFAULT_ANTHROPIC_MODEL` when omitted.
   */
  constructor(config: AnthropicConfig) {
    super({ ...config, model: config.model ?? DEFAULT_ANTHROPIC_MODEL });
  }

  /** Headers required on every Messages API call. */
  private get authHeaders(): Record<string, string> {
    return {
      "x-api-key": this.apiKey,
      "anthropic-version": ANTHROPIC_API_VERSION,
    };
  }

  /**
   * Send a single-turn completion request.
   *
   * The prompt is sent as one `user` message; `systemPrompt` and
   * `stopSequences` are only included when the request provides them.
   *
   * @param request Prompt plus optional per-call overrides.
   * @returns The first `text` content block (empty string when there is none),
   *   token usage, model and a normalised finish reason. Unrecognised or
   *   missing stop reasons are reported as "stop".
   * @throws Error when the API answers with a non-2xx status, and whatever the
   *   response schema throws when the payload does not match.
   */
  async complete(request: AICompletionRequest): Promise<AICompletionResponse> {
    const body: Record<string, unknown> = {
      model: this.model,
      max_tokens: request.maxTokens ?? this.defaultMaxTokens,
      temperature: request.temperature ?? this.defaultTemperature,
      messages: [{ role: "user", content: request.prompt }],
      ...(request.systemPrompt ? { system: request.systemPrompt } : {}),
      ...(request.stopSequences
        ? { stop_sequences: request.stopSequences }
        : {}),
    };

    const result = await this.request(MESSAGES_PATH, {
      headers: this.authHeaders,
      body,
      schema: anthropicResponseSchema,
    });

    const textBlock = result.content?.find(({ type }) => type === "text");
    const finishReason =
      FINISH_REASON_BY_STOP_REASON.get(result.stop_reason ?? "") ?? "stop";

    return {
      content: textBlock?.text ?? "",
      promptTokens: result.usage?.input_tokens,
      completionTokens: result.usage?.output_tokens,
      model: result.model ?? this.model,
      finishReason,
    };
  }

  /**
   * Probe the API by posting a one-token "ping" completion.
   *
   * NOTE(review): this issues a real Messages request, so each probe
   * presumably counts against quota — confirm before polling frequently.
   *
   * @returns `true` when the probe succeeds, `false` on any failure.
   */
  async isHealthy(): Promise<boolean> {
    return this.healthCheck(MESSAGES_PATH, {
      headers: this.authHeaders,
      body: {
        model: this.model,
        max_tokens: 1,
        messages: [{ role: "user", content: "ping" }],
      },
      timeout: HEALTH_CHECK_TIMEOUT_MS,
    });
  }

  /**
   * Build a provider from `ANTHROPIC_*` and shared `AI_*` configuration keys,
   * applying the same defaults as the constructor.
   */
  static fromConfigService(configService: ConfigService): AnthropicProvider {
    return new AnthropicProvider({
      baseUrl: configService.get<string>(
        "ANTHROPIC_BASE_URL",
        "https://api.anthropic.com",
      ),
      apiKey: configService.get<string>("ANTHROPIC_API_KEY", ""),
      model: configService.get<string>(
        "ANTHROPIC_MODEL",
        DEFAULT_ANTHROPIC_MODEL,
      ),
      defaultTemperature: configService.get<number>("AI_TEMPERATURE", 0.1),
      defaultMaxTokens: configService.get<number>("AI_MAX_TOKENS", 2048),
      timeout: configService.get<number>("AI_TIMEOUT", 60000),
    });
  }
}

+119

packages/ai-provider/src/providers/base.provider.ts

reviewed

import type { z } from "zod/v4";
import type {
  AICompletionRequest,
  AICompletionResponse,
  AIProvider,
  AIProviderConfig,
  AIProviderStatus,
} from "../types";

/** Timeout, in milliseconds, used by `healthCheck()` when the caller gives none. */
const DEFAULT_HEALTH_CHECK_TIMEOUT_MS = 5000;

/**
 * Options for the `request()` helper.
 */
export interface RequestOptions<T = unknown> {
  /** HTTP method; defaults to POST when `body` is set and GET otherwise. */
  method?: string;
  /** Extra headers, merged over the automatic `Content-Type` header. */
  headers?: Record<string, string>;
  /** Request payload; serialised with `JSON.stringify`. */
  body?: Record<string, unknown>;
  /** Zod schema used to validate (and type) the parsed response. */
  schema?: z.ZodType<T>;
  /** Per-call timeout in milliseconds; defaults to the provider timeout. */
  timeout?: number;
}

/**
 * Abstract base class for AI providers.
 * Handles config normalization, fetch boilerplate (timeout, abort, error handling),
 * and a default `getStatus()` for API-based providers.
 */
export abstract class BaseAIProvider implements AIProvider {
  abstract readonly name: string;

  protected readonly baseUrl: string;
  protected readonly apiKey: string;
  protected readonly model: string;
  protected readonly defaultTemperature: number;
  protected readonly defaultMaxTokens: number;
  protected readonly timeout: number;

  /**
   * @param config Provider configuration. Missing optional fields fall back to:
   *   empty API key, empty model, temperature 0.1, 2048 max tokens, 60 s timeout.
   */
  constructor(config: AIProviderConfig) {
    // Strip every trailing slash so `${baseUrl}${path}` never contains "//".
    this.baseUrl = config.baseUrl.replace(/\/+$/, "");
    this.apiKey = config.apiKey ?? "";
    this.model = config.model ?? "";
    this.defaultTemperature = config.defaultTemperature ?? 0.1;
    this.defaultMaxTokens = config.defaultMaxTokens ?? 2048;
    this.timeout = config.timeout ?? 60000;
  }

  abstract complete(
    request: AICompletionRequest,
  ): Promise<AICompletionResponse>;

  abstract isHealthy(): Promise<boolean>;

  /**
   * Default status implementation for API-based providers.
   * Override for providers with richer status endpoints (e.g. llama.cpp).
   *
   * @returns The result of `isHealthy()` plus the configured model and base URL.
   */
  async getStatus(): Promise<AIProviderStatus> {
    const healthy = await this.isHealthy();
    return {
      healthy,
      providerName: this.name,
      details: { kind: "api", model: this.model, baseUrl: this.baseUrl },
    };
  }

  /**
   * Make a JSON request to the provider API.
   * Handles timeout via AbortController, Content-Type for bodies,
   * error responses, and optional Zod schema parsing.
   *
   * @param path Path appended to the configured base URL.
   * @param options Method, headers, body, schema and timeout overrides.
   * @returns The parsed JSON body, validated by `options.schema` when given.
   * @throws Error `"<name> API error: <status> <body>"` on a non-2xx response;
   *   the fetch/abort error on network failure or timeout; and whatever the
   *   schema throws when validation fails.
   */
  protected async request<T = unknown>(
    path: string,
    options?: RequestOptions<T>,
  ): Promise<T> {
    const schema = options?.schema;
    return this.dispatch(path, options ?? {}, async (response) => {
      // Keep the payload as `unknown` until the schema (if any) has vetted it.
      const payload: unknown = await response.json();
      return schema ? schema.parse(payload) : (payload as T);
    });
  }

  /**
   * Probe an endpoint and report whether it answered with a 2xx status.
   * Only the status is inspected: the body is discarded unparsed, so health
   * endpoints that reply with plain text or an empty body still count as
   * healthy. Default timeout is 5 s.
   *
   * @returns `true` on a 2xx response, `false` on any error or timeout.
   */
  protected async healthCheck(
    path: string,
    options?: Omit<RequestOptions, "schema">,
  ): Promise<boolean> {
    try {
      await this.dispatch(
        path,
        {
          ...options,
          timeout: options?.timeout ?? DEFAULT_HEALTH_CHECK_TIMEOUT_MS,
        },
        async (response) => {
          // Release the connection without reading the payload.
          await response.body?.cancel();
        },
      );
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Shared fetch plumbing for `request()` and `healthCheck()`: builds the
   * request, enforces the timeout, rejects non-2xx responses, then hands the
   * response to `consume`. The timer stays armed until `consume` settles so
   * that reading the body is covered by the timeout as well.
   */
  private async dispatch<R>(
    path: string,
    options: Omit<RequestOptions, "schema">,
    consume: (response: Response) => Promise<R>,
  ): Promise<R> {
    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      options.timeout ?? this.timeout,
    );

    try {
      const { body } = options;
      const hasBody = body != null;

      const response = await fetch(`${this.baseUrl}${path}`, {
        method: options.method ?? (hasBody ? "POST" : "GET"),
        headers: hasBody
          ? { "Content-Type": "application/json", ...options.headers }
          : { ...options.headers },
        ...(hasBody ? { body: JSON.stringify(body) } : {}),
        signal: controller.signal,
      });

      if (!response.ok) {
        const errorBody = await response.text();
        throw new Error(
          `${this.name} API error: ${response.status} ${errorBody}`,
        );
      }

      return await consume(response);
    } finally {
      clearTimeout(timeoutId);
    }
  }
}

packages/ai-provider/src/providers/index.ts

reviewed

··· 1 1 + export { BaseAIProvider, type RequestOptions } from './base.provider'; 1 2 export { LlamaCppProvider, type LlamaCppConfig } from './llama-cpp.provider'; 2 3 export { OpenAIProvider, type OpenAIConfig } from './openai.provider'; 3 4 export { AnthropicProvider, type AnthropicConfig } from './anthropic.provider';

+2 -2

packages/ai-provider/src/providers/llama-cpp.provider.integration.test.ts

reviewed

··· 6 6 7 7 beforeAll(() => { 8 8 provider = new LlamaCppProvider({ 9 9 - baseUrl: process.env.LLAMA_URL || 'http://localhost:8080', 9 9 + baseUrl: process.env['LLAMA_URL'] || 'http://localhost:8080', 10 10 defaultTemperature: 0.1, 11 11 defaultMaxTokens: 100, 12 12 timeout: 10000, // 10 second timeout for tests ··· 72 72 describe('Error Handling', () => { 73 73 it('should timeout for requests exceeding configured timeout', async () => { 74 74 const slowProvider = new LlamaCppProvider({ 75 75 - baseUrl: process.env.LLAMA_URL || 'http://localhost:8080', 75 75 + baseUrl: process.env['LLAMA_URL'] || 'http://localhost:8080', 76 76 timeout: 1000, // 1 second timeout 77 77 }); 78 78

+57 -83

packages/ai-provider/src/providers/llama-cpp.provider.ts

reviewed

··· 1 1 - import type { ConfigService } from '@nestjs/config'; 2 2 - import { registerAIProvider } from '../ai-provider.registry'; 1 1 + import type { ConfigService } from "@nestjs/config"; 2 2 + import { AIProviderRegistration } from "../ai-provider.registry"; 3 3 + import { openaiResponseSchema } from "../response-schemas"; 3 4 import type { 4 4 - AIProvider, 5 5 - AIProviderConfig, 6 5 AICompletionRequest, 7 6 AICompletionResponse, 8 8 - } from '../types'; 7 7 + AIProviderConfig, 8 8 + AIProviderStatus, 9 9 + } from "../types"; 10 10 + import { BaseAIProvider } from "./base.provider"; 9 11 10 12 /** 11 13 * Configuration specific to llama.cpp server ··· 19 21 * AI provider implementation for llama.cpp server 20 22 * https://github.com/ggml-org/llama.cpp 21 23 */ 22 22 - export class LlamaCppProvider implements AIProvider { 23 23 - readonly name = 'llama-cpp'; 24 24 - 25 25 - private baseUrl: string; 26 26 - private defaultTemperature: number; 27 27 - private defaultMaxTokens: number; 28 28 - private timeout: number; 29 29 - 30 30 - constructor(config: LlamaCppConfig) { 31 31 - this.baseUrl = config.baseUrl.replace(/\/$/, ''); 32 32 - this.defaultTemperature = config.defaultTemperature ?? 0.1; 33 33 - this.defaultMaxTokens = config.defaultMaxTokens ?? 2048; 34 34 - this.timeout = config.timeout ?? 60000; 35 35 - } 24 24 + @AIProviderRegistration("llama-cpp") 25 25 + export class LlamaCppProvider extends BaseAIProvider { 26 26 + readonly name = "llama-cpp"; 36 27 37 28 async complete(request: AICompletionRequest): Promise<AICompletionResponse> { 38 38 - const prompt = request.systemPrompt 39 39 - ? 
`${request.systemPrompt}\n\n${request.prompt}` 40 40 - : request.prompt; 41 41 - 42 42 - const controller = new AbortController(); 43 43 - const timeoutId = setTimeout(() => controller.abort(), this.timeout); 44 44 - 45 45 - try { 46 46 - const fetchResponse = await fetch(`${this.baseUrl}/completion`, { 47 47 - method: 'POST', 48 48 - headers: { 'Content-Type': 'application/json' }, 49 49 - body: JSON.stringify({ 50 50 - prompt, 51 51 - temperature: request.temperature ?? this.defaultTemperature, 52 52 - n_predict: request.maxTokens ?? this.defaultMaxTokens, 53 53 - stop: request.stopSequences ?? ['</s>'], 54 54 - }), 55 55 - signal: controller.signal, 56 56 - }); 29 29 + const messages: Array<{ role: string; content: string }> = []; 30 30 + if (request.systemPrompt) 31 31 + messages.push({ role: "system", content: request.systemPrompt }); 32 32 + messages.push({ role: "user", content: request.prompt }); 57 33 58 58 - if (!fetchResponse.ok) { 59 59 - throw new Error( 60 60 - `llama.cpp API error: ${fetchResponse.status} ${fetchResponse.statusText}` 61 61 - ); 62 62 - } 63 63 - 64 64 - const result = (await fetchResponse.json()) as { 65 65 - content?: string; 66 66 - tokens_predicted?: number; 67 67 - tokens_evaluated?: number; 68 68 - stop_type?: string; 69 69 - }; 70 70 - 71 71 - const aiResponse: AICompletionResponse = { 72 72 - content: result.content ?? '', 73 73 - model: 'llama-cpp-local', 74 74 - finishReason: result.stop_type === 'word' ? 'stop' : 'length', 75 75 - }; 34 34 + const result = await this.request("/v1/chat/completions", { 35 35 + body: { 36 36 + messages, 37 37 + temperature: request.temperature ?? this.defaultTemperature, 38 38 + max_tokens: request.maxTokens ?? this.defaultMaxTokens, 39 39 + stop: request.stopSequences ?? 
["</s>"], 40 40 + }, 41 41 + schema: openaiResponseSchema, 42 42 + }); 76 43 77 77 - if (result.tokens_predicted !== undefined) { 78 78 - aiResponse.completionTokens = result.tokens_predicted; 79 79 - } 80 80 - if (result.tokens_evaluated !== undefined) { 81 81 - aiResponse.promptTokens = result.tokens_evaluated; 82 82 - } 44 44 + const choice = result.choices?.[0]; 83 45 84 84 - return aiResponse; 85 85 - } finally { 86 86 - clearTimeout(timeoutId); 87 87 - } 46 46 + return { 47 47 + content: choice?.message?.content ?? "", 48 48 + promptTokens: result.usage?.prompt_tokens, 49 49 + completionTokens: result.usage?.completion_tokens, 50 50 + model: result.model ?? "llama-cpp-local", 51 51 + finishReason: choice?.finish_reason === "length" ? "length" : "stop", 52 52 + }; 88 53 } 89 54 90 55 async isHealthy(): Promise<boolean> { 91 91 - try { 92 92 - const controller = new AbortController(); 93 93 - const timeoutId = setTimeout(() => controller.abort(), 5000); 56 56 + return this.healthCheck("/health"); 57 57 + } 94 58 95 95 - const response = await fetch(`${this.baseUrl}/health`, { 96 96 - signal: controller.signal, 97 97 - }); 59 59 + override async getStatus(): Promise<AIProviderStatus> { 60 60 + const fetchOrNull = <T>(path: string): Promise<T | null> => 61 61 + this.request<T>(path, { timeout: 5000 }).catch(() => null); 98 62 99 99 - clearTimeout(timeoutId); 100 100 - return response.ok; 101 101 - } catch { 102 102 - return false; 103 103 - } 63 63 + const [health, props, slots] = await Promise.all([ 64 64 + fetchOrNull<Record<string, unknown>>("/health"), 65 65 + fetchOrNull<Record<string, unknown>>("/props"), 66 66 + fetchOrNull<Record<string, unknown>[]>("/slots"), 67 67 + ]); 68 68 + 69 69 + return { 70 70 + healthy: health != null && health["status"] === "ok", 71 71 + providerName: this.name, 72 72 + details: { 73 73 + kind: "llama-cpp", 74 74 + health, 75 75 + model: props, 76 76 + slots: Array.isArray(slots) ? 
slots : null, 77 77 + baseUrl: this.baseUrl, 78 78 + }, 79 79 + }; 104 80 } 105 81 106 82 static fromConfigService(configService: ConfigService): LlamaCppProvider { 107 83 return new LlamaCppProvider({ 108 108 - baseUrl: configService.get<string>('LLAMA_URL', 'http://llama:8080'), 109 109 - defaultTemperature: configService.get<number>('AI_TEMPERATURE', 0.1), 110 110 - defaultMaxTokens: configService.get<number>('AI_MAX_TOKENS', 2048), 111 111 - timeout: configService.get<number>('AI_TIMEOUT', 60000), 84 84 + baseUrl: configService.get<string>("LLAMA_URL", "http://llama:8080"), 85 85 + defaultTemperature: configService.get<number>("AI_TEMPERATURE", 0.1), 86 86 + defaultMaxTokens: configService.get<number>("AI_MAX_TOKENS", 2048), 87 87 + timeout: configService.get<number>("AI_TIMEOUT", 60000), 112 88 }); 113 89 } 114 90 } 115 115 - 116 116 - registerAIProvider('llama-cpp', (cs) => LlamaCppProvider.fromConfigService(cs));

+53 -102

packages/ai-provider/src/providers/openai.provider.ts

reviewed

import type { ConfigService } from "@nestjs/config";
import { AIProviderRegistration } from "../ai-provider.registry";
import { openaiResponseSchema } from "../response-schemas";
import type {
  AICompletionRequest,
  AICompletionResponse,
  AIProviderConfig,
} from "../types";
import { BaseAIProvider } from "./base.provider";

/** Model used when neither the config object nor `OPENAI_MODEL` names one. */
const DEFAULT_OPENAI_MODEL = "gpt-4o-mini";

/**
 * Translation from OpenAI `finish_reason` values to the provider-neutral
 * `finishReason`. A `Map` is used instead of an object literal so that a
 * server-supplied key such as "constructor" cannot resolve to an inherited
 * `Object.prototype` member.
 */
const FINISH_REASON_BY_RAW_REASON = new Map<
  string,
  AICompletionResponse["finishReason"]
>([
  ["stop", "stop"],
  ["length", "length"],
  ["content_filter", "content_filter"],
]);

export interface OpenAIConfig extends AIProviderConfig {}

/**
 * AI provider implementation for OpenAI-compatible APIs.
 * Works with OpenAI, Azure OpenAI, and any OpenAI-compatible endpoint.
 */
@AIProviderRegistration("openai")
export class OpenAIProvider extends BaseAIProvider {
  readonly name = "openai";

  /**
   * @param config Provider configuration; `model` falls back to
   *   `DEFAULT_OPENAI_MODEL` when omitted.
   */
  constructor(config: OpenAIConfig) {
    super({ ...config, model: config.model ?? DEFAULT_OPENAI_MODEL });
  }

  /**
   * Bearer-token header for the configured key. Returns no header at all when
   * the key is empty, so keyless OpenAI-compatible servers are not sent a
   * malformed `Authorization: Bearer ` value.
   */
  private get authHeaders(): Record<string, string> {
    return this.apiKey ? { Authorization: `Bearer ${this.apiKey}` } : {};
  }

  /**
   * Send a chat completion request.
   *
   * The optional system prompt becomes a `system` message ahead of the `user`
   * message; `stop` is only sent when the request supplies stop sequences.
   *
   * @param request Prompt plus optional per-call overrides.
   * @returns Content of the first choice (empty string when absent), token
   *   usage, model and a normalised finish reason. Unrecognised or missing
   *   finish reasons are reported as "stop".
   * @throws Error when the API answers with a non-2xx status, and whatever the
   *   response schema throws when the payload does not match.
   */
  async complete(request: AICompletionRequest): Promise<AICompletionResponse> {
    const messages: Array<{ role: string; content: string }> = [];
    if (request.systemPrompt) {
      messages.push({ role: "system", content: request.systemPrompt });
    }
    messages.push({ role: "user", content: request.prompt });

    const result = await this.request("/v1/chat/completions", {
      headers: this.authHeaders,
      body: {
        model: this.model,
        messages,
        temperature: request.temperature ?? this.defaultTemperature,
        max_tokens: request.maxTokens ?? this.defaultMaxTokens,
        ...(request.stopSequences ? { stop: request.stopSequences } : {}),
      },
      schema: openaiResponseSchema,
    });

    const firstChoice = result.choices?.[0];
    const rawReason = firstChoice?.finish_reason ?? "stop";

    return {
      content: firstChoice?.message?.content ?? "",
      promptTokens: result.usage?.prompt_tokens,
      completionTokens: result.usage?.completion_tokens,
      model: result.model ?? this.model,
      finishReason: FINISH_REASON_BY_RAW_REASON.get(rawReason) ?? "stop",
    };
  }

  /**
   * Probe the API by listing models.
   *
   * @returns `true` when `/v1/models` answers successfully, `false` otherwise.
   */
  async isHealthy(): Promise<boolean> {
    return this.healthCheck("/v1/models", { headers: this.authHeaders });
  }

  /**
   * Build a provider from `OPENAI_*` and shared `AI_*` configuration keys,
   * applying the same defaults as the constructor.
   */
  static fromConfigService(configService: ConfigService): OpenAIProvider {
    return new OpenAIProvider({
      baseUrl: configService.get<string>(
        "OPENAI_BASE_URL",
        "https://api.openai.com",
      ),
      apiKey: configService.get<string>("OPENAI_API_KEY", ""),
      model: configService.get<string>("OPENAI_MODEL", DEFAULT_OPENAI_MODEL),
      defaultTemperature: configService.get<number>("AI_TEMPERATURE", 0.1),
      defaultMaxTokens: configService.get<number>("AI_MAX_TOKENS", 2048),
      timeout: configService.get<number>("AI_TIMEOUT", 60000),
    });
  }
}

+40

packages/ai-provider/src/response-schemas.ts

reviewed

import { z } from "zod/v4";

/*
 * Response schemas for the provider HTTP APIs.
 *
 * Fields that a server may send as an explicit `null` (for example an
 * assistant message whose `content` is null) use `.nullish()` rather than
 * `.optional()`: `.optional()` rejects `null`, which would turn an otherwise
 * usable response into a validation error. Every nullish field here is read
 * by the providers through `??` / `?.`, so `null` and "absent" are handled
 * the same way. Token counts stay `.optional()` so their inferred type
 * remains `number | undefined`.
 */

/**
 * Subset of the Anthropic Messages API response consumed by the provider.
 */
export const anthropicResponseSchema = z.object({
  content: z
    .array(z.object({ type: z.string(), text: z.string().nullish() }))
    .nullish(),
  usage: z
    .object({
      input_tokens: z.number().optional(),
      output_tokens: z.number().optional(),
    })
    .nullish(),
  model: z.string().nullish(),
  stop_reason: z.string().nullish(),
});

/**
 * Subset of an OpenAI-compatible chat completion response consumed by the
 * providers that talk to `/v1/chat/completions`.
 */
export const openaiResponseSchema = z.object({
  choices: z
    .array(
      z.object({
        message: z.object({ content: z.string().nullish() }).nullish(),
        finish_reason: z.string().nullish(),
      }),
    )
    .nullish(),
  usage: z
    .object({
      prompt_tokens: z.number().optional(),
      completion_tokens: z.number().optional(),
    })
    .nullish(),
  model: z.string().nullish(),
});

/**
 * Shape with `content`, `tokens_predicted`, `tokens_evaluated` and
 * `stop_type` fields, all optional.
 * NOTE(review): presumably the llama.cpp native completion response — confirm
 * against the llama.cpp server docs.
 */
export const llamaCppResponseSchema = z.object({
  content: z.string().optional(),
  tokens_predicted: z.number().optional(),
  tokens_evaluated: z.number().optional(),
  stop_type: z.string().optional(),
});

+48 -4

packages/ai-provider/src/types.ts

reviewed

··· 21 21 /** Generated text content */ 22 22 content: string; 23 23 /** Number of tokens used in prompt */ 24 24 - promptTokens?: number; 24 24 + promptTokens?: number | undefined; 25 25 /** Number of tokens generated */ 26 26 - completionTokens?: number; 26 26 + completionTokens?: number | undefined; 27 27 /** Model used for completion */ 28 28 - model?: string; 28 28 + model?: string | undefined; 29 29 /** Whether generation was cut off */ 30 30 - finishReason?: 'stop' | 'length' | 'content_filter' | 'error'; 30 30 + finishReason?: 'stop' | 'length' | 'content_filter' | 'error' | undefined; 31 31 } 32 32 33 33 /** ··· 38 38 baseUrl: string; 39 39 /** API key for authentication (optional for local providers) */ 40 40 apiKey?: string; 41 41 + /** Model identifier */ 42 42 + model?: string; 41 43 /** Default temperature */ 42 44 defaultTemperature?: number; 43 45 /** Default max tokens */ ··· 47 49 } 48 50 49 51 /** 52 52 + * Status details for a llama.cpp local provider 53 53 + */ 54 54 + export interface LlamaCppStatusDetails { 55 55 + kind: 'llama-cpp'; 56 56 + health: Record<string, unknown> | null; 57 57 + model: Record<string, unknown> | null; 58 58 + slots: Array<Record<string, unknown>> | null; 59 59 + baseUrl: string; 60 60 + } 61 61 + 62 62 + /** 63 63 + * Status details for a cloud API provider (Anthropic, OpenAI, etc.) 
64 64 + */ 65 65 + export interface ApiProviderStatusDetails { 66 66 + kind: 'api'; 67 67 + model: string; 68 68 + baseUrl: string; 69 69 + } 70 70 + 71 71 + export type AIProviderStatusDetails = 72 72 + | LlamaCppStatusDetails 73 73 + | ApiProviderStatusDetails; 74 74 + 75 75 + /** 76 76 + * Detailed status information from an AI provider 77 77 + */ 78 78 + export interface AIProviderStatus { 79 79 + /** Whether the provider is reachable and operational */ 80 80 + healthy: boolean; 81 81 + /** Provider identifier */ 82 82 + providerName: string; 83 83 + /** Provider-specific details */ 84 84 + details?: AIProviderStatusDetails; 85 85 + } 86 86 + 87 87 + /** 50 88 * Abstract AI provider interface 51 89 * Implementations can use different backends (llama.cpp, OpenAI, Anthropic, etc.) 52 90 */ ··· 66 104 * @returns Promise resolving to health status 67 105 */ 68 106 isHealthy(): Promise<boolean>; 107 107 + 108 108 + /** 109 109 + * Get detailed provider status including health, model info, and operational data 110 110 + * Optional — not all providers need to implement this. 111 111 + */ 112 112 + getStatus?(): Promise<AIProviderStatus>; 69 113 }