import type { AIProvider } from '@cv/ai-provider';
import { ParsedCVDataSchema, type ParsedCVData } from './schemas';
import {
  CV_SYSTEM_PROMPT,
  buildCvUserPrompt,
  type ExistingUserContext,
} from './prompts';

/**
 * Configuration for CV parser service
 */
export interface CVParserConfig {
  /** Temperature for AI completions (the service falls back to 0.1 when omitted) */
  temperature?: number;
  /** Maximum tokens for AI completions (the service falls back to 8192 when omitted) */
  maxTokens?: number;
}

/**
 * Service for parsing CV text using AI.
 * Uses dependency injection for the AI provider: the provider is supplied by
 * the caller and only its `complete` method is used here.
 */
export class CVParserService {
  private readonly provider: AIProvider;
  private readonly temperature: number;
  private readonly maxTokens: number;

  /**
   * @param provider AI provider used to run the completion
   * @param config Optional overrides for temperature and max tokens
   */
  constructor(provider: AIProvider, config?: CVParserConfig) {
    this.provider = provider;
    // `??` (not `||`) so that an explicit temperature of 0 is respected.
    this.temperature = config?.temperature ?? 0.1;
    this.maxTokens = config?.maxTokens ?? 8192;
  }

  /**
   * Parse CV text using the AI provider
   * @param cvText Raw text from CV (extracted from PDF, DOCX, etc.)
   * @param context Optional existing user data for deduplication and disambiguation
   * @returns Structured CV data matching ParsedCVDataSchema
   * @throws Error if `cvText` is empty or whitespace-only, if the provider
   *   reports the response was cut off (`finishReason === 'length'`), if the
   *   response cannot be parsed as JSON, or if the parsed JSON fails schema
   *   validation. Any other error is rethrown unchanged.
   */
  async parseCVText(
    cvText: string,
    context?: ExistingUserContext,
  ): Promise<ParsedCVData> {
    if (!cvText || cvText.trim().length === 0) {
      throw new Error('CV text cannot be empty');
    }

    try {
      const response = await this.provider.complete({
        systemPrompt: CV_SYSTEM_PROMPT,
        prompt: buildCvUserPrompt(cvText, context),
        temperature: this.temperature,
        maxTokens: this.maxTokens,
      });

      // A truncated response would almost certainly be invalid JSON; fail
      // early with an actionable message instead of a confusing parse error.
      if (response.finishReason === 'length') {
        throw new Error(
          'LLM response was truncated (hit max token limit). ' +
            `Increase maxTokens (currently ${this.maxTokens}) to allow longer responses.`,
        );
      }

      // Extract JSON from response (handle markdown code blocks)
      const rawJson = this.extractJson(response.content);

      // Keep the untyped JSON as `unknown` until the schema has validated it.
      const candidate: unknown = JSON.parse(rawJson);

      // Parse and validate with Zod
      return ParsedCVDataSchema.parse(candidate);
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(
          `Failed to parse LLM response as JSON: ${error.message}`,
        );
      }
      if (error instanceof Error && 'issues' in error) {
        // An `issues` property is treated as the marker of a Zod validation error
        throw new Error(`CV data validation failed: ${error.message}`);
      }
      throw error;
    }
  }

  /**
   * Extract JSON from LLM response
   * Handles markdown code blocks and other formatting
   *
   * Tries, in order: the contents of the first fenced code block (optionally
   * tagged `json`, matched case-insensitively), then the span from the first
   * `{` to the last `}`, and finally the unmodified text.
   *
   * @param text Raw completion text
   * @returns The best JSON candidate found in `text`
   */
  private extractJson(text: string): string {
    // Try to extract from markdown code block. The `i` flag accepts `JSON`
    // as well as `json` so the tag never leaks into the payload.
    const codeBlockMatch = text.match(/```(?:json)?\n?([\s\S]*?)```/i);
    if (codeBlockMatch?.[1]) {
      return codeBlockMatch[1].trim();
    }

    // Try to extract raw JSON object (greedy: first `{` through last `}`)
    const jsonMatch = text.match(/\{[\s\S]*\}/);
    if (jsonMatch) {
      return jsonMatch[0];
    }

    // If no JSON found, return as-is and let JSON.parse fail with clear error
    return text;
  }
}