···11import type { AIProvider } from '@cv/ai-provider';
22import { ParsedCVDataSchema, type ParsedCVData } from './schemas';
33-import { getCV_PARSING_PROMPT } from './prompts';
33+import {
44+ CV_SYSTEM_PROMPT,
55+ buildCvUserPrompt,
66+ type ExistingUserContext,
77+} from './prompts';
4859/**
610 * Configuration for CV parser service
/**
 * Bind the parser to an AI provider and resolve its sampling settings.
 *
 * @param provider AI provider used to run completions
 * @param config Optional overrides; temperature falls back to 0.1 and
 *   maxTokens to 8192 when not supplied
 */
constructor(provider: AIProvider, config?: CVParserConfig) {
  const requestedTemperature = config?.temperature;
  const requestedMaxTokens = config?.maxTokens;

  this.provider = provider;
  this.temperature = requestedTemperature ?? 0.1;
  this.maxTokens = requestedMaxTokens ?? 8192;
}
29333034 /**
3135 * Parse CV text using the AI provider
3236 * @param cvText Raw text from CV (extracted from PDF, DOCX, etc.)
3737+ * @param context Optional existing user data for deduplication and disambiguation
3338 * @returns Structured CV data matching ParsedCVDataSchema
3439 */
3535- async parseCVText(cvText: string): Promise<ParsedCVData> {
4040+ async parseCVText(
4141+ cvText: string,
4242+ context?: ExistingUserContext,
4343+ ): Promise<ParsedCVData> {
3644 if (!cvText || cvText.trim().length === 0) {
3745 throw new Error('CV text cannot be empty');
3846 }
39474040- const prompt = getCV_PARSING_PROMPT(cvText);
4141-4248 try {
4349 const response = await this.provider.complete({
4444- prompt,
5050+ systemPrompt: CV_SYSTEM_PROMPT,
5151+ prompt: buildCvUserPrompt(cvText, context),
4552 temperature: this.temperature,
4653 maxTokens: this.maxTokens,
4747- stopSequences: ['</s>'],
4854 });
5555+5656+ if (response.finishReason === 'length') {
5757+ throw new Error(
5858+ 'LLM response was truncated (hit max token limit). ' +
5959+ `Increase maxTokens (currently ${this.maxTokens}) to allow longer responses.`
6060+ );
6161+ }
49625063 // Extract JSON from response (handle markdown code blocks)
5164 const rawJson = this.extractJson(response.content);
+28-4
packages/ai-parser/src/cv-parser.module.ts
···11-import { DynamicModule, Module } from '@nestjs/common';
22-import { AI_PROVIDER, AIModule, type AIModuleOptions, type AIProvider } from '@cv/ai-provider';
33-import { CVParserService } from './ai-parser.service';
11+import { DynamicModule, Module } from "@nestjs/common";
22+import {
33+ AI_PROVIDER,
44+ AIModule,
55+ type AIModuleOptions,
66+ type AIProvider,
77+} from "@cv/ai-provider";
88+import { CVParserService } from "./ai-parser.service";
4955-export const CV_PARSER_SERVICE = Symbol('CV_PARSER_SERVICE');
1010+export const CV_PARSER_SERVICE = Symbol("CV_PARSER_SERVICE");
611712@Module({})
813export class CVParserModule {
···1015 return {
1116 module: CVParserModule,
1217 imports: [AIModule.forRoot(aiOptions)],
1818+ providers: [
1919+ {
2020+ provide: CV_PARSER_SERVICE,
2121+ inject: [AI_PROVIDER],
2222+ useFactory: (aiProvider: AIProvider): CVParserService =>
2323+ new CVParserService(aiProvider),
2424+ },
2525+ ],
2626+ exports: [CV_PARSER_SERVICE],
2727+ };
2828+ }
2929+3030+ /**
3131+ * Resolve the AI provider type from ConfigService at factory time.
3232+ */
3333+ static forConfig(): DynamicModule {
3434+ return {
3535+ module: CVParserModule,
3636+ imports: [AIModule.forConfig()],
1337 providers: [
1438 {
1539 provide: CV_PARSER_SERVICE,
+8-1
packages/ai-parser/src/index.ts
···99} from './schemas';
10101111// Prompts
1212-export { CV_PARSING_PROMPT, getCV_PARSING_PROMPT } from './prompts';
1212+export {
1313+ CV_SYSTEM_PROMPT,
1414+ buildCvUserPrompt,
1515+ buildContextBlock,
1616+ CV_PARSING_PROMPT,
1717+ getCV_PARSING_PROMPT,
1818+ type ExistingUserContext,
1919+} from './prompts';
13201421// Service
1522export { CVParserService, type CVParserConfig } from './ai-parser.service';
+104-13
packages/ai-parser/src/prompts.ts
/**
 * Existing user data to provide as context to the AI parser.
 * Helps with deduplication, gap-filling, and disambiguation.
 */
export interface ExistingUserContext {
  name?: string;
  headline?: string;
  city?: string;
  country?: string;
  jobs?: Array<{
    company: string;
    role: string;
    startDate: string;
    endDate?: string;
  }>;
  education?: Array<{
    institution: string;
    degree: string;
    startDate: string;
    endDate?: string;
  }>;
  skills?: string[];
}

/**
 * Trim a possibly-missing string, mapping null/undefined to "".
 * Lets callers use a plain truthiness check to detect "no meaningful value".
 */
const trimOrEmpty = (value?: string | null): string => value?.trim() ?? "";

/**
 * Format a date range for display in the context block.
 * A missing or blank end date is rendered as "present".
 */
const formatRange = (start: string, end?: string): string =>
  `${start} - ${trimOrEmpty(end) || "present"}`;

/**
 * Build a prompt section with existing user data for context.
 *
 * Scalar fields and skills are trimmed, and values that are blank after
 * trimming are ignored, so whitespace-only input does not produce an
 * otherwise empty section.
 *
 * @param context Existing user data to summarise
 * @returns The formatted section, or an empty string when context has no
 *   meaningful data
 */
export const buildContextBlock = (context: ExistingUserContext): string => {
  const lines: string[] = [];

  const name = trimOrEmpty(context.name);
  const headline = trimOrEmpty(context.headline);
  const location = [context.city, context.country]
    .map((part) => trimOrEmpty(part))
    .filter(Boolean)
    .join(", ");

  if (name) lines.push(`Name: ${name}`);
  if (headline) lines.push(`Headline: ${headline}`);
  if (location) lines.push(`Location: ${location}`);

  if (context.jobs?.length) {
    const jobList = context.jobs
      .map((j) => ` - ${j.role} at ${j.company} (${formatRange(j.startDate, j.endDate)})`)
      .join("\n");
    lines.push(`Current jobs:\n${jobList}`);
  }

  if (context.education?.length) {
    const eduList = context.education
      .map((e) => ` - ${e.degree} at ${e.institution} (${formatRange(e.startDate, e.endDate)})`)
      .join("\n");
    lines.push(`Education:\n${eduList}`);
  }

  // Drop blank skill entries so they cannot yield "Known skills: , ".
  const skills = (context.skills ?? [])
    .map((skill) => trimOrEmpty(skill))
    .filter(Boolean);
  if (skills.length > 0) {
    lines.push(`Known skills: ${skills.join(", ")}`);
  }

  return lines.length === 0
    ? ""
    : `\n## Existing User Data (for reference — merge, don't duplicate)\n${lines.join("\n")}`;
};
6868+6969+/**
7070+ * System prompt for CV parsing.
7171+ * Instructs the LLM to extract structured information from CV text.
472 */
55-export const CV_PARSING_PROMPT = `You are a professional CV parser. Your task is to extract structured information from the provided CV text and return it as a JSON object.
7373+export const CV_SYSTEM_PROMPT = `You are a professional CV parser. Your task is to extract structured information from the provided CV text and return it as a JSON object.
674775Extract the following information:
88-1. Personal info: name and introduction/summary
7676+1. Personal info: name, headline/title, introduction/summary, city, country, phone, website, and LinkedIn URL
9772. Work experience: for each job, extract company, role, level, dates, description, and skills mentioned
10783. Education: for each entry, extract institution, degree, field of study, dates, description, and skills
11794. Skills: list of all mentioned skills
···1583- All dates must be in ISO 8601 format (YYYY-MM-DD)
1684- For current positions, set endDate to null
1785- Skills should be extracted as an array of strings
1818-- If a field is not found, omit it from the object (except for arrays, which default to [])
1986- Company/institution names should be exact as written in the CV
2087- Keep descriptions concise (1-2 sentences)
8888+- The personalInfo object is REQUIRED — always include it, even if only the name is available. Extract whatever you can find; omit individual fields only if truly absent from the CV.
8989+- For personalInfo: headline is the professional title (e.g. "Senior Software Engineer"), introduction is a summary paragraph
9090+- If existing user data is provided, use it to disambiguate ambiguous entries (e.g. a teaching position at a university could be work or education — check existing data for clues)
9191+- If the CV contains entries that match existing user data, update/enrich them rather than creating duplicates
21922293Example JSON structure:
2394{
2495 "personalInfo": {
2596 "name": "John Doe",
2626- "introduction": "Software engineer with 10 years of experience"
9797+ "headline": "Senior Software Engineer",
9898+ "introduction": "Software engineer with 10 years of experience",
9999+ "city": "Amsterdam",
100100+ "country": "Netherlands",
101101+ "phone": "+31 6 1234 5678",
102102+ "website": "https://johndoe.dev",
103103+ "linkedInUrl": "https://linkedin.com/in/johndoe"
27104 },
28105 "jobExperiences": [
29106 {
···49126 "skills": ["Kubernetes", "Go", "Docker", "PostgreSQL", "C++", "Algorithms"]
50127}
511285252-CV Text to parse:
5353----
5454-{cvText}
5555----
5656-57129Return only the JSON object.`;
581305959-export const getCV_PARSING_PROMPT = (cvText: string): string => {
6060- return CV_PARSING_PROMPT.replace('{cvText}', cvText);
/**
 * Assemble the user-role message that carries the CV text to parse.
 *
 * When existing user context is supplied, the section produced by
 * buildContextBlock is placed ahead of the CV text; otherwise nothing is
 * prepended.
 *
 * @param cvText Raw CV text, inserted between `---` delimiter lines
 * @param context Optional existing user data
 * @returns The complete user prompt string
 */
export const buildCvUserPrompt = (
  cvText: string,
  context?: ExistingUserContext,
): string => {
  let existingData = "";
  if (context) {
    existingData = buildContextBlock(context);
  }

  return [
    existingData,
    "\n\nParse the following CV text:\n---\n",
    cvText,
    "\n---",
  ].join("");
};
/**
 * Legacy single-message prompt: the system prompt followed by a section
 * containing the `{cvText}` placeholder and a closing instruction.
 *
 * @deprecated Use CV_SYSTEM_PROMPT + buildCvUserPrompt separately
 */
export const CV_PARSING_PROMPT = [
  CV_SYSTEM_PROMPT,
  "CV Text to parse:\n---\n{cvText}\n---",
  "Return only the JSON object.",
].join("\n\n");
/**
 * Build the legacy single-message prompt by substituting the CV text for the
 * `{cvText}` placeholder in CV_PARSING_PROMPT.
 *
 * @deprecated Use CV_SYSTEM_PROMPT + buildCvUserPrompt separately
 * @param cvText Raw CV text to embed in the prompt
 * @returns The prompt with the placeholder replaced by `cvText` verbatim
 */
export const getCV_PARSING_PROMPT = (cvText: string): string =>
  // A replacer function is required here: with a plain string replacement,
  // sequences such as `$&`, `$$`, "$`" or `$'` inside the CV text would be
  // interpreted as substitution patterns and corrupt the prompt.
  CV_PARSING_PROMPT.replace("{cvText}", () => cvText);
+49-11
packages/ai-parser/src/schemas.ts
···11-import { z } from 'zod';
11+import { z } from "zod/v4";
2233/**
44 * Schema for parsed job experience extracted from CV text
···88 roleName: z.string().min(1, 'Role name is required'),
99 levelName: z
1010 .string()
1111- .optional()
1111+ .nullish()
1212 .transform((val) => val?.trim() || undefined),
1313 startDate: z.string().min(1, 'Start date is required'), // ISO date string YYYY-MM-DD
1414 endDate: z.string().nullable().optional(), // ISO date string or null for current position
1515 description: z
1616 .string()
1717- .optional()
1717+ .nullish()
1818 .transform((val) => val?.trim() || undefined),
1919 skills: z.array(z.string()).default([]),
2020});
···2626 */
export const ParsedEducationSchema = z.object({
  institutionName: z.string().min(1, "Institution name is required"),
  // A missing, null, or blank degree falls back to the "Unknown" placeholder
  // instead of failing validation.
  degree: z
    .string()
    .nullish()
    .transform((val) => val?.trim() || "Unknown"),
  // Accepts a single string or a list of strings. Both forms get the same
  // normalisation: entries are trimmed, blank entries are dropped, and the
  // result is undefined when nothing meaningful remains.
  fieldOfStudy: z
    .union([z.string(), z.array(z.string())])
    .nullish()
    .transform((val) => {
      const parts = Array.isArray(val) ? val : [val ?? ""];
      const joined = parts
        .map((part) => part.trim())
        .filter(Boolean)
        .join(", ");
      return joined || undefined;
    }),
  startDate: z.string().min(1, "Start date is required"), // ISO date string YYYY-MM-DD
  endDate: z.string().nullable().optional(), // ISO date string or null for currently studying
  description: z
    .string()
    .nullish()
    .transform((val) => val?.trim() || undefined),
  skills: z.array(z.string()).default([]),
});
···5055 .object({
5156 name: z
5257 .string()
5353- .optional()
5858+ .nullish()
5959+ .transform((val) => val?.trim() || undefined),
6060+ headline: z
6161+ .string()
6262+ .nullish()
5463 .transform((val) => val?.trim() || undefined),
5564 introduction: z
5665 .string()
5757- .optional()
6666+ .nullish()
6767+ .transform((val) => val?.trim() || undefined),
6868+ city: z
6969+ .string()
7070+ .nullish()
7171+ .transform((val) => val?.trim() || undefined),
7272+ country: z
7373+ .string()
7474+ .nullish()
7575+ .transform((val) => val?.trim() || undefined),
7676+ phone: z
7777+ .string()
7878+ .nullish()
7979+ .transform((val) => val?.trim() || undefined),
8080+ website: z
8181+ .string()
8282+ .nullish()
8383+ .transform((val) => val?.trim() || undefined),
8484+ linkedInUrl: z
8585+ .string()
8686+ .nullish()
5887 .transform((val) => val?.trim() || undefined),
5988 })
6060- .optional(),
8989+ .default({
9090+ name: undefined,
9191+ headline: undefined,
9292+ introduction: undefined,
9393+ city: undefined,
9494+ country: undefined,
9595+ phone: undefined,
9696+ website: undefined,
9797+ linkedInUrl: undefined,
9898+ }),
6199 jobExperiences: z.array(ParsedJobExperienceSchema).default([]),
62100 education: z.array(ParsedEducationSchema).default([]),
63101 skills: z.array(z.string()).default([]),