// Forked from stevedylan.dev/sequoia.
// A CLI for publishing standard.site documents to ATProto.
1import { webcrypto as crypto } from "node:crypto";
2import * as fs from "node:fs/promises";
3import * as path from "node:path";
4import { glob } from "glob";
5import { minimatch } from "minimatch";
6import type { BlogPost, FrontmatterMapping, PostFrontmatter } from "./types";
7
/** Removes one pair of matching single or double quotes wrapping `value`, if present. */
function stripMatchingQuotes(value: string): string {
  if (
    (value.startsWith('"') && value.endsWith('"')) ||
    (value.startsWith("'") && value.endsWith("'"))
  ) {
    return value.slice(1, -1);
  }
  return value;
}

/**
 * Splits a markdown document into its frontmatter and body and normalizes the
 * frontmatter to the standard post fields.
 *
 * Supported delimiters: `---` and `***` (parsed as `key: value`) and `+++`
 * (parsed as `key = value`). Only a flat subset is understood: scalar values,
 * optionally quoted strings, `true`/`false`, inline arrays (`[a, b]`) and, for
 * the colon-separated formats, multiline `- item` lists.
 *
 * When no frontmatter block is found, the title is taken from the first
 * `# heading` line (empty string if none) and the publish date defaults to
 * today's date (`YYYY-MM-DD`); the whole input is returned as the body.
 *
 * @param content - Full text of the markdown file.
 * @param mapping - Optional custom frontmatter key names for the standard fields.
 * @returns The normalized frontmatter, the body after the frontmatter block,
 *   and the parsed key/value pairs before any mapping was applied.
 */
export function parseFrontmatter(
  content: string,
  mapping?: FrontmatterMapping,
): {
  frontmatter: PostFrontmatter;
  body: string;
  rawFrontmatter: Record<string, unknown>;
} {
  // Support multiple frontmatter delimiters:
  // --- (YAML) - Jekyll, Astro, most SSGs
  // +++ (TOML) - Hugo
  // *** - Alternative format
  // Both LF and CRLF line endings are accepted, and the closing delimiter may
  // be the last thing in the file (no trailing newline / empty body).
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)\r?\n([\s\S]*?)\r?\n\1(?:\r?\n([\s\S]*))?$/;
  const match = content.match(frontmatterRegex);

  if (!match) {
    const [, titleMatch] = content.trim().match(/^# (.+)$/m) || [];
    const title = titleMatch ?? "";
    const publishDate = new Date().toISOString().slice(0, 10);

    return {
      frontmatter: {
        title,
        publishDate,
      },
      body: content,
      // Mirror the synthesized values so raw lookups (slug field, path
      // templates) see the real title rather than the date.
      rawFrontmatter: {
        title,
        publishDate,
      },
    };
  }

  const delimiter = match[1];
  const frontmatterStr = match[2] ?? "";
  const body = match[3] ?? "";

  // Determine format based on delimiter:
  // +++ uses TOML (key = value)
  // --- and *** use YAML (key: value)
  const isToml = delimiter === "+++";
  const separator = isToml ? "=" : ":";

  // Parse frontmatter manually
  const raw: Record<string, unknown> = {};
  const lines = frontmatterStr.split(/\r?\n/);

  let i = 0;
  while (i < lines.length) {
    const line = lines[i];
    if (line === undefined) {
      i++;
      continue;
    }
    const sepIndex = line.indexOf(separator);
    if (sepIndex === -1) {
      // Not a `key<sep>value` line (blank line, stray list item, ...)
      i++;
      continue;
    }

    const key = line.slice(0, sepIndex).trim();
    const value = stripMatchingQuotes(line.slice(sepIndex + 1).trim());

    if (value.startsWith("[") && value.endsWith("]")) {
      // Inline array (simple case for tags). Empty slots are dropped so that
      // `[]` yields an empty array instead of [""].
      raw[key] = value
        .slice(1, -1)
        .split(",")
        .map((item) => item.trim())
        .filter((item) => item !== "")
        .map((item) => item.replace(/^["']|["']$/g, ""));
    } else if (value === "" && !isToml) {
      // Check for YAML-style multiline array (key with no value followed by - items)
      const arrayItems: string[] = [];
      let j = i + 1;
      while (j < lines.length) {
        const nextLine = lines[j];
        if (nextLine === undefined) {
          j++;
          continue;
        }
        // A list item starts with whitespace and a dash
        const listMatch = nextLine.match(/^\s+-\s*(.*)$/);
        if (listMatch && listMatch[1] !== undefined) {
          arrayItems.push(stripMatchingQuotes(listMatch[1].trim()));
          j++;
        } else if (nextLine.trim() === "") {
          // Skip empty lines within the array
          j++;
        } else {
          // Hit a new key or non-list content
          break;
        }
      }
      if (arrayItems.length > 0) {
        raw[key] = arrayItems;
        // Resume at the first line after the list
        i = j;
        continue;
      }
      raw[key] = value;
    } else if (value === "true") {
      raw[key] = true;
    } else if (value === "false") {
      raw[key] = false;
    } else {
      raw[key] = value;
    }
    i++;
  }

  // Apply field mappings to normalize to standard PostFrontmatter fields
  const frontmatter: Record<string, unknown> = {};

  // Title mapping
  const titleField = mapping?.title || "title";
  frontmatter.title = raw[titleField] || raw.title;

  // Description mapping
  const descField = mapping?.description || "description";
  frontmatter.description = raw[descField] || raw.description;

  // Publish date mapping - check custom field first, then fallbacks
  const dateField = mapping?.publishDate;
  if (dateField && raw[dateField]) {
    frontmatter.publishDate = raw[dateField];
  } else if (raw.publishDate) {
    frontmatter.publishDate = raw.publishDate;
  } else {
    // Fallback to common date field names
    const dateFields = ["pubDate", "date", "createdAt", "created_at"];
    for (const field of dateFields) {
      if (raw[field]) {
        frontmatter.publishDate = raw[field];
        break;
      }
    }
  }

  // Cover image mapping
  const coverField = mapping?.coverImage || "ogImage";
  frontmatter.ogImage = raw[coverField] || raw.ogImage;

  // Theme, font family and font size
  frontmatter.theme = raw.theme;
  frontmatter.fontFamily = raw.fontFamily;
  frontmatter.fontSize = raw.fontSize;

  // Tags mapping
  const tagsField = mapping?.tags || "tags";
  frontmatter.tags = raw[tagsField] || raw.tags;

  // Draft mapping (only set when the document actually declares it)
  const draftField = mapping?.draft || "draft";
  const draftValue = raw[draftField] ?? raw.draft;
  if (draftValue !== undefined) {
    frontmatter.draft = draftValue === true || draftValue === "true";
  }

  // Always preserve atUri (internal field)
  frontmatter.atUri = raw.atUri;

  return {
    frontmatter: frontmatter as unknown as PostFrontmatter,
    body,
    rawFrontmatter: raw,
  };
}
189
/**
 * Derives a slug from a markdown filename: drops the `.md`/`.mdx` extension
 * (in any letter case), lowercases the rest and replaces each run of
 * whitespace with a single hyphen.
 *
 * @param filename - File name or relative path of a markdown file.
 * @returns The slug derived from the filename.
 */
export function getSlugFromFilename(filename: string): string {
  return filename
    // Case-insensitive so that `Post.MD` does not keep its extension
    .replace(/\.mdx?$/i, "")
    .toLowerCase()
    .replace(/\s+/g, "-");
}
196
/** Options controlling how a post's slug is derived. */
export interface SlugOptions {
  /** Strip a Jekyll-style `YYYY-MM-DD-` prefix from path segments. */
  stripDatePrefix?: boolean;
  /** Drop a trailing `/index` or `/_index` segment from the slug. */
  removeIndexFromSlug?: boolean;
  /** Frontmatter key whose string value should be used as the slug. */
  slugField?: string;
}
202
/**
 * Computes a post's slug from its frontmatter or, failing that, its path.
 *
 * If `options.slugField` is set and the frontmatter holds a non-empty string
 * under that key, that value is used (minus one leading slash). Otherwise the
 * slug is the relative path without its `.md`/`.mdx` extension. In both cases
 * the result is lowercased and whitespace runs become hyphens.
 *
 * @param relativePath - Path of the markdown file relative to the content directory.
 * @param rawFrontmatter - Parsed frontmatter key/value pairs, before field mapping.
 * @param options - Slug derivation options.
 * @returns The computed slug.
 */
export function getSlugFromOptions(
  relativePath: string,
  rawFrontmatter: Record<string, unknown>,
  options: SlugOptions = {},
): string {
  const {
    slugField,
    removeIndexFromSlug = false,
    stripDatePrefix = false,
  } = options;

  // Shared normalization for both slug sources
  const slugify = (value: string): string =>
    value.toLowerCase().replace(/\s+/g, "-");

  const frontmatterValue = slugField ? rawFrontmatter[slugField] : undefined;

  let slug: string;
  if (typeof frontmatterValue === "string" && frontmatterValue !== "") {
    // Remove leading slash if present
    slug = slugify(frontmatterValue.replace(/^\//, ""));
  } else {
    // Default / fallback: use the file path. Backslash separators (as produced
    // on Windows) are normalized so the `/`-based rules below still apply, and
    // the extension is matched case-insensitively.
    slug = slugify(relativePath.replace(/\\/g, "/").replace(/\.mdx?$/i, ""));
  }

  // Remove /index or /_index suffix if configured
  if (removeIndexFromSlug) {
    slug = slug.replace(/\/_?index$/, "");
  }

  // Strip Jekyll-style date prefix (YYYY-MM-DD-) at the start of any segment
  if (stripDatePrefix) {
    slug = slug.replace(/(^|\/)(\d{4}-\d{2}-\d{2})-/g, "$1");
  }

  return slug;
}
252
/**
 * Expands a path template for a post.
 *
 * Built-in tokens: `{slug}`, `{year}`, `{month}`, `{day}` (from the publish
 * date) and `{title}` (lowercased title with whitespace turned into hyphens
 * and all other non-word characters removed). Any other `{field}` token is
 * replaced with the string value of that raw frontmatter key, or removed when
 * the key is missing or not a string. The result always starts with `/`.
 *
 * All tokens are expanded in a single pass and inserted literally, so a value
 * that itself contains `{...}` or `$&`-style sequences is not re-interpreted.
 *
 * @param template - Template such as `/blog/{year}/{slug}`.
 * @param post - Post whose slug, frontmatter and raw frontmatter feed the tokens.
 * @returns The resolved path with a leading slash.
 */
export function resolvePathTemplate(template: string, post: BlogPost): string {
  const rawDate = post.frontmatter.publishDate;
  let year: string;
  let month: string;
  let day: string;

  // A date written as YYYY-MM-DD... is used as written. Going through Date
  // would parse a date-only string as UTC and then read it back in local
  // time, shifting the day in time zones behind UTC.
  const written =
    typeof rawDate === "string"
      ? /^(\d{4})-(\d{2})-(\d{2})/.exec(rawDate)
      : null;
  if (written) {
    year = written[1] ?? "";
    month = written[2] ?? "";
    day = written[3] ?? "";
  } else {
    // Other formats: local-time components (yield "NaN" for an unparsable date)
    const publishDate = new Date(rawDate);
    year = String(publishDate.getFullYear());
    month = String(publishDate.getMonth() + 1).padStart(2, "0");
    day = String(publishDate.getDate()).padStart(2, "0");
  }

  const slugifiedTitle = (post.frontmatter.title || "")
    .toLowerCase()
    .replace(/\s+/g, "-")
    .replace(/[^\w-]/g, "");

  const builtIns = new Map<string, string>([
    ["slug", post.slug],
    ["year", year],
    ["month", month],
    ["day", day],
    ["title", slugifiedTitle],
  ]);

  // Function replacer: returned text is inserted verbatim
  let result = template.replace(/\{(\w+)\}/g, (_match, field: string) => {
    const builtIn = builtIns.get(field);
    if (builtIn !== undefined) {
      return builtIn;
    }
    const value = post.rawFrontmatter[field];
    return typeof value === "string" ? value : "";
  });

  // Ensure leading slash
  if (!result.startsWith("/")) {
    result = `/${result}`;
  }

  return result;
}
288
/**
 * Resolves the URL path for a post.
 *
 * When `pathTemplate` is given it takes precedence and is expanded with
 * `resolvePathTemplate`. Otherwise the path is `<prefix>/<slug>`, where the
 * prefix defaults to `/posts`.
 *
 * @param post - Post whose path is being resolved.
 * @param pathPrefix - Optional prefix used when no template is given.
 * @param pathTemplate - Optional template; overrides `pathPrefix`.
 * @returns The resolved path.
 */
export function resolvePostPath(
  post: BlogPost,
  pathPrefix?: string,
  pathTemplate?: string,
): string {
  if (pathTemplate) {
    return resolvePathTemplate(pathTemplate, post);
  }
  // Drop trailing slashes so "/blog/" does not produce "/blog//slug"
  const prefix = (pathPrefix || "/posts").replace(/\/+$/, "");
  return `${prefix}/${post.slug}`;
}
300
/**
 * Computes the SHA-256 digest of `content` (encoded with `TextEncoder`).
 *
 * @param content - Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
export async function getContentHash(content: string): Promise<string> {
  const bytes = new TextEncoder().encode(content);
  const digest = new Uint8Array(await crypto.subtle.digest("SHA-256", bytes));

  // Two hex digits per byte, zero-padded
  let hex = "";
  digest.forEach((byte) => {
    hex += byte.toString(16).padStart(2, "0");
  });
  return hex;
}
308
/**
 * Tells whether a path matches at least one of the given ignore patterns,
 * as decided by `minimatch`.
 *
 * @param relativePath - Path to test.
 * @param ignorePatterns - Patterns to test against; an empty list never matches.
 * @returns `true` if any pattern matches, otherwise `false`.
 */
function shouldIgnore(relativePath: string, ignorePatterns: string[]): boolean {
  return ignorePatterns.some((candidate) => minimatch(relativePath, candidate));
}
317
/**
 * Options for scanning a content directory.
 *
 * Extends `SlugOptions` so the slug settings (`slugField`,
 * `removeIndexFromSlug`, `stripDatePrefix`) are declared in one place.
 */
export interface ScanOptions extends SlugOptions {
  /** Custom frontmatter key names for the standard post fields. */
  frontmatterMapping?: FrontmatterMapping;
  /** Patterns for relative paths that should be skipped. */
  ignorePatterns?: string[];
}
325
/**
 * Scans a directory for `.md` and `.mdx` files and parses each into a post.
 *
 * Accepts two call shapes:
 * - new: `scanContentDirectory(dir, options)`, recognized when the second
 *   argument carries any `ScanOptions` key;
 * - old: `scanContentDirectory(dir, frontmatterMapping?, ignorePatterns?)`.
 *
 * Files matching an ignore pattern are skipped. A file whose parsing throws is
 * logged with `console.error` and skipped; errors from reading a file are not
 * caught here and reject the returned promise.
 *
 * @param contentDir - Directory to scan.
 * @param frontmatterMappingOrOptions - Scan options, or a legacy frontmatter mapping.
 * @param ignorePatterns - Legacy ignore patterns; only used with the old call shape.
 * @returns Posts sorted by publish date, newest first; posts without a
 *   parsable date come last.
 */
export async function scanContentDirectory(
  contentDir: string,
  frontmatterMappingOrOptions?: FrontmatterMapping | ScanOptions,
  ignorePatterns: string[] = [],
): Promise<BlogPost[]> {
  // Handle both old signature (frontmatterMapping, ignorePatterns) and new
  // signature (options). Every ScanOptions key counts as evidence of the new
  // shape, so an object like { stripDatePrefix: true } is not mistaken for a
  // frontmatter mapping.
  const scanOptionKeys = [
    "frontmatterMapping",
    "ignorePatterns",
    "slugField",
    "removeIndexFromSlug",
    "stripDatePrefix",
  ];
  let options: ScanOptions;
  if (
    frontmatterMappingOrOptions &&
    scanOptionKeys.some((key) => key in frontmatterMappingOrOptions)
  ) {
    options = frontmatterMappingOrOptions as ScanOptions;
  } else {
    // Old signature: (contentDir, frontmatterMapping?, ignorePatterns?)
    options = {
      frontmatterMapping: frontmatterMappingOrOptions as
        | FrontmatterMapping
        | undefined,
      ignorePatterns,
    };
  }

  const {
    frontmatterMapping,
    ignorePatterns: ignore = [],
    slugField,
    removeIndexFromSlug,
    stripDatePrefix,
  } = options;

  const patterns = ["**/*.md", "**/*.mdx"];
  const posts: BlogPost[] = [];

  for (const pattern of patterns) {
    const files = await glob(pattern, {
      cwd: contentDir,
      absolute: false,
    });

    for (const relativePath of files) {
      // Skip files matching ignore patterns
      if (shouldIgnore(relativePath, ignore)) {
        continue;
      }

      const filePath = path.join(contentDir, relativePath);
      const rawContent = await fs.readFile(filePath, "utf-8");

      try {
        const { frontmatter, body, rawFrontmatter } = parseFrontmatter(
          rawContent,
          frontmatterMapping,
        );
        const slug = getSlugFromOptions(relativePath, rawFrontmatter, {
          slugField,
          removeIndexFromSlug,
          stripDatePrefix,
        });

        posts.push({
          filePath,
          slug,
          frontmatter,
          content: body,
          rawContent,
          rawFrontmatter,
        });
      } catch (error) {
        console.error(`Error parsing ${relativePath}:`, error);
      }
    }
  }

  // Sort by publish date (newest first). A missing or unparsable date maps to
  // -Infinity so the comparator never returns NaN and such posts sort last.
  const publishTime = (post: BlogPost): number => {
    const time = new Date(post.frontmatter.publishDate).getTime();
    return Number.isNaN(time) ? Number.NEGATIVE_INFINITY : time;
  };
  posts.sort((a, b) => {
    const timeA = publishTime(a);
    const timeB = publishTime(b);
    if (timeB > timeA) return 1;
    if (timeB < timeA) return -1;
    return 0;
  });

  return posts;
}
410
/**
 * Returns `rawContent` with its frontmatter `atUri` entry set to `atUri`.
 *
 * - Content that does not start with a frontmatter delimiter gets a new
 *   `---` block containing only the `atUri` entry.
 * - An existing `atUri` line inside the frontmatter block is replaced.
 * - Otherwise the entry is inserted just before the closing delimiter.
 *
 * The entry is written as `atUri = "..."` for `+++` frontmatter and as
 * `atUri: "..."` otherwise. Only the frontmatter block is inspected and
 * modified; the document body is left untouched.
 *
 * @param rawContent - Full text of the markdown file.
 * @param atUri - AT URI to record.
 * @returns The updated file text.
 * @throws Error if the content starts with a delimiter but no closing
 *   delimiter line follows.
 */
export function updateFrontmatterWithAtUri(
  rawContent: string,
  atUri: string,
): string {
  // Detect which delimiter is used (---, +++, or ***)
  const delimiterMatch = rawContent.match(/^(---|\+\+\+|\*\*\*)/);
  const delimiter = delimiterMatch?.[1] ?? "---";
  const isToml = delimiter === "+++";

  // Format the atUri entry based on frontmatter type
  const atUriEntry = isToml ? `atUri = "${atUri}"` : `atUri: "${atUri}"`;

  // No frontmatter: create one with atUri
  if (!delimiterMatch) {
    return `---\n${atUriEntry}\n---\n\n${rawContent}`;
  }

  // Find the closing delimiter: the same marker alone on a line after the
  // opening one. A plain substring search would also hit the marker inside a
  // value or in the body (e.g. a table separator row).
  const escapedDelimiter = delimiter.replace(/[+*]/g, "\\$&");
  const closingRegex = new RegExp(`(\\r?\\n)${escapedDelimiter}(?:\\r?\\n|$)`);
  const closing = closingRegex.exec(rawContent.slice(delimiter.length));
  if (!closing) {
    throw new Error("Could not find frontmatter end");
  }

  // Reuse the file's own line ending for the inserted line
  const eol = closing[1] ?? "\n";
  const closingIndex = delimiter.length + closing.index + eol.length;
  const frontmatterPart = rawContent.slice(0, closingIndex);
  const rest = rawContent.slice(closingIndex);

  // Replace an existing atUri line (YAML or TOML form), frontmatter only
  const existingEntry = /^([ \t]*)atUri[ \t]*[=:][^\r\n]*/m;
  if (existingEntry.test(frontmatterPart)) {
    const updated = frontmatterPart.replace(
      existingEntry,
      // Function replacer: the URI is inserted verbatim
      (_line, indent: string) => `${indent}${atUriEntry}`,
    );
    return `${updated}${rest}`;
  }

  // Insert atUri before the closing delimiter
  return `${frontmatterPart}${atUriEntry}${eol}${rest}`;
}
448
/**
 * Returns `rawContent` without the `atUri` entry in its frontmatter.
 *
 * Content without a frontmatter block is returned unchanged. If removing the
 * entry leaves the frontmatter with no non-blank lines, the whole block is
 * dropped along with the blank lines that followed it. LF and CRLF line
 * endings are both accepted, and the closing delimiter may be the last thing
 * in the file.
 *
 * @param rawContent - Full text of the markdown file.
 * @returns The file text with the `atUri` line removed.
 */
export function removeFrontmatterAtUri(rawContent: string): string {
  const frontmatterRegex =
    /^(---|\+\+\+|\*\*\*)(\r?\n)([\s\S]*?)\r?\n\1(\r?\n|$)/;
  const match = rawContent.match(frontmatterRegex);
  if (!match) return rawContent;

  const delimiter = match[1];
  // Line ending used after the opening delimiter; reused when rebuilding
  const eol = match[2] ?? "\n";
  const frontmatterStr = match[3] ?? "";
  // What followed the closing delimiter: a line ending, or "" at end of file
  const terminator = match[4] ?? "";

  // Remove the atUri line
  const lines = frontmatterStr
    .split(/\r?\n/)
    .filter((line) => !/^\s*atUri\s*[=:]\s*/.test(line));

  // Check if remaining frontmatter has any non-empty lines
  const hasContent = lines.some((line) => line.trim() !== "");

  const afterFrontmatter = rawContent.slice(match[0].length);

  if (!hasContent) {
    // Remove entire frontmatter block, trim leading newlines
    return afterFrontmatter.replace(/^(?:\r?\n)+/, "");
  }

  return `${delimiter}${eol}${lines.join(eol)}${eol}${delimiter}${terminator}${afterFrontmatter}`;
}
474
/**
 * Produces a plain-text approximation of a markdown string.
 *
 * Fenced code blocks and images are removed; links are reduced to their text;
 * heading markers, bold/italic markers and inline-code backticks are dropped;
 * runs of three or more newlines collapse to two; the result is trimmed.
 *
 * @param markdown - Markdown source.
 * @returns The text with the formatting above removed.
 */
export function stripMarkdownForText(markdown: string): string {
  return (
    markdown
      // Code blocks first, so their contents never reach the inline rules
      .replace(/`{3}[\s\S]*?`{3}/g, "")
      // Images before links: otherwise the link rule turns ![alt](src) into "!alt"
      .replace(/!\[.*?\]\(.*?\)/g, "")
      // Links: keep the text. Empty text is allowed so an image-only link,
      // left as [](url) by the previous step, disappears as well.
      .replace(/\[([^\]]*)\]\([^)]+\)/g, "$1")
      // Heading markers, only at the start of a line ("C# is" stays intact)
      .replace(/^ {0,3}#{1,6}[ \t]+/gm, "")
      .replace(/\*\*([^*]+)\*\*/g, "$1") // Remove bold
      .replace(/\*([^*]+)\*/g, "$1") // Remove italic
      .replace(/`([^`]+)`/g, "$1") // Remove inline code formatting
      .replace(/\n{3,}/g, "\n\n") // Normalize multiple newlines
      .trim()
  );
}
487
/**
 * Returns the plain-text content for a post.
 *
 * If `textContentField` is given and the post's raw frontmatter holds a truthy
 * value under that key, that value is returned converted with `String`.
 * Otherwise the post's markdown content is passed through
 * `stripMarkdownForText`.
 *
 * @param post - Object with the markdown `content` and optional raw frontmatter.
 * @param textContentField - Optional frontmatter key that overrides the content.
 * @returns The text content.
 */
export function getTextContent(
  post: { content: string; rawFrontmatter?: Record<string, unknown> },
  textContentField?: string,
): string {
  if (textContentField) {
    const override = post.rawFrontmatter?.[textContentField];
    if (override) {
      return String(override);
    }
  }
  return stripMarkdownForText(post.content);
}