···3030 }
31313232 /**
3333+ * Resolve the PDS service endpoint for a web DID
3434+ * @param {string} did - The web DID (e.g., did:web:didd.uk)
3535+ * @returns {Promise<string>} - The PDS service endpoint URL
3636+ */
3737+ async resolveWebDidPdsEndpoint(did) {
3838+ if (!did.startsWith('did:web:')) {
3939+ throw new Error('Not a web DID');
4040+ }
4141+4242+ try {
4343+ // Extract domain from web DID (e.g., did:web:didd.uk -> didd.uk)
4444+ const webDomain = did.replace('did:web:', '').replace(/:/g, '/');
4545+ const didDocUrl = `https://${webDomain}/.well-known/did.json`;
4646+4747+ console.log(`Fetching DID document from: ${didDocUrl}`);
4848+ const response = await axios.get(didDocUrl, {
4949+ headers: { 'User-Agent': 'Mozilla/5.0 Stagehand/1.1.0' },
5050+ timeout: 10000
5151+ });
5252+5353+ const didDoc = response.data;
5454+5555+ // Look for the PDS service endpoint in the DID document
5656+ // It should have type "AtprotoPersonalDataServer"
5757+ const pdsService = didDoc.service?.find(s =>
5858+ s.type === 'AtprotoPersonalDataServer' ||
5959+ s.id === '#atproto_pds'
6060+ );
6161+6262+ if (pdsService && pdsService.serviceEndpoint) {
6363+ console.log(`Resolved PDS endpoint: ${pdsService.serviceEndpoint}`);
6464+ return pdsService.serviceEndpoint;
6565+ }
6666+6767+ // Fallback to the domain itself if no service endpoint found
6868+ const fallbackEndpoint = `https://${webDomain.split('/')[0]}`;
6969+ console.log(`No PDS service in DID document, using fallback: ${fallbackEndpoint}`);
7070+ return fallbackEndpoint;
7171+ } catch (error) {
7272+ console.error(`Failed to resolve web DID PDS endpoint: ${error.message}`);
7373+ // Fallback to direct domain
7474+ const webDomain = did.replace('did:web:', '').split(':')[0];
7575+ return `https://${webDomain}`;
7676+ }
7777+ }
7878+7979+ /**
3380 * Parse a Bluesky URL to extract handle/DID and rkey (post ID)
3481 * @param {string} url - Bluesky URL
3582 * @returns {{repo: string, rkey: string}} - Extracted repo (handle/DID) and rkey
···548595 const thumbUrl = `https://video.bsky.app/watch/${did}/${cid}/thumbnail.jpg`;
549596 const thumbnailProcessed = await mediaCache.processMediaUrl(thumbUrl, false, thumbUrl);
550597598598+ // Determine the correct PDS endpoint for web DIDs
599599+ let pdsEndpoint = this.serviceEndpoint;
600600+ if (did.startsWith('did:web:')) {
601601+ console.log(`Detected web DID: ${did}`);
602602+ // Resolve the actual PDS endpoint from the DID document
603603+ pdsEndpoint = await this.resolveWebDidPdsEndpoint(did);
604604+ console.log(`Using resolved PDS endpoint: ${pdsEndpoint}`);
605605+ }
606606+551607 // Direct blob access is most reliable for video
552552- const videoBlobUrl = `${this.serviceEndpoint}/xrpc/com.atproto.sync.getBlob?did=${did}&cid=${cid}`;
608608+ const videoBlobUrl = `${pdsEndpoint}/xrpc/com.atproto.sync.getBlob?did=${encodeURIComponent(did)}&cid=${cid}`;
609609+ console.log(`Constructed blob URL: ${videoBlobUrl}`);
553610 let videoProcessed = null;
554554- let sourceVideoUrl = `https://video.bsky.app/watch/${did}/${cid}/video.mp4`;
611611+ let sourceVideoUrl = videoBlobUrl;
555612556613 try {
557614 console.log(`Downloading video from blob URL: ${videoBlobUrl}`);
···575632 console.error(`Failed to download video from blob URL: ${blobError.message}`);
576633577634 // If direct blob access fails, try alternative URLs
578578- const videoUrls = [
579579- `https://video.bsky.app/watch/${did}/${cid}/video.mp4`,
580580- `https://video.bsky.app/watch/${did}/${cid}/480.mp4`,
581581- `https://video.bsky.app/watch/${did}/${cid}/720.mp4`,
582582- `https://video.bsky.app/watch/${did}/${cid}/1080.mp4`,
583583- `https://video.bsky.app/watch/${did}/${cid}/${cid}.mp4`
584584- ];
635635+ // Note: video.bsky.app URLs only work for standard Bluesky DIDs, not web DIDs
636636+ const isWebDid = did.startsWith('did:web:');
585637586586- for (const videoUrl of videoUrls) {
587587- try {
588588- console.log(`Trying alternative video URL: ${videoUrl}`);
589589- videoProcessed = await mediaCache.processMediaUrl(videoUrl, true, videoUrl);
590590- console.log(`Successfully downloaded video from: ${videoUrl}`);
591591- sourceVideoUrl = videoUrl;
592592- break;
593593- } catch (e) {
594594- console.log(`Failed with URL ${videoUrl}: ${e.message}`);
638638+ if (!isWebDid) {
639639+ const videoUrls = [
640640+ `https://video.bsky.app/watch/${did}/${cid}/video.mp4`,
641641+ `https://video.bsky.app/watch/${did}/${cid}/480.mp4`,
642642+ `https://video.bsky.app/watch/${did}/${cid}/720.mp4`,
643643+ `https://video.bsky.app/watch/${did}/${cid}/1080.mp4`,
644644+ `https://video.bsky.app/watch/${did}/${cid}/${cid}.mp4`
645645+ ];
646646+647647+ for (const videoUrl of videoUrls) {
648648+ try {
649649+ console.log(`Trying alternative video URL: ${videoUrl}`);
650650+ videoProcessed = await mediaCache.processMediaUrl(videoUrl, true, videoUrl);
651651+ console.log(`Successfully downloaded video from: ${videoUrl}`);
652652+ sourceVideoUrl = videoUrl;
653653+ break;
654654+ } catch (e) {
655655+ console.log(`Failed with URL ${videoUrl}: ${e.message}`);
656656+ }
595657 }
658658+ } else {
659659+ console.log(`Web DID detected - video.bsky.app URLs not available, blob API is the only option`);
596660 }
597661598662 if (videoProcessed) {
···612676 }
613677 }
614678615615- // If all video attempts fail, fall back to thumbnail only
616616- console.warn('Could not download Bluesky video, using thumbnail only');
617617- return {
618618- imageUrl: thumbnailProcessed.localPath,
619619- imageUrls: [thumbnailProcessed.localPath], // Single thumbnail
620620- sourceUrl: url,
621621- title: `Bluesky Video by ${displayName}`,
622622- siteName: 'Bluesky',
623623- isVideo: false,
624624- originalImageUrl: thumbUrl,
625625- sourceImgUrl: thumbUrl // Add the new sourceImgUrl field
626626- };
679679+ // If all video attempts fail, throw an error instead of falling back to thumbnail
680680+ const errorMsg = did.startsWith('did:web:')
681681+ ? `Failed to download video from web DID (${did}). The blob API at the custom PDS server is not accessible or returned an error.`
682682+ : `Failed to download Bluesky video from all attempted sources (blob API and CDN URLs).`;
683683+ console.error(errorMsg);
684684+ throw new Error(errorMsg);
627685 } catch (error) {
628686 console.error('Error processing Bluesky video:', error);
629687 throw new Error(`Could not process Bluesky video: ${error.message}`);
+50
utils/mediaCache.js
···7070 }
71717272 /**
7373+ * Get a set of all file paths currently in the queue
7474+ */
7575+ async getQueuedFiles() {
7676+ try {
7777+ const queueFile = config.queueFilePath || path.join(__dirname, '..', 'queue', 'queue.json');
7878+7979+ if (await fs.pathExists(queueFile)) {
8080+ const queueData = await fs.readJson(queueFile);
8181+ const filesInUse = new Set();
8282+8383+ if (queueData.queue && Array.isArray(queueData.queue)) {
8484+ for (const item of queueData.queue) {
8585+ // Add all file paths from queue items
8686+ if (item.imageUrl) filesInUse.add(path.resolve(item.imageUrl));
8787+ if (item.videoUrl) filesInUse.add(path.resolve(item.videoUrl));
8888+8989+ // Handle multiple images
9090+ if (item.imageUrls && Array.isArray(item.imageUrls)) {
9191+ item.imageUrls.forEach(url => filesInUse.add(path.resolve(url)));
9292+ }
9393+ }
9494+ }
9595+9696+ console.log(`Found ${filesInUse.size} files currently in queue`);
9797+ return filesInUse;
9898+ }
9999+ } catch (error) {
100100+ console.error('Error reading queue file for cleanup:', error);
101101+ }
102102+103103+ return new Set();
104104+ }
105105+106106+ /**
73107 * Clean up old cache files
74108 */
75109 async cleanupCache() {
···77111 const now = Date.now();
78112 const maxAgeMs = this.maxCacheAgeDays * 24 * 60 * 60 * 1000;
79113114114+ // Get files that are currently in the queue
115115+ const queuedFiles = await this.getQueuedFiles();
116116+80117 // Helper function to clean a specific directory
81118 const cleanDir = async (dir) => {
82119 try {
83120 const files = await fs.readdir(dir);
121121+ let skippedCount = 0;
8412285123 for (const file of files) {
86124 const filePath = path.join(dir, file);
125125+ const resolvedPath = path.resolve(filePath);
126126+127127+ // Skip files that are in the queue
128128+ if (queuedFiles.has(resolvedPath)) {
129129+ skippedCount++;
130130+ continue;
131131+ }
132132+87133 const stats = await fs.stat(filePath);
8813489135 // Check if file is older than max cache age
···91137 await fs.remove(filePath);
92138 console.log(`Removed old cache file: ${file}`);
93139 }
140140+ }
141141+142142+ if (skippedCount > 0) {
143143+ console.log(`Skipped ${skippedCount} files in queue from ${path.basename(dir)}`);
94144 }
95145 } catch (error) {
96146 console.error(`Error cleaning directory ${dir}:`, error);