this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

feat: add image perceptual hashing module with database integration

- Implemented ImageHashManager for detecting duplicate and similar images using perceptual hashing.
- Added SQLite database for storing image URLs, hashes, and metadata.
- Created command-line tools for managing image hashes, including processing, searching, and cleanup functionalities.
- Documented the image hashing module and its usage in a new markdown file.

+2199 -58
+22
.devcontainer/devcontainer.json
··· 1 + // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 + // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node 3 + { 4 + "name": "Node.js", 5 + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 + "image": "mcr.microsoft.com/devcontainers/javascript-node:1-24-bookworm" 7 + 8 + // Features to add to the dev container. More info: https://containers.dev/features. 9 + // "features": {}, 10 + 11 + // Use 'forwardPorts' to make a list of ports inside the container available locally. 12 + // "forwardPorts": [], 13 + 14 + // Use 'postCreateCommand' to run commands after the container is created. 15 + // "postCreateCommand": "yarn install", 16 + 17 + // Configure tool-specific properties. 18 + // "customizations": {}, 19 + 20 + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 21 + // "remoteUser": "root" 22 + }
+12
.github/dependabot.yml
··· 1 + # To get started with Dependabot version updates, you'll need to specify which 2 + # package ecosystems to update and where the package manifests are located. 3 + # Please see the documentation for more information: 4 + # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 + # https://containers.dev/guide/dependabot 6 + 7 + version: 2 8 + updates: 9 + - package-ecosystem: "devcontainers" 10 + directory: "/" 11 + schedule: 12 + interval: weekly
+3
.gitignore
··· 11 11 cache/ 12 12 alert-state.json 13 13 14 + # Image hash database 15 + data/ 16 + 14 17 # Dependencies 15 18 node_modules/ 16 19 stagehanderror.txt
+44 -2
bot/telegrambot/commands/linkHandler.js
··· 75 75 this.bot.sendMessage(chatId, mediaData.error, { reply_to_message_id: msg.message_id }); 76 76 return; 77 77 } 78 - await queueManager.addToQueue(mediaData); 78 + 79 + // Get username for blame tracking 80 + const username = msg.from.username || msg.from.first_name || `User ${msg.from.id}`; 81 + 82 + const result = await queueManager.addToQueue(mediaData, username); 83 + 84 + // Handle duplicate detection 85 + if (result && result.duplicate) { 86 + const existingInfo = result.existingItem; 87 + const addedByText = existingInfo.addedBy || 'unknown'; 88 + const timeAgo = existingInfo.timestamp ? 89 + new Date(existingInfo.timestamp).toLocaleString() : 'unknown time'; 90 + 91 + this.bot.sendMessage(chatId, `⚠️ Duplicate image detected!\nThis was already added by ${addedByText} at ${timeAgo}`, { reply_to_message_id: msg.message_id }); 92 + return; 93 + } 94 + 95 + // Check if it was successful 96 + if (!result || !result.success) { 97 + this.bot.sendMessage(chatId, 'Failed to add to queue', { reply_to_message_id: msg.message_id }); 98 + return; 99 + } 100 + 79 101 const queueLength = await queueManager.getQueueLength(); 80 102 const mediaType = mediaData.isVideo ? 
'Video' : 'Image'; 81 103 this.bot.sendMessage(chatId, `Added to queue: ${mediaType} - ${mediaData.title}\nCurrent queue length: ${queueLength}`, { reply_to_message_id: msg.message_id }); ··· 226 248 if (mediaData.error && /no suitable scraper/i.test(mediaData.error)) return false; 227 249 return false; 228 250 } 229 - await queueManager.addToQueue(mediaData); 251 + 252 + // Get username for blame tracking 253 + const username = msg.from.username || msg.from.first_name || `User ${msg.from.id}`; 254 + 255 + const result = await queueManager.addToQueue(mediaData, username); 256 + 257 + // Handle duplicate detection 258 + if (result && result.duplicate) { 259 + const existingInfo = result.existingItem; 260 + const addedByText = existingInfo.addedBy || 'unknown'; 261 + const timeAgo = existingInfo.timestamp ? 262 + new Date(existingInfo.timestamp).toLocaleString() : 'unknown time'; 263 + 264 + return `Duplicate image (already added by ${addedByText} at ${timeAgo})`; 265 + } 266 + 267 + // Check if it was successful 268 + if (!result || !result.success) { 269 + return 'Failed to add to queue'; 270 + } 271 + 230 272 return true; 231 273 } catch (err) { 232 274 // If error message is about no suitable scraper, treat as discard
+3 -1
bot/telegrambot/helpers/queueHelper.js
··· 57 57 } 58 58 } 59 59 60 - message += `${itemIndex}. ${itemType} *${item.title}*\n From: ${item.siteName} ${statusIcons}\n`; 60 + const addedByText = item.addedBy ? `\n Added by: ${item.addedBy}` : ''; 61 + 62 + message += `${itemIndex}. ${itemType} *${item.title}*\n From: ${item.siteName} ${statusIcons}${addedByText}\n`; 61 63 } 62 64 63 65 // Create navigation buttons and item action buttons
+362
docs/image-hashing.md
··· 1 + # Image Perceptual Hashing Module 2 + 3 + ## Overview 4 + 5 + The Image Perceptual Hashing module provides functionality to detect duplicate and similar images across different URLs. It uses perceptual hashing (pHash) to create fingerprints of images that can be compared even if images have been slightly modified, resized, or compressed. 6 + 7 + ## Features 8 + 9 + - **Perceptual Hashing**: Generates perceptual hashes (pHash) for cached images 10 + - **Database Storage**: Stores image URLs and hashes in a SQLite database 11 + - **Similarity Detection**: Find visually similar images using Hamming distance 12 + - **Automatic Integration**: Automatically processes images when they are cached 13 + - **Cleanup Management**: Removes database entries for deleted files 14 + - **Command-line Tools**: Utilities for managing and querying the hash database 15 + 16 + ## Architecture 17 + 18 + ### Components 19 + 20 + 1. **ImageHashManager** (`utils/imageHashManager.js`) 21 + - Core module for perceptual hashing and database management 22 + - Handles hash calculation, storage, and similarity detection 23 + - Manages SQLite database operations 24 + 25 + 2. **MediaCache Integration** (`utils/mediaCache.js`) 26 + - Automatically processes images when downloaded 27 + - Calls ImageHashManager after successful image caching 28 + - Manages cleanup of orphaned hash records 29 + 30 + 3. 
**Command-line Tool** (`image-hash-tool.js`) 31 + - Provides CLI utilities for managing the hash database 32 + - Enables manual processing and querying 33 + 34 + ### Database Schema 35 + 36 + ```sql 37 + CREATE TABLE image_hashes ( 38 + id INTEGER PRIMARY KEY AUTOINCREMENT, 39 + url TEXT NOT NULL UNIQUE, 40 + perceptual_hash TEXT NOT NULL, 41 + file_path TEXT, 42 + cached_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 43 + metadata TEXT 44 + ); 45 + 46 + CREATE INDEX idx_perceptual_hash ON image_hashes(perceptual_hash); 47 + CREATE INDEX idx_url ON image_hashes(url); 48 + ``` 49 + 50 + ## Installation 51 + 52 + Install the required dependencies: 53 + 54 + ```bash 55 + npm install imghash better-sqlite3 56 + ``` 57 + 58 + ## Usage 59 + 60 + ### Automatic Processing 61 + 62 + Images are automatically processed when cached by the MediaCache module. No additional code is required. 63 + 64 + ```javascript 65 + const mediaCache = require('./utils/mediaCache'); 66 + 67 + // Images are automatically hashed when downloaded 68 + const { localPath } = await mediaCache.processMediaUrl('https://example.com/image.jpg'); 69 + ``` 70 + 71 + ### Manual Processing 72 + 73 + ```javascript 74 + const ImageHashManager = require('./utils/imageHashManager'); 75 + const hashManager = new ImageHashManager(); 76 + 77 + // Process a single image 78 + const result = await hashManager.processImage( 79 + 'https://example.com/image.jpg', 80 + '/path/to/cached/image.jpg', 81 + { source: 'manual' } 82 + ); 83 + 84 + console.log('Hash:', result.hash); 85 + console.log('Stored:', result.stored); 86 + 87 + // Close database connection when done 88 + hashManager.close(); 89 + ``` 90 + 91 + ### Finding Similar Images 92 + 93 + ```javascript 94 + // Get hash for an image 95 + const record = hashManager.getHashByUrl('https://example.com/image.jpg'); 96 + 97 + // Find similar images (max Hamming distance: 5) 98 + const similar = hashManager.findSimilarImages(record.perceptual_hash, 5); 99 + 100 + 
similar.forEach(img => { 101 + console.log(`Distance: ${img.distance}, URL: ${img.url}`); 102 + }); 103 + ``` 104 + 105 + ## Command-line Tool 106 + 107 + The `image-hash-tool.js` script provides various utilities: 108 + 109 + ### Show Statistics 110 + 111 + ```bash 112 + node image-hash-tool.js stats 113 + ``` 114 + 115 + ### List All Hashes 116 + 117 + ```bash 118 + # List all hashes 119 + node image-hash-tool.js list 120 + 121 + # Limit results 122 + node image-hash-tool.js list 10 123 + ``` 124 + 125 + ### Calculate Hash for an Image 126 + 127 + ```bash 128 + node image-hash-tool.js hash cache/images/example.jpg 129 + ``` 130 + 131 + ### Process an Image Manually 132 + 133 + ```bash 134 + node image-hash-tool.js process "https://example.com/image.jpg" "cache/images/example.jpg" 135 + ``` 136 + 137 + ### Find Similar Images 138 + 139 + ```bash 140 + # Find images with Hamming distance <= 5 141 + node image-hash-tool.js similar 8a3c5e9f 5 142 + ``` 143 + 144 + ### Search by URL 145 + 146 + ```bash 147 + node image-hash-tool.js search "https://example.com/image.jpg" 148 + ``` 149 + 150 + ### Clean Up Orphaned Records 151 + 152 + ```bash 153 + node image-hash-tool.js cleanup 154 + ``` 155 + 156 + ### Scan Cache Directory 157 + 158 + Process all images in the cache directory: 159 + 160 + ```bash 161 + node image-hash-tool.js scan-cache 162 + ``` 163 + 164 + ## How Perceptual Hashing Works 165 + 166 + Perceptual hashing creates a "fingerprint" of an image based on its visual content. Unlike cryptographic hashes (MD5, SHA), perceptual hashes are: 167 + 168 + 1. **Similar for similar images**: Visually similar images produce similar hashes 169 + 2. **Resistant to modifications**: Small changes (compression, resizing, color adjustment) produce similar hashes 170 + 3. 
**Comparable**: Hashes can be compared using Hamming distance 171 + 172 + ### Hamming Distance 173 + 174 + The Hamming distance measures the difference between two hashes: 175 + 176 + - **Distance 0**: Identical or nearly identical images 177 + - **Distance 1-5**: Very similar images (recommended threshold) 178 + - **Distance 6-10**: Somewhat similar images 179 + - **Distance > 10**: Different images 180 + 181 + ## API Reference 182 + 183 + ### ImageHashManager 184 + 185 + #### Constructor 186 + 187 + ```javascript 188 + const hashManager = new ImageHashManager(); 189 + ``` 190 + 191 + #### Methods 192 + 193 + ##### `async calculateHash(filePath)` 194 + 195 + Calculate perceptual hash for an image file. 196 + 197 + **Parameters:** 198 + - `filePath` (string): Path to the image file 199 + 200 + **Returns:** Promise<string> - Perceptual hash 201 + 202 + ##### `storeHash(url, hash, filePath, metadata)` 203 + 204 + Store image hash in the database. 205 + 206 + **Parameters:** 207 + - `url` (string): Original URL of the image 208 + - `hash` (string): Perceptual hash 209 + - `filePath` (string): Path to cached file 210 + - `metadata` (Object): Optional metadata 211 + 212 + **Returns:** boolean - Success status 213 + 214 + ##### `async processImage(url, filePath, metadata)` 215 + 216 + Calculate hash and store in database. 217 + 218 + **Parameters:** 219 + - `url` (string): Original URL of the image 220 + - `filePath` (string): Path to cached file 221 + - `metadata` (Object): Optional metadata 222 + 223 + **Returns:** Promise<{hash: string, stored: boolean}> 224 + 225 + ##### `getHashByUrl(url)` 226 + 227 + Retrieve hash record by URL. 228 + 229 + **Parameters:** 230 + - `url` (string): URL to look up 231 + 232 + **Returns:** Object|null - Hash record or null 233 + 234 + ##### `findSimilarImages(hash, maxDistance)` 235 + 236 + Find similar images by comparing hashes. 
237 + 238 + **Parameters:** 239 + - `hash` (string): Hash to compare 240 + - `maxDistance` (number): Maximum Hamming distance (default: 5) 241 + 242 + **Returns:** Array - Similar image records sorted by distance 243 + 244 + ##### `getAllHashes(limit)` 245 + 246 + Get all stored hashes. 247 + 248 + **Parameters:** 249 + - `limit` (number): Optional limit on results 250 + 251 + **Returns:** Array - Hash records 252 + 253 + ##### `deleteHashByUrl(url)` 254 + 255 + Delete hash record by URL. 256 + 257 + **Parameters:** 258 + - `url` (string): URL to delete 259 + 260 + **Returns:** boolean - Success status 261 + 262 + ##### `async cleanupOrphanedHashes()` 263 + 264 + Remove hash records for deleted files. 265 + 266 + **Returns:** Promise<number> - Number of cleaned records 267 + 268 + ##### `getStats()` 269 + 270 + Get database statistics. 271 + 272 + **Returns:** Object - Database statistics 273 + 274 + ##### `close()` 275 + 276 + Close database connection. 277 + 278 + ## Configuration 279 + 280 + The module uses the following from `config.js`: 281 + 282 + - `cacheDir`: Base cache directory (defaults to `./cache`) 283 + 284 + ## Database Location 285 + 286 + The SQLite database is stored at: 287 + 288 + ``` 289 + <project_root>/data/image_hashes.db 290 + ``` 291 + 292 + This directory is added to `.gitignore` to avoid committing the database. 293 + 294 + ## Performance Considerations 295 + 296 + 1. **Hash Calculation**: Perceptual hashing is CPU-intensive 297 + - Processing happens asynchronously after image download 298 + - Failures in hashing don't affect media caching 299 + 300 + 2. **Database Size**: Each record is small (~200 bytes) 301 + - 10,000 images ≈ 2MB database size 302 + - Indexes ensure fast lookups 303 + 304 + 3. 
**Similarity Search**: Linear scan for finding similar images 305 + - Fast for databases < 100,000 images 306 + - Consider implementing optimizations for larger databases 307 + 308 + ## Error Handling 309 + 310 + The module includes comprehensive error handling: 311 + 312 + - Hash calculation failures are logged but don't affect media caching 313 + - Database errors are caught and logged 314 + - Invalid file paths are handled gracefully 315 + - Missing dependencies are caught during initialization 316 + 317 + ## Troubleshooting 318 + 319 + ### Dependencies Not Installed 320 + 321 + If you see errors about missing modules: 322 + 323 + ```bash 324 + npm install imghash better-sqlite3 325 + ``` 326 + 327 + ### Database Locked 328 + 329 + If you get "database is locked" errors: 330 + - Ensure only one process accesses the database at a time 331 + - The database uses WAL mode for better concurrency 332 + 333 + ### Invalid Image Format 334 + 335 + Some images may fail to hash: 336 + - Ensure images are valid formats (JPEG, PNG, GIF, WebP) 337 + - Check file permissions 338 + - Verify images aren't corrupted 339 + 340 + ## Future Enhancements 341 + 342 + Potential improvements for the module: 343 + 344 + 1. **Advanced Similarity Search**: Implement spatial indexing for faster searches 345 + 2. **Duplicate Detection**: Add automatic duplicate detection and alerts 346 + 3. **Web Interface**: Create a web UI for browsing and comparing images 347 + 4. **Batch Processing**: Add parallel processing for large image sets 348 + 5. **Alternative Algorithms**: Support different perceptual hash algorithms 349 + 6. **Statistics Dashboard**: Visualize similarity clusters and duplicates 350 + 351 + ## Contributing 352 + 353 + When modifying this module: 354 + 355 + 1. Maintain backward compatibility with the database schema 356 + 2. Add appropriate error handling 357 + 3. Update tests and documentation 358 + 4. 
Consider performance implications 359 + 360 + ## License 361 + 362 + This module is part of the Stagehand project and follows the same license.
+91
examples/image-hash-example.js
/**
 * Example: Using the Image Hash Manager
 *
 * This example demonstrates how to use the perceptual hashing module
 * to detect duplicate and similar images.
 *
 * Usage (from the project root):
 *   node examples/image-hash-example.js
 */

const path = require('path');
// This file lives in examples/, so project modules are one directory up.
// (The original './utils/...' path resolved to examples/utils/ and failed to load.)
const ImageHashManager = require('../utils/imageHashManager');

// Project root, used to locate the shared cache directory.
const PROJECT_ROOT = path.join(__dirname, '..');

/**
 * Shorten a string for display, appending an ellipsis only when text was
 * actually cut off.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The original text, or its first `maxLength` characters followed by "...".
 */
function preview(text, maxLength) {
  const value = String(text);
  return value.length > maxLength ? `${value.substring(0, maxLength)}...` : value;
}

/**
 * Walk through the main ImageHashManager operations: statistics, listing,
 * similarity search, URL lookup and orphan cleanup.
 *
 * Errors are logged rather than rethrown, and the database connection is
 * always closed before the function returns.
 *
 * @returns {Promise<void>}
 */
async function example() {
  // Initialize the hash manager
  const hashManager = new ImageHashManager();

  console.log('=== Image Perceptual Hashing Example ===\n');

  try {
    // Example 1: Process an image
    console.log('1. Processing an image:');
    // These two values are only consumed by the commented-out sample below;
    // uncomment it once the referenced file exists in your cache.
    const imagePath = path.join(PROJECT_ROOT, 'cache', 'images', 'd9fa3f2934bb020b7612b6b367184ff9.webp');
    const imageUrl = 'https://example.com/test-image.jpg';

    // Note: This will only work if the image exists in your cache
    // const result = await hashManager.processImage(imageUrl, imagePath);
    // console.log('   Hash:', result.hash);
    // console.log('   Stored:', result.stored);
    console.log('   (Run with actual cached images)\n');

    // Example 2: Get database statistics
    console.log('2. Database statistics:');
    const stats = hashManager.getStats();
    // The CLI tool treats a falsy getStats() result as "no statistics";
    // guard here as well instead of crashing on a property access.
    if (stats) {
      console.log('   Total images:', stats.totalImages);
      console.log('   Images (last 7 days):', stats.imagesLastWeek);
      console.log('   Database path:', stats.databasePath);
    } else {
      console.log('   (Statistics unavailable)');
    }
    console.log('');

    // Example 3: List recent hashes
    console.log('3. Recent image hashes:');
    const recentHashes = hashManager.getAllHashes(5);
    recentHashes.forEach((record, index) => {
      console.log(`   ${index + 1}. ${record.perceptual_hash}`);
      console.log(`      URL: ${preview(record.url, 60)}`);
    });
    console.log('');

    // Examples 4 and 5 need at least one stored record to work with.
    if (recentHashes.length > 0) {
      const [firstRecord] = recentHashes;

      // Example 4: Find similar images
      console.log('4. Finding similar images:');
      const testHash = firstRecord.perceptual_hash;
      const similar = hashManager.findSimilarImages(testHash, 5);
      console.log(`   Found ${similar.length} similar image(s) to hash ${testHash}`);
      similar.forEach((img, index) => {
        console.log(`   ${index + 1}. Distance: ${img.distance}, URL: ${preview(img.url, 50)}`);
      });
      console.log('');

      // Example 5: Search by URL
      console.log('5. Searching by URL:');
      const found = hashManager.getHashByUrl(firstRecord.url);
      if (found) {
        console.log(`   Found: ${found.perceptual_hash}`);
        console.log(`   Cached at: ${found.cached_at}`);
      }
      console.log('');
    }

    // Example 6: Cleanup orphaned hashes
    console.log('6. Cleaning up orphaned records:');
    const cleaned = await hashManager.cleanupOrphanedHashes();
    console.log(`   Removed ${cleaned} orphaned record(s)`);
    console.log('');

    console.log('=== Example Complete ===');
  } catch (error) {
    console.error('Error:', error);
  } finally {
    // Always close the database connection
    hashManager.close();
  }
}

// Run the example only when executed directly, not when required as a module.
if (require.main === module) {
  example().catch(console.error);
}

module.exports = example;
+303
image-hash-tool.js
#!/usr/bin/env node

/**
 * Image Hash Management Utility
 * Provides command-line tools for managing and querying the image hash database.
 *
 * Exit status: 0 on success, 1 when a command fails or is given invalid
 * arguments, so the tool can be used reliably from scripts.
 */

const ImageHashManager = require('./utils/imageHashManager');
const path = require('path');
const fs = require('fs-extra');

const hashManager = new ImageHashManager();

// Hamming-distance threshold used by `similar` when none is given.
const DEFAULT_MAX_DISTANCE = 5;

// File extensions that `scan-cache` treats as images.
const IMAGE_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];

/**
 * Report a failure to stderr and mark the process as failed without
 * terminating it, so the `finally` block in main() can still close the database.
 *
 * @param {string} message - Message to print.
 */
function fail(message) {
  console.error(message);
  process.exitCode = 1;
}

/**
 * Shorten a string for display, appending an ellipsis only when text was
 * actually cut off.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The original text, or its first `maxLength` characters followed by "...".
 */
function truncate(text, maxLength) {
  const value = String(text);
  return value.length > maxLength ? `${value.substring(0, maxLength)}...` : value;
}

/**
 * Parse a command-line argument that must be a non-negative integer.
 *
 * @param {string} raw - Raw argument text.
 * @param {string} label - Human-readable name of the argument, used in the error message.
 * @returns {number} The parsed integer.
 * @throws {Error} If `raw` is not a plain base-10 non-negative integer.
 */
function parseNonNegativeInt(raw, label) {
  // Strict match: a bare parseInt would accept "10abc" and yield NaN for "abc".
  if (!/^\d+$/.test(String(raw))) {
    throw new Error(`Invalid ${label}: "${raw}" (expected a non-negative integer)`);
  }
  return Number.parseInt(raw, 10);
}

/**
 * Display help information
 */
function showHelp() {
  console.log(`
Image Hash Management Utility

Usage: node image-hash-tool.js <command> [options]

Commands:
  stats                       Show database statistics
  list [limit]                List all stored hashes (optionally limit results)
  hash <filepath>             Calculate and display hash for an image
  process <url> <filepath>    Process an image and store it in the database
  similar <hash> [distance]   Find similar images to the given hash
  search <url>                Look up hash by URL
  cleanup                     Remove hash records for deleted files
  scan-cache                  Scan cache directory and hash all images

Examples:
  node image-hash-tool.js stats
  node image-hash-tool.js list 10
  node image-hash-tool.js hash cache/images/example.jpg
  node image-hash-tool.js similar 8a3c5e9f 5
  node image-hash-tool.js scan-cache
`);
}

/**
 * Show database statistics
 *
 * Prints nothing when the manager returns no statistics object.
 */
function showStats() {
  const stats = hashManager.getStats();
  if (stats) {
    console.log('\n=== Image Hash Database Statistics ===');
    console.log(`Total Images: ${stats.totalImages}`);
    console.log(`Images (Last 7 Days): ${stats.imagesLastWeek}`);
    console.log(`Database Path: ${stats.databasePath}`);
  }
}

/**
 * List all hashes
 *
 * @param {string} [limit] - Optional maximum number of records, as typed on the command line.
 * @throws {Error} If `limit` is given but is not a non-negative integer.
 */
function listHashes(limit) {
  const maxResults = limit ? parseNonNegativeInt(limit, 'limit') : null;
  const hashes = hashManager.getAllHashes(maxResults);
  console.log(`\n=== Stored Image Hashes (${hashes.length} results) ===\n`);

  hashes.forEach((record, index) => {
    console.log(`${index + 1}. Hash: ${record.perceptual_hash}`);
    console.log(`   URL: ${truncate(record.url, 80)}`);
    // Alternate URLs are printed only when the record carries a non-empty list.
    if (record.alternate_urls && record.alternate_urls.length > 0) {
      console.log(`   Alternate URLs (${record.alternate_urls.length}):`);
      record.alternate_urls.forEach((altUrl) => {
        console.log(`     - ${truncate(altUrl, 76)}`);
      });
    }
    console.log(`   File: ${record.file_path}`);
    console.log(`   Cached: ${record.cached_at}`);
    console.log('');
  });
}

/**
 * Calculate hash for a single image
 *
 * @param {string} filePath - Path to the image file.
 * @returns {Promise<string|undefined>} The hash, or undefined when hashing failed
 *   (the error is printed and the exit status is set to 1).
 */
async function calculateHash(filePath) {
  try {
    console.log(`\nCalculating hash for: ${filePath}`);
    const hash = await hashManager.calculateHash(filePath);
    console.log(`Perceptual Hash: ${hash}`);
    return hash;
  } catch (error) {
    fail(`Error: ${error.message}`);
    return undefined;
  }
}

/**
 * Process an image and store in database
 *
 * @param {string} url - Original URL of the image.
 * @param {string} filePath - Path to the cached image file.
 * @returns {Promise<void>}
 */
async function processImage(url, filePath) {
  try {
    console.log(`\nProcessing image...`);
    console.log(`URL: ${url}`);
    console.log(`File: ${filePath}`);

    const result = await hashManager.processImage(url, filePath);

    console.log(`\nSuccess!`);
    console.log(`Hash: ${result.hash}`);
    console.log(`Stored: ${result.stored}`);
  } catch (error) {
    fail(`Error: ${error.message}`);
  }
}

/**
 * Find similar images
 *
 * @param {string} hash - Perceptual hash to compare against.
 * @param {string} [maxDistance] - Optional maximum Hamming distance, as typed on
 *   the command line; defaults to DEFAULT_MAX_DISTANCE.
 * @throws {Error} If `maxDistance` is given but is not a non-negative integer.
 */
function findSimilar(hash, maxDistance) {
  const distance = maxDistance
    ? parseNonNegativeInt(maxDistance, 'distance')
    : DEFAULT_MAX_DISTANCE;
  console.log(`\nSearching for images similar to: ${hash}`);
  console.log(`Maximum distance: ${distance}\n`);

  const similar = hashManager.findSimilarImages(hash, distance);

  if (similar.length === 0) {
    console.log('No similar images found.');
    return;
  }

  console.log(`Found ${similar.length} similar image(s):\n`);
  similar.forEach((record, index) => {
    console.log(`${index + 1}. Distance: ${record.distance}`);
    console.log(`   Hash: ${record.perceptual_hash}`);
    console.log(`   URL: ${truncate(record.url, 80)}`);
    console.log(`   File: ${record.file_path}`);
    console.log('');
  });
}

/**
 * Search by URL
 *
 * @param {string} url - URL to look up.
 */
function searchByUrl(url) {
  console.log(`\nSearching for URL: ${url}\n`);

  const result = hashManager.getHashByUrl(url);

  if (!result) {
    console.log('No record found for this URL.');
    return;
  }

  console.log('Found record:');
  console.log(`Hash: ${result.perceptual_hash}`);
  console.log(`File: ${result.file_path}`);
  console.log(`Cached: ${result.cached_at}`);
  if (result.alternate_urls && result.alternate_urls.length > 0) {
    console.log(`Alternate URLs (${result.alternate_urls.length}):`);
    result.alternate_urls.forEach((altUrl) => {
      console.log(`  - ${altUrl}`);
    });
  }
  if (result.metadata) {
    console.log(`Metadata: ${JSON.stringify(result.metadata, null, 2)}`);
  }
}

/**
 * Cleanup orphaned hashes
 *
 * @returns {Promise<void>}
 */
async function cleanup() {
  console.log('\nCleaning up orphaned hash records...');
  const count = await hashManager.cleanupOrphanedHashes();
  console.log(`Cleanup complete. Removed ${count} orphaned records.`);
}

/**
 * Scan cache directory and hash all images
 *
 * Files whose extension is not in IMAGE_EXTENSIONS, and files already
 * recorded under their placeholder URL, are counted as skipped. A failure on
 * one file is logged and counted; it does not stop the scan.
 *
 * @returns {Promise<void>}
 */
async function scanCache() {
  const cacheDir = path.join(__dirname, 'cache', 'images');

  console.log(`\nScanning cache directory: ${cacheDir}\n`);

  if (!await fs.pathExists(cacheDir)) {
    fail('Cache directory does not exist.');
    return;
  }

  const files = await fs.readdir(cacheDir);

  let processed = 0;
  let skipped = 0;
  let errors = 0;

  // Sequential on purpose: each file is hashed and stored before the next starts.
  for (const file of files) {
    const filePath = path.join(cacheDir, file);
    const ext = path.extname(file).toLowerCase();

    if (!IMAGE_EXTENSIONS.includes(ext)) {
      skipped++;
      continue;
    }

    try {
      // The original download URL is not known here, so a file:// placeholder
      // built from the local path stands in as the record's URL.
      const url = `file://${filePath}`;

      // Check if already processed
      const existing = hashManager.getHashByUrl(url);
      if (existing) {
        console.log(`Skipping ${file} (already processed)`);
        skipped++;
        continue;
      }

      console.log(`Processing: ${file}`);
      await hashManager.processImage(url, filePath);
      processed++;
    } catch (error) {
      console.error(`Error processing ${file}: ${error.message}`);
      errors++;
    }
  }

  console.log(`\nScan complete!`);
  console.log(`Processed: ${processed}`);
  console.log(`Skipped: ${skipped}`);
  console.log(`Errors: ${errors}`);

  if (errors > 0) {
    process.exitCode = 1;
  }
}

/**
 * Main function
 *
 * Dispatches on the first command-line argument and always closes the
 * database connection before returning.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const args = process.argv.slice(2);
  const command = args[0];

  if (!command || command === 'help' || command === '--help' || command === '-h') {
    showHelp();
    hashManager.close();
    return;
  }

  try {
    switch (command) {
      case 'stats':
        showStats();
        break;

      case 'list':
        listHashes(args[1]);
        break;

      case 'hash':
        if (!args[1]) {
          fail('Error: Please provide a file path');
          break;
        }
        await calculateHash(args[1]);
        break;

      case 'process':
        if (!args[1] || !args[2]) {
          fail('Error: Please provide both URL and file path');
          break;
        }
        await processImage(args[1], args[2]);
        break;

      case 'similar':
        if (!args[1]) {
          fail('Error: Please provide a hash to search for');
          break;
        }
        findSimilar(args[1], args[2]);
        break;

      case 'search':
        if (!args[1]) {
          fail('Error: Please provide a URL to search for');
          break;
        }
        searchByUrl(args[1]);
        break;

      case 'cleanup':
        await cleanup();
        break;

      case 'scan-cache':
        await scanCache();
        break;

      default:
        fail(`Unknown command: ${command}`);
        showHelp();
    }
  } catch (error) {
    console.error('Error:', error);
    process.exitCode = 1;
  } finally {
    hashManager.close();
  }
}

// Run main function; the catch covers failures outside main's own try block
// (for example an error thrown while closing the database).
main().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});
+549 -1
package-lock.json
··· 12 12 "@atproto/api": "^0.14.21", 13 13 "@ffmpeg-installer/ffmpeg": "^1.1.0", 14 14 "axios": "^1.4.0", 15 + "better-sqlite3": "^12.6.2", 15 16 "cheerio": "^1.0.0-rc.12", 16 17 "crypto-js": "^4.2.0", 17 18 "dotenv": "^16.0.3", ··· 19 20 "fluent-ffmpeg": "^2.1.3", 20 21 "form-data": "^4.0.2", 21 22 "fs-extra": "^11.1.1", 23 + "imghash": "^1.1.2", 22 24 "node-cron": "^3.0.2", 23 25 "node-telegram-bot-api": "^0.61.0" 24 26 }, ··· 76 78 "dependencies": { 77 79 "@atproto/lexicon": "^0.4.10", 78 80 "zod": "^3.23.8" 81 + } 82 + }, 83 + "node_modules/@canvas/image": { 84 + "version": "2.0.0", 85 + "resolved": "https://registry.npmjs.org/@canvas/image/-/image-2.0.0.tgz", 86 + "integrity": "sha512-DQKEftZ5M4eM8Rzhv8FFZGdOsO8bVjzPC/9pBF+P//HjQM/Xf+PvQRTv97mVpsHt+Pejs1V7ooK6xf+dYgDNpw==", 87 + "license": "MIT", 88 + "dependencies": { 89 + "@canvas/image-data": "^1.0.0", 90 + "@cwasm/jpeg-turbo": "^0.1.1", 91 + "@cwasm/lodepng": "^0.1.2", 92 + "@cwasm/nsbmp": "^0.1.0", 93 + "@cwasm/nsgif": "^0.1.0", 94 + "@cwasm/webp": "^0.1.3", 95 + "decode-ico": "^0.4.1", 96 + "fast-base64-decode": "^1.0.0", 97 + "fast-base64-encode": "^1.0.0", 98 + "fast-base64-length": "^1.0.0", 99 + "simple-get": "^4.0.1" 100 + }, 101 + "engines": { 102 + "node": ">=10" 103 + } 104 + }, 105 + "node_modules/@canvas/image-data": { 106 + "version": "1.1.0", 107 + "resolved": "https://registry.npmjs.org/@canvas/image-data/-/image-data-1.1.0.tgz", 108 + "integrity": "sha512-QdObRRjRbcXGmM1tmJ+MrHcaz1MftF2+W7YI+MsphnsCrmtyfS0d5qJbk0MeSbUeyM/jCb0hmnkXPsy026L7dA==", 109 + "license": "MIT" 110 + }, 111 + "node_modules/@cwasm/jpeg-turbo": { 112 + "version": "0.1.3", 113 + "resolved": "https://registry.npmjs.org/@cwasm/jpeg-turbo/-/jpeg-turbo-0.1.3.tgz", 114 + "integrity": "sha512-FkZxwwC6r4zhzlqM0nYGaMj/MDSrZPxLOdPdM6ySlgsMfOpNAZcLQkpNF4jP+DmsuUvRoeUD0YSMBvg3jYfK6w==", 115 + "license": "MIT", 116 + "dependencies": { 117 + "@canvas/image-data": "^1.0.0" 118 + } 119 + }, 120 + "node_modules/@cwasm/lodepng": { 121 + 
"version": "0.1.9", 122 + "resolved": "https://registry.npmjs.org/@cwasm/lodepng/-/lodepng-0.1.9.tgz", 123 + "integrity": "sha512-vb2H7/jTxnqJi7hHiEgtFm3smIqVpeY417vN+8cwsq3iTNrHzwnMzFXbeCf2H9Dl13Vh64qgUUcC2mxf6TPODA==", 124 + "license": "MIT", 125 + "dependencies": { 126 + "@canvas/image-data": "^1.0.0" 127 + }, 128 + "engines": { 129 + "node": ">=8.0.0" 130 + } 131 + }, 132 + "node_modules/@cwasm/nsbmp": { 133 + "version": "0.1.3", 134 + "resolved": "https://registry.npmjs.org/@cwasm/nsbmp/-/nsbmp-0.1.3.tgz", 135 + "integrity": "sha512-APiz9Rj2E049rBapTtwnCGqQeqJjmC85busDQ44UCaujU+LoggUm06NuS1WIBAZcDMRbJOgCFcRWc0tMT2kpfg==", 136 + "license": "MIT", 137 + "dependencies": { 138 + "@canvas/image-data": "^1.0.0" 139 + } 140 + }, 141 + "node_modules/@cwasm/nsgif": { 142 + "version": "0.1.2", 143 + "resolved": "https://registry.npmjs.org/@cwasm/nsgif/-/nsgif-0.1.2.tgz", 144 + "integrity": "sha512-LOD5HlL0O5jpnIAl+dLSZcB3v0RBNBjtoaymdCEPe2kyKzaP20BF+jy/QUyOZogQsgMVjusZES3tgwwoiiJ2rA==", 145 + "license": "MIT", 146 + "dependencies": { 147 + "@canvas/image-data": "^1.0.0" 148 + } 149 + }, 150 + "node_modules/@cwasm/webp": { 151 + "version": "0.1.5", 152 + "resolved": "https://registry.npmjs.org/@cwasm/webp/-/webp-0.1.5.tgz", 153 + "integrity": "sha512-ceIZQkyxK+s7mmItNcWqqHdOBiJAxYxTnrnPNgUNjldB1M9j+Bp/3eVIVwC8rUFyN/zoFwuT0331pyY3ackaNA==", 154 + "license": "MIT", 155 + "dependencies": { 156 + "@canvas/image-data": "^1.0.0" 79 157 } 80 158 }, 81 159 "node_modules/@derhuerst/http-basic": { ··· 415 493 "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", 416 494 "dev": true 417 495 }, 496 + "node_modules/base64-js": { 497 + "version": "1.5.1", 498 + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", 499 + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", 500 + "funding": [ 501 + { 502 + "type": "github", 503 + "url": 
"https://github.com/sponsors/feross" 504 + }, 505 + { 506 + "type": "patreon", 507 + "url": "https://www.patreon.com/feross" 508 + }, 509 + { 510 + "type": "consulting", 511 + "url": "https://feross.org/support" 512 + } 513 + ], 514 + "license": "MIT" 515 + }, 418 516 "node_modules/bcrypt-pbkdf": { 419 517 "version": "1.0.2", 420 518 "resolved": "https://registry.npmjs.org/bcrypt-pbkdf/-/bcrypt-pbkdf-1.0.2.tgz", ··· 423 521 "tweetnacl": "^0.14.3" 424 522 } 425 523 }, 524 + "node_modules/better-sqlite3": { 525 + "version": "12.6.2", 526 + "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.6.2.tgz", 527 + "integrity": "sha512-8VYKM3MjCa9WcaSAI3hzwhmyHVlH8tiGFwf0RlTsZPWJ1I5MkzjiudCo4KC4DxOaL/53A5B1sI/IbldNFDbsKA==", 528 + "hasInstallScript": true, 529 + "license": "MIT", 530 + "dependencies": { 531 + "bindings": "^1.5.0", 532 + "prebuild-install": "^7.1.1" 533 + }, 534 + "engines": { 535 + "node": "20.x || 22.x || 23.x || 24.x || 25.x" 536 + } 537 + }, 426 538 "node_modules/binary-extensions": { 427 539 "version": "2.3.0", 428 540 "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", ··· 435 547 "url": "https://github.com/sponsors/sindresorhus" 436 548 } 437 549 }, 550 + "node_modules/bindings": { 551 + "version": "1.5.0", 552 + "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", 553 + "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", 554 + "license": "MIT", 555 + "dependencies": { 556 + "file-uri-to-path": "1.0.0" 557 + } 558 + }, 438 559 "node_modules/bl": { 439 560 "version": "1.2.3", 440 561 "resolved": "https://registry.npmjs.org/bl/-/bl-1.2.3.tgz", ··· 444 565 "safe-buffer": "^5.1.1" 445 566 } 446 567 }, 568 + "node_modules/blockhash-core": { 569 + "version": "0.1.0", 570 + "resolved": "https://registry.npmjs.org/blockhash-core/-/blockhash-core-0.1.0.tgz", 571 + "integrity": 
"sha512-Cv7BgBo0jjVPaeuel4cvxf9LqIGsYNIPz9DAGvvrF9LRlEq9Q3HXu+S8bklPCae0sCxAXic4HGMoImf3FeO3Nw==", 572 + "license": "MIT" 573 + }, 447 574 "node_modules/bluebird": { 448 575 "version": "3.7.2", 449 576 "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.7.2.tgz", ··· 476 603 "node": ">=8" 477 604 } 478 605 }, 606 + "node_modules/buffer": { 607 + "version": "5.7.1", 608 + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", 609 + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", 610 + "funding": [ 611 + { 612 + "type": "github", 613 + "url": "https://github.com/sponsors/feross" 614 + }, 615 + { 616 + "type": "patreon", 617 + "url": "https://www.patreon.com/feross" 618 + }, 619 + { 620 + "type": "consulting", 621 + "url": "https://feross.org/support" 622 + } 623 + ], 624 + "license": "MIT", 625 + "dependencies": { 626 + "base64-js": "^1.3.1", 627 + "ieee754": "^1.1.13" 628 + } 629 + }, 479 630 "node_modules/buffer-from": { 480 631 "version": "1.1.2", 481 632 "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", ··· 595 746 "fsevents": "~2.3.2" 596 747 } 597 748 }, 749 + "node_modules/chownr": { 750 + "version": "1.1.4", 751 + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", 752 + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", 753 + "license": "ISC" 754 + }, 598 755 "node_modules/combined-stream": { 599 756 "version": "1.0.8", 600 757 "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", ··· 745 902 "ms": "^2.1.1" 746 903 } 747 904 }, 905 + "node_modules/decode-bmp": { 906 + "version": "0.2.1", 907 + "resolved": "https://registry.npmjs.org/decode-bmp/-/decode-bmp-0.2.1.tgz", 908 + "integrity": "sha512-NiOaGe+GN0KJqi2STf24hfMkFitDUaIoUU3eKvP/wAbLe8o6FuW5n/x7MHPR0HKvBokp6MQY/j7w8lewEeVCIA==", 909 + "license": "MIT", 910 + "dependencies": { 911 + 
"@canvas/image-data": "^1.0.0", 912 + "to-data-view": "^1.1.0" 913 + }, 914 + "engines": { 915 + "node": ">=8.6.0" 916 + } 917 + }, 918 + "node_modules/decode-ico": { 919 + "version": "0.4.1", 920 + "resolved": "https://registry.npmjs.org/decode-ico/-/decode-ico-0.4.1.tgz", 921 + "integrity": "sha512-69NZfbKIzux1vBOd31al3XnMnH+2mqDhEgLdpygErm4d60N+UwA5Sq5WFjmEDQzumgB9fElojGwWG0vybVfFmA==", 922 + "license": "MIT", 923 + "dependencies": { 924 + "@canvas/image-data": "^1.0.0", 925 + "decode-bmp": "^0.2.0", 926 + "to-data-view": "^1.1.0" 927 + }, 928 + "engines": { 929 + "node": ">=8.6" 930 + } 931 + }, 932 + "node_modules/decompress-response": { 933 + "version": "6.0.0", 934 + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", 935 + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", 936 + "license": "MIT", 937 + "dependencies": { 938 + "mimic-response": "^3.1.0" 939 + }, 940 + "engines": { 941 + "node": ">=10" 942 + }, 943 + "funding": { 944 + "url": "https://github.com/sponsors/sindresorhus" 945 + } 946 + }, 947 + "node_modules/deep-extend": { 948 + "version": "0.6.0", 949 + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", 950 + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", 951 + "license": "MIT", 952 + "engines": { 953 + "node": ">=4.0.0" 954 + } 955 + }, 748 956 "node_modules/define-data-property": { 749 957 "version": "1.1.4", 750 958 "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", ··· 783 991 "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", 784 992 "engines": { 785 993 "node": ">=0.4.0" 994 + } 995 + }, 996 + "node_modules/detect-libc": { 997 + "version": "2.1.2", 998 + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", 999 + 
"integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", 1000 + "license": "Apache-2.0", 1001 + "engines": { 1002 + "node": ">=8" 786 1003 } 787 1004 }, 788 1005 "node_modules/dom-serializer": { ··· 1046 1263 "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-3.1.2.tgz", 1047 1264 "integrity": "sha512-tvtQIeLVHjDkJYnzf2dgVMxfuSGJeM/7UCG17TT4EumTfNtF+0nebF/4zWOIkCreAbtNqhGEboB6BWrwqNaw4Q==" 1048 1265 }, 1266 + "node_modules/expand-template": { 1267 + "version": "2.0.3", 1268 + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", 1269 + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", 1270 + "license": "(MIT OR WTFPL)", 1271 + "engines": { 1272 + "node": ">=6" 1273 + } 1274 + }, 1049 1275 "node_modules/extend": { 1050 1276 "version": "3.0.2", 1051 1277 "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", ··· 1059 1285 "node >=0.6.0" 1060 1286 ] 1061 1287 }, 1288 + "node_modules/fast-base64-decode": { 1289 + "version": "1.0.0", 1290 + "resolved": "https://registry.npmjs.org/fast-base64-decode/-/fast-base64-decode-1.0.0.tgz", 1291 + "integrity": "sha512-qwaScUgUGBYeDNRnbc/KyllVU88Jk1pRHPStuF/lO7B0/RTRLj7U0lkdTAutlBblY08rwZDff6tNU9cjv6j//Q==", 1292 + "license": "MIT" 1293 + }, 1294 + "node_modules/fast-base64-encode": { 1295 + "version": "1.0.0", 1296 + "resolved": "https://registry.npmjs.org/fast-base64-encode/-/fast-base64-encode-1.0.0.tgz", 1297 + "integrity": "sha512-z2XCzVK4fde2cuTEHu2QGkLD6BPtJNKJPn0Z7oINvmhq/quUuIIVPYKUdN0gYeZqOyurjJjBH/bUzK5gafyHvw==", 1298 + "license": "MIT" 1299 + }, 1300 + "node_modules/fast-base64-length": { 1301 + "version": "1.0.0", 1302 + "resolved": "https://registry.npmjs.org/fast-base64-length/-/fast-base64-length-1.0.0.tgz", 1303 + "integrity": "sha512-MV+/ioblHx6SMjc/1l4EAnRJyAku6+6DxZ6RW0FoFCF1Aol/Ldb6FqwE3Kn3Ju1aam2m1KCIVoCljhgcG+Umzg==", 1304 + 
"license": "MIT" 1305 + }, 1062 1306 "node_modules/fast-deep-equal": { 1063 1307 "version": "3.1.3", 1064 1308 "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", ··· 1092 1336 "engines": { 1093 1337 "node": ">=0.10.0" 1094 1338 } 1339 + }, 1340 + "node_modules/file-uri-to-path": { 1341 + "version": "1.0.0", 1342 + "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", 1343 + "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", 1344 + "license": "MIT" 1095 1345 }, 1096 1346 "node_modules/fill-range": { 1097 1347 "version": "7.1.1", ··· 1174 1424 "node": ">= 6" 1175 1425 } 1176 1426 }, 1427 + "node_modules/fs-constants": { 1428 + "version": "1.0.0", 1429 + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", 1430 + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", 1431 + "license": "MIT" 1432 + }, 1177 1433 "node_modules/fs-extra": { 1178 1434 "version": "11.3.0", 1179 1435 "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.3.0.tgz", ··· 1295 1551 "assert-plus": "^1.0.0" 1296 1552 } 1297 1553 }, 1554 + "node_modules/github-from-package": { 1555 + "version": "0.0.0", 1556 + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", 1557 + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", 1558 + "license": "MIT" 1559 + }, 1298 1560 "node_modules/glob-parent": { 1299 1561 "version": "5.1.2", 1300 1562 "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz", ··· 1527 1789 "node": ">=0.10.0" 1528 1790 } 1529 1791 }, 1792 + "node_modules/ieee754": { 1793 + "version": "1.2.1", 1794 + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", 1795 + "integrity": 
"sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", 1796 + "funding": [ 1797 + { 1798 + "type": "github", 1799 + "url": "https://github.com/sponsors/feross" 1800 + }, 1801 + { 1802 + "type": "patreon", 1803 + "url": "https://www.patreon.com/feross" 1804 + }, 1805 + { 1806 + "type": "consulting", 1807 + "url": "https://feross.org/support" 1808 + } 1809 + ], 1810 + "license": "BSD-3-Clause" 1811 + }, 1530 1812 "node_modules/ignore-by-default": { 1531 1813 "version": "1.0.1", 1532 1814 "resolved": "https://registry.npmjs.org/ignore-by-default/-/ignore-by-default-1.0.1.tgz", 1533 1815 "integrity": "sha512-Ius2VYcGNk7T90CppJqcIkS5ooHUZyIQK+ClZfMfMNFEF9VSE73Fq+906u/CWu92x4gzZMWOwfFYckPObzdEbA==", 1534 1816 "dev": true 1535 1817 }, 1818 + "node_modules/image-type": { 1819 + "version": "4.1.0", 1820 + "resolved": "https://registry.npmjs.org/image-type/-/image-type-4.1.0.tgz", 1821 + "integrity": "sha512-CFJMJ8QK8lJvRlTCEgarL4ro6hfDQKif2HjSvYCdQZESaIPV4v9imrf7BQHK+sQeTeNeMpWciR9hyC/g8ybXEg==", 1822 + "license": "MIT", 1823 + "dependencies": { 1824 + "file-type": "^10.10.0" 1825 + }, 1826 + "engines": { 1827 + "node": ">=6" 1828 + } 1829 + }, 1830 + "node_modules/image-type/node_modules/file-type": { 1831 + "version": "10.11.0", 1832 + "resolved": "https://registry.npmjs.org/file-type/-/file-type-10.11.0.tgz", 1833 + "integrity": "sha512-uzk64HRpUZyTGZtVuvrjP0FYxzQrBf4rojot6J65YMEbwBLB0CWm0CLojVpwpmFmxcE/lkvYICgfcGozbBq6rw==", 1834 + "license": "MIT", 1835 + "engines": { 1836 + "node": ">=6" 1837 + } 1838 + }, 1839 + "node_modules/imghash": { 1840 + "version": "1.1.2", 1841 + "resolved": "https://registry.npmjs.org/imghash/-/imghash-1.1.2.tgz", 1842 + "integrity": "sha512-RRb0ss/B5yltehhtVLk/41PW+UKP6bBdpt08IRsfM8X5YNU+jIKgApZDb8RFqbb6QC+lOwGJ6niYk1BCusuThw==", 1843 + "license": "MIT", 1844 + "dependencies": { 1845 + "@canvas/image": "^2.0.0", 1846 + "blockhash-core": "^0.1.0", 1847 + "image-type": "^4.1.0", 1848 + "jpeg-js": 
"^0.4.1" 1849 + }, 1850 + "engines": { 1851 + "node": ">=20" 1852 + } 1853 + }, 1536 1854 "node_modules/inherits": { 1537 1855 "version": "2.0.4", 1538 1856 "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", 1539 1857 "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" 1858 + }, 1859 + "node_modules/ini": { 1860 + "version": "1.3.8", 1861 + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", 1862 + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", 1863 + "license": "ISC" 1540 1864 }, 1541 1865 "node_modules/internal-slot": { 1542 1866 "version": "1.1.0", ··· 1908 2232 "resolved": "https://registry.npmjs.org/isstream/-/isstream-0.1.2.tgz", 1909 2233 "integrity": "sha512-Yljz7ffyPbrLpLngrMtZ7NduUgVvi6wG9RJ9IUcyCd59YQ911PBJphODUcbOVbqYfxe1wuYf/LJ8PauMRwsM/g==" 1910 2234 }, 2235 + "node_modules/jpeg-js": { 2236 + "version": "0.4.4", 2237 + "resolved": "https://registry.npmjs.org/jpeg-js/-/jpeg-js-0.4.4.tgz", 2238 + "integrity": "sha512-WZzeDOEtTOBK4Mdsar0IqEU5sMr3vSV2RqkAIzUEV2BHnUfKGyswWFPFwK5EeDo93K3FohSHbLAjj0s1Wzd+dg==", 2239 + "license": "BSD-3-Clause" 2240 + }, 1911 2241 "node_modules/jsbn": { 1912 2242 "version": "0.1.1", 1913 2243 "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", ··· 1996 2326 "node": ">= 0.6" 1997 2327 } 1998 2328 }, 2329 + "node_modules/mimic-response": { 2330 + "version": "3.1.0", 2331 + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", 2332 + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", 2333 + "license": "MIT", 2334 + "engines": { 2335 + "node": ">=10" 2336 + }, 2337 + "funding": { 2338 + "url": "https://github.com/sponsors/sindresorhus" 2339 + } 2340 + }, 1999 2341 "node_modules/minimatch": { 2000 2342 "version": "3.1.2", 2001 2343 "resolved": 
"https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", ··· 2008 2350 "node": "*" 2009 2351 } 2010 2352 }, 2353 + "node_modules/minimist": { 2354 + "version": "1.2.8", 2355 + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", 2356 + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", 2357 + "license": "MIT", 2358 + "funding": { 2359 + "url": "https://github.com/sponsors/ljharb" 2360 + } 2361 + }, 2362 + "node_modules/mkdirp-classic": { 2363 + "version": "0.5.3", 2364 + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", 2365 + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", 2366 + "license": "MIT" 2367 + }, 2011 2368 "node_modules/ms": { 2012 2369 "version": "2.1.3", 2013 2370 "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", ··· 2017 2374 "version": "9.9.0", 2018 2375 "resolved": "https://registry.npmjs.org/multiformats/-/multiformats-9.9.0.tgz", 2019 2376 "integrity": "sha512-HoMUjhH9T8DDBNT+6xzkrd9ga/XiBI4xLr58LJACwK6G3HTOPeMz4nB4KJs33L2BelrIJa7P0VuNaVF3hMYfjg==" 2377 + }, 2378 + "node_modules/napi-build-utils": { 2379 + "version": "2.0.0", 2380 + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", 2381 + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", 2382 + "license": "MIT" 2383 + }, 2384 + "node_modules/node-abi": { 2385 + "version": "3.86.0", 2386 + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.86.0.tgz", 2387 + "integrity": "sha512-sn9Et4N3ynsetj3spsZR729DVlGH6iBG4RiDMV7HEp3guyOW6W3S0unGpLDxT50mXortGUMax/ykUNQXdqc/Xg==", 2388 + "license": "MIT", 2389 + "dependencies": { 2390 + "semver": "^7.3.5" 2391 + }, 2392 + "engines": { 2393 + "node": ">=10" 2394 + } 2020 2395 }, 2021 2396 "node_modules/node-cron": { 2022 2397 "version": "3.0.3", ··· 2247 2622 
"node": ">= 0.4" 2248 2623 } 2249 2624 }, 2625 + "node_modules/prebuild-install": { 2626 + "version": "7.1.3", 2627 + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", 2628 + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", 2629 + "license": "MIT", 2630 + "dependencies": { 2631 + "detect-libc": "^2.0.0", 2632 + "expand-template": "^2.0.3", 2633 + "github-from-package": "0.0.0", 2634 + "minimist": "^1.2.3", 2635 + "mkdirp-classic": "^0.5.3", 2636 + "napi-build-utils": "^2.0.0", 2637 + "node-abi": "^3.3.0", 2638 + "pump": "^3.0.0", 2639 + "rc": "^1.2.7", 2640 + "simple-get": "^4.0.0", 2641 + "tar-fs": "^2.0.0", 2642 + "tunnel-agent": "^0.6.0" 2643 + }, 2644 + "bin": { 2645 + "prebuild-install": "bin.js" 2646 + }, 2647 + "engines": { 2648 + "node": ">=10" 2649 + } 2650 + }, 2651 + "node_modules/prebuild-install/node_modules/pump": { 2652 + "version": "3.0.3", 2653 + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", 2654 + "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", 2655 + "license": "MIT", 2656 + "dependencies": { 2657 + "end-of-stream": "^1.1.0", 2658 + "once": "^1.3.1" 2659 + } 2660 + }, 2250 2661 "node_modules/process-nextick-args": { 2251 2662 "version": "2.0.1", 2252 2663 "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", ··· 2308 2719 "node": ">=0.6" 2309 2720 } 2310 2721 }, 2722 + "node_modules/rc": { 2723 + "version": "1.2.8", 2724 + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", 2725 + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", 2726 + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", 2727 + "dependencies": { 2728 + "deep-extend": "^0.6.0", 2729 + "ini": "~1.3.0", 2730 + "minimist": "^1.2.0", 2731 + "strip-json-comments": "~2.0.1" 2732 + }, 2733 + "bin": { 
2734 + "rc": "cli.js" 2735 + } 2736 + }, 2311 2737 "node_modules/readable-stream": { 2312 2738 "version": "2.3.8", 2313 2739 "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", ··· 2551 2977 "version": "7.7.1", 2552 2978 "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.1.tgz", 2553 2979 "integrity": "sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA==", 2554 - "dev": true, 2555 2980 "bin": { 2556 2981 "semver": "bin/semver.js" 2557 2982 }, ··· 2670 3095 "url": "https://github.com/sponsors/ljharb" 2671 3096 } 2672 3097 }, 3098 + "node_modules/simple-concat": { 3099 + "version": "1.0.1", 3100 + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", 3101 + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", 3102 + "funding": [ 3103 + { 3104 + "type": "github", 3105 + "url": "https://github.com/sponsors/feross" 3106 + }, 3107 + { 3108 + "type": "patreon", 3109 + "url": "https://www.patreon.com/feross" 3110 + }, 3111 + { 3112 + "type": "consulting", 3113 + "url": "https://feross.org/support" 3114 + } 3115 + ], 3116 + "license": "MIT" 3117 + }, 3118 + "node_modules/simple-get": { 3119 + "version": "4.0.1", 3120 + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", 3121 + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", 3122 + "funding": [ 3123 + { 3124 + "type": "github", 3125 + "url": "https://github.com/sponsors/feross" 3126 + }, 3127 + { 3128 + "type": "patreon", 3129 + "url": "https://www.patreon.com/feross" 3130 + }, 3131 + { 3132 + "type": "consulting", 3133 + "url": "https://feross.org/support" 3134 + } 3135 + ], 3136 + "license": "MIT", 3137 + "dependencies": { 3138 + "decompress-response": "^6.0.0", 3139 + "once": "^1.3.1", 3140 + "simple-concat": "^1.0.0" 3141 + } 3142 + }, 2673 3143 
"node_modules/simple-update-notifier": { 2674 3144 "version": "2.0.0", 2675 3145 "resolved": "https://registry.npmjs.org/simple-update-notifier/-/simple-update-notifier-2.0.0.tgz", ··· 2780 3250 "url": "https://github.com/sponsors/ljharb" 2781 3251 } 2782 3252 }, 3253 + "node_modules/strip-json-comments": { 3254 + "version": "2.0.1", 3255 + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", 3256 + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", 3257 + "license": "MIT", 3258 + "engines": { 3259 + "node": ">=0.10.0" 3260 + } 3261 + }, 2783 3262 "node_modules/supports-color": { 2784 3263 "version": "5.5.0", 2785 3264 "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", ··· 2792 3271 "node": ">=4" 2793 3272 } 2794 3273 }, 3274 + "node_modules/tar-fs": { 3275 + "version": "2.1.4", 3276 + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", 3277 + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", 3278 + "license": "MIT", 3279 + "dependencies": { 3280 + "chownr": "^1.1.1", 3281 + "mkdirp-classic": "^0.5.2", 3282 + "pump": "^3.0.0", 3283 + "tar-stream": "^2.1.4" 3284 + } 3285 + }, 3286 + "node_modules/tar-fs/node_modules/pump": { 3287 + "version": "3.0.3", 3288 + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.3.tgz", 3289 + "integrity": "sha512-todwxLMY7/heScKmntwQG8CXVkWUOdYxIvY2s0VWAAMh/nd8SoYiRaKjlr7+iCs984f2P8zvrfWcDDYVb73NfA==", 3290 + "license": "MIT", 3291 + "dependencies": { 3292 + "end-of-stream": "^1.1.0", 3293 + "once": "^1.3.1" 3294 + } 3295 + }, 3296 + "node_modules/tar-stream": { 3297 + "version": "2.2.0", 3298 + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", 3299 + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", 3300 + "license": "MIT", 3301 + 
"dependencies": { 3302 + "bl": "^4.0.3", 3303 + "end-of-stream": "^1.4.1", 3304 + "fs-constants": "^1.0.0", 3305 + "inherits": "^2.0.3", 3306 + "readable-stream": "^3.1.1" 3307 + }, 3308 + "engines": { 3309 + "node": ">=6" 3310 + } 3311 + }, 3312 + "node_modules/tar-stream/node_modules/bl": { 3313 + "version": "4.1.0", 3314 + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", 3315 + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", 3316 + "license": "MIT", 3317 + "dependencies": { 3318 + "buffer": "^5.5.0", 3319 + "inherits": "^2.0.4", 3320 + "readable-stream": "^3.4.0" 3321 + } 3322 + }, 3323 + "node_modules/tar-stream/node_modules/readable-stream": { 3324 + "version": "3.6.2", 3325 + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", 3326 + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", 3327 + "license": "MIT", 3328 + "dependencies": { 3329 + "inherits": "^2.0.3", 3330 + "string_decoder": "^1.1.1", 3331 + "util-deprecate": "^1.0.1" 3332 + }, 3333 + "engines": { 3334 + "node": ">= 6" 3335 + } 3336 + }, 2795 3337 "node_modules/tlds": { 2796 3338 "version": "1.256.0", 2797 3339 "resolved": "https://registry.npmjs.org/tlds/-/tlds-1.256.0.tgz", ··· 2799 3341 "bin": { 2800 3342 "tlds": "bin.js" 2801 3343 } 3344 + }, 3345 + "node_modules/to-data-view": { 3346 + "version": "1.1.0", 3347 + "resolved": "https://registry.npmjs.org/to-data-view/-/to-data-view-1.1.0.tgz", 3348 + "integrity": "sha512-1eAdufMg6mwgmlojAx3QeMnzB/BTVp7Tbndi3U7ftcT2zCZadjxkkmLmd97zmaxWi+sgGcgWrokmpEoy0Dn0vQ==", 3349 + "license": "MIT" 2802 3350 }, 2803 3351 "node_modules/to-regex-range": { 2804 3352 "version": "5.0.1",
+2
package.json
··· 27 27 "@atproto/api": "^0.14.21", 28 28 "@ffmpeg-installer/ffmpeg": "^1.1.0", 29 29 "axios": "^1.4.0", 30 + "better-sqlite3": "^12.6.2", 30 31 "cheerio": "^1.0.0-rc.12", 31 32 "crypto-js": "^4.2.0", 32 33 "dotenv": "^16.0.3", ··· 34 35 "fluent-ffmpeg": "^2.1.3", 35 36 "form-data": "^4.0.2", 36 37 "fs-extra": "^11.1.1", 38 + "imghash": "^1.1.2", 37 39 "node-cron": "^3.0.2", 38 40 "node-telegram-bot-api": "^0.61.0" 39 41 },
+2 -2
queue/alert-state.json
··· 2 2 "lowQueueAlertSent": true, 3 3 "emptyQueueAlertSent": false, 4 4 "lastLowQueueAlertTime": 1751412078653, 5 - "lastEmptyQueueAlertTime": 1749155676860, 6 - "lastSaved": 1751412115858 5 + "lastEmptyQueueAlertTime": 1768772195476, 6 + "lastSaved": 1768775980826 7 7 }
+49 -3
queue/queueManager.js
··· 3 3 const cron = require('node-cron'); 4 4 const config = require('../config'); 5 5 const crypto = require('crypto'); 6 + const ImageHashManager = require('../utils/imageHashManager'); 6 7 7 8 class QueueManager { 8 9 constructor() { ··· 20 21 // Add Discord to services if enabled 21 22 if (config.discord?.enabled) { 22 23 this.postServices.push('discord'); 24 + } 25 + 26 + // Initialize image hash manager for duplicate detection 27 + try { 28 + this.imageHashManager = new ImageHashManager(); 29 + console.log('[QueueManager] ImageHashManager initialized for duplicate detection'); 30 + } catch (error) { 31 + console.error('[QueueManager] Failed to initialize ImageHashManager:', error); 32 + this.imageHashManager = null; 23 33 } 24 34 25 35 this.initialize(); ··· 165 175 return this.queueData.queue || []; 166 176 } 167 177 168 - async addToQueue(imageData) { 178 + async addToQueue(imageData, addedBy = null) { 169 179 try { 170 180 // Initialize postedTo tracking for all services 171 181 const postedTo = {}; ··· 188 198 189 199 console.log(`Adding item to queue with sourceImgUrl: ${sourceImgUrl || 'none'}`); 190 200 201 + // Check for duplicates using perceptual hash 202 + if (this.imageHashManager && imageData.imageUrl) { 203 + try { 204 + // Extract filename from path to get perceptual hash 205 + const filename = path.basename(imageData.imageUrl, path.extname(imageData.imageUrl)); 206 + 207 + // Check if this hash already exists in the queue 208 + const duplicate = this.queueData.queue.find(item => { 209 + if (item.imageUrl) { 210 + const itemFilename = path.basename(item.imageUrl, path.extname(item.imageUrl)); 211 + return itemFilename === filename; 212 + } 213 + return false; 214 + }); 215 + 216 + if (duplicate) { 217 + console.log(`[QueueManager] Duplicate detected! 
Image with hash ${filename} already in queue`); 218 + console.log(`[QueueManager] Existing item added by: ${duplicate.addedBy || 'unknown'}`); 219 + console.log(`[QueueManager] Existing item source: ${duplicate.sourceImgUrl || 'unknown'}`); 220 + return { 221 + success: false, 222 + duplicate: true, 223 + existingItem: { 224 + addedBy: duplicate.addedBy, 225 + sourceUrl: duplicate.sourceImgUrl, 226 + timestamp: duplicate.timestamp 227 + } 228 + }; 229 + } 230 + } catch (error) { 231 + console.error('[QueueManager] Error checking for duplicates:', error); 232 + // Continue with adding if duplicate check fails 233 + } 234 + } 235 + 191 236 this.queueData.queue.push({ 192 237 ...imageData, 193 238 timestamp: new Date().toISOString(), 194 239 id: `${Date.now()}-${Math.random().toString(36).substring(2, 9)}`, 195 240 postedTo, 196 - sourceImgUrl // Add the new field 241 + sourceImgUrl, // Add the new field 242 + addedBy // Add the username who added this item 197 243 }); 198 244 199 245 await this.saveQueueToDisk(); 200 - return true; 246 + return { success: true }; 201 247 } catch (error) { 202 248 console.error('Error adding to queue:', error); 203 249 return false;
+81 -20
scrapers/blueskyScraper.js
··· 150 150 * Process an image from Bluesky using its CID 151 151 * @param {string} did - The user's DID 152 152 * @param {string} cid - The content ID of the image 153 + * @param {string} postUrl - The original post URL for hash database 153 154 * @returns {Promise<string>} - The local path to the cached image 154 155 */ 155 - async processImageByCid(did, cid) { 156 + async processImageByCid(did, cid, postUrl = null) { 156 157 console.log(`Processing image with CID: ${cid}`); 157 158 158 159 // Direct blob URL is most reliable ··· 186 187 // Convert binary data to base64 for hashing 187 188 const base64Data = Buffer.from(response.data).toString('base64'); 188 189 189 - // Generate MD5 hash of the image data 190 - const hash = crypto.MD5(base64Data).toString(); 190 + // Generate MD5 hash for temporary filename 191 + const tempHash = crypto.MD5(base64Data + Date.now()).toString(); 191 192 192 - // Create filename from the hash 193 - const filename = `${hash}${fileExt}`; 194 - const filePath = path.join(mediaCache.imageDir, filename); 193 + // Create temporary filename 194 + const tempFilename = `temp_${tempHash}${fileExt}`; 195 + const tempFilePath = path.join(mediaCache.imageDir, tempFilename); 196 + 197 + console.log(`Saving image to temporary file ${tempFilename}`); 195 198 196 - console.log(`Saving image as ${filename}`); 199 + // Save the image to disk temporarily 200 + await require('fs-extra').writeFile(tempFilePath, response.data); 197 201 198 - // Save the image to disk 199 - await require('fs-extra').writeFile(filePath, response.data); 202 + // Hash the image if hash manager is available and rename to perceptual hash 203 + let finalFilePath = tempFilePath; 204 + if (mediaCache.imageHashManager && postUrl) { 205 + try { 206 + console.log('[ImageHash] Processing Bluesky image with perceptual hash'); 207 + 208 + // Calculate perceptual hash 209 + const perceptualHash = await mediaCache.imageHashManager.calculateHash(tempFilePath); 210 + 
console.log(`[ImageHash] Calculated perceptual hash: ${perceptualHash}`); 211 + 212 + // Create filename from perceptual hash 213 + const hashFilename = `${perceptualHash}${fileExt}`; 214 + finalFilePath = path.join(mediaCache.imageDir, hashFilename); 215 + 216 + // Check if file with this hash already exists 217 + const fs = require('fs-extra'); 218 + if (await fs.pathExists(finalFilePath)) { 219 + console.log(`[ImageHash] File with same perceptual hash already exists: ${hashFilename}`); 220 + // Delete temp file 221 + await fs.unlink(tempFilePath); 222 + 223 + // Add alternate URL if this is a different source 224 + const existing = mediaCache.imageHashManager.getHashByHash(perceptualHash); 225 + if (existing && existing.url !== postUrl) { 226 + console.log(`[ImageHash] Adding alternate URL for existing hash`); 227 + mediaCache.imageHashManager.addAlternateUrl(perceptualHash, postUrl); 228 + } 229 + } else { 230 + // Rename temp file to final hash-based name 231 + await fs.rename(tempFilePath, finalFilePath); 232 + console.log(`[ImageHash] Renamed to: ${hashFilename}`); 233 + } 234 + 235 + // Store hash in database 236 + await mediaCache.imageHashManager.storeHash(postUrl, perceptualHash, finalFilePath, { 237 + contentType, 238 + cid: cid, 239 + did: did, 240 + downloadedAt: new Date().toISOString() 241 + }); 242 + console.log('[ImageHash] Successfully stored hash in database'); 243 + } catch (hashError) { 244 + console.error('[ImageHash] Failed to hash Bluesky image:', hashError); 245 + // If hashing fails, use temp file as final file with MD5 name 246 + const fallbackHash = crypto.MD5(base64Data).toString(); 247 + const fallbackFilename = `${fallbackHash}${fileExt}`; 248 + finalFilePath = path.join(mediaCache.imageDir, fallbackFilename); 249 + await require('fs-extra').rename(tempFilePath, finalFilePath); 250 + } 251 + } else { 252 + // No hash manager, use MD5 hash as before 253 + const fallbackHash = crypto.MD5(base64Data).toString(); 254 + const 
fallbackFilename = `${fallbackHash}${fileExt}`; 255 + finalFilePath = path.join(mediaCache.imageDir, fallbackFilename); 256 + await require('fs-extra').rename(tempFilePath, finalFilePath); 257 + } 200 258 201 - return filePath; 259 + return finalFilePath; 202 260 } catch (error) { 203 261 console.error(`Error downloading blob for CID ${cid}: ${error.message}`); 204 262 205 263 // Fall back to mediaCache if direct download fails 206 264 // Process and cache the image using mediaCache method 207 265 try { 208 - const processed = await mediaCache.processMediaUrl(`https://cdn.bsky.app/img/feed/plain/${did}/${cid}@jpeg`); 266 + const cdnUrl = `https://cdn.bsky.app/img/feed/plain/${did}/${cid}@jpeg`; 267 + const processed = await mediaCache.processMediaUrl(cdnUrl, false, cdnUrl); 209 268 return processed.localPath; 210 269 } catch (fallbackError) { 211 270 console.error(`Fallback download also failed for CID ${cid}: ${fallbackError.message}`); ··· 218 277 * Process all images from a Bluesky embed 219 278 * @param {object} images - The images array from the embed 220 279 * @param {string} did - The user's DID 280 + * @param {string} postUrl - The original post URL for hash database 221 281 * @returns {Promise<Array<string>>} - Array of local paths to the cached images 222 282 */ 223 - async processAllImages(images, did) { 283 + async processAllImages(images, did, postUrl = null) { 224 284 if (!images || !Array.isArray(images) || images.length === 0) { 225 285 throw new Error('No images found in the content'); 226 286 } ··· 236 296 237 297 const cid = image.image.ref.$link; 238 298 try { 239 - const localPath = await this.processImageByCid(did, cid); 299 + const localPath = await this.processImageByCid(did, cid, postUrl); 240 300 imagePaths.push(localPath); 241 301 } catch (error) { 242 302 console.error(`Failed to process image with CID ${cid}: ${error.message}`); ··· 302 362 * Process images from a recordWithMedia embed (post with quoted content) 303 363 * @param 
{object} embed - The recordWithMedia embed object 304 364 * @param {string} did - The user's DID 365 + * @param {string} url - The original post URL for hash database 305 366 * @returns {Promise<Array<string>>} - Array of local paths to the cached images 306 367 */ 307 - async processRecordWithMedia(embed, did) { 368 + async processRecordWithMedia(embed, did, url = null) { 308 369 const imagePaths = []; 309 370 310 371 // Only process media in the main post, ignore quoted content 311 372 if (embed.media && embed.media.$type === 'app.bsky.embed.images') { 312 - const mediaImages = await this.processAllImages(embed.media.images, did); 373 + const mediaImages = await this.processAllImages(embed.media.images, did, url); 313 374 imagePaths.push(...mediaImages); 314 375 } 315 376 ··· 409 470 410 471 // 7. Handle image posts 411 472 if (embedType === 'app.bsky.embed.images') { 412 - const imagePaths = await this.processAllImages(record.value.embed.images, did); 473 + const imagePaths = await this.processAllImages(record.value.embed.images, did, url); 413 474 414 475 // Get source image URLs for each image 415 476 const sourceImageUrls = []; ··· 438 499 439 500 // 8. 
Handle posts with quoted content and media 440 501 else if (embedType === 'app.bsky.embed.recordWithMedia') { 441 - const imagePaths = await this.processRecordWithMedia(record.value.embed, did); 502 + const imagePaths = await this.processRecordWithMedia(record.value.embed, did, url); 442 503 443 504 // Get source image URLs from recordWithMedia 444 505 const sourceImageUrls = []; ··· 485 546 try { 486 547 // First, get the thumbnail image 487 548 const thumbUrl = `https://video.bsky.app/watch/${did}/${cid}/thumbnail.jpg`; 488 - const thumbnailProcessed = await mediaCache.processMediaUrl(thumbUrl); 549 + const thumbnailProcessed = await mediaCache.processMediaUrl(thumbUrl, false, thumbUrl); 489 550 490 551 // Direct blob access is most reliable for video 491 552 const videoBlobUrl = `${this.serviceEndpoint}/xrpc/com.atproto.sync.getBlob?did=${did}&cid=${cid}`; ··· 494 555 495 556 try { 496 557 console.log(`Downloading video from blob URL: ${videoBlobUrl}`); 497 - videoProcessed = await mediaCache.processMediaUrl(videoBlobUrl, true); 558 + videoProcessed = await mediaCache.processMediaUrl(videoBlobUrl, true, videoBlobUrl); 498 559 sourceVideoUrl = videoBlobUrl; 499 560 500 561 return { ··· 525 586 for (const videoUrl of videoUrls) { 526 587 try { 527 588 console.log(`Trying alternative video URL: ${videoUrl}`); 528 - videoProcessed = await mediaCache.processMediaUrl(videoUrl, true); 589 + videoProcessed = await mediaCache.processMediaUrl(videoUrl, true, videoUrl); 529 590 console.log(`Successfully downloaded video from: ${videoUrl}`); 530 591 sourceVideoUrl = videoUrl; 531 592 break;
+6 -6
scrapers/e621Scraper.js
··· 29 29 const ogImage = $('meta[property="og:image"]').attr('content'); 30 30 const ogUrl = $('meta[property="og:url"]').attr('content') || url; 31 31 32 - // Find potential video sources 32 + // Find potential video sources - only from actual video elements 33 33 let videoUrl = $('video source').attr('src') || 34 - $('video').attr('src') || 35 - $('a:contains("original")').attr('href'); 34 + $('video').attr('src'); 36 35 37 36 // Use OpenGraph image if direct media URL not found 38 37 if (!mediaUrl && ogImage) { ··· 40 39 } 41 40 42 41 // Determine if we're dealing with a video post 43 - const isVideo = videoUrl || this.isVideoUrl(mediaUrl); 42 + // Only consider it a video if we found an actual video element OR the URL has video extension 43 + const isVideo = (videoUrl && this.isVideoUrl(videoUrl)) || this.isVideoUrl(mediaUrl); 44 44 45 45 // If it's a video, use the direct video URL 46 46 if (isVideo && videoUrl) { ··· 63 63 // Store the original source URL before processing 64 64 const sourceImageUrl = mediaUrl; 65 65 66 - // Process and cache the media 67 - const processed = await mediaCache.processMediaUrl(mediaUrl, isVideo); 66 + // Process and cache the media, passing the original post URL 67 + const processed = await mediaCache.processMediaUrl(mediaUrl, isVideo, url); 68 68 69 69 // Return appropriate data structure based on media type with generic title 70 70 if (processed.isVideo) {
+2 -2
scrapers/furAffinityScraper.js
··· 43 43 // Process and cache the media locally 44 44 let mediaData; 45 45 if (isVideo) { 46 - mediaData = await mediaCache.processMediaUrl(sourceImageUrl, true); 46 + mediaData = await mediaCache.processMediaUrl(sourceImageUrl, true, url); 47 47 return { 48 48 imageUrl: mediaData.localPath, 49 49 videoUrl: mediaData.localPath, ··· 56 56 sourceImgUrl: sourceImageUrl // Add the new sourceImgUrl field 57 57 }; 58 58 } else { 59 - mediaData = await mediaCache.processMediaUrl(sourceImageUrl); 59 + mediaData = await mediaCache.processMediaUrl(sourceImageUrl, false, url); 60 60 return { 61 61 imageUrl: mediaData.localPath, 62 62 isVideo: false,
+1 -1
scrapers/soFurryScraper.js
··· 45 45 `SoFurry submission by ${response.data.author}`; 46 46 47 47 // Process and cache the media 48 - const processed = await mediaCache.processMediaUrl(sourceImageUrl, isVideo); 48 + const processed = await mediaCache.processMediaUrl(sourceImageUrl, isVideo, url); 49 49 50 50 // Return the data in the format expected by baseScraper 51 51 if (isVideo) {
+1 -1
scrapers/weasylScraper.js
··· 60 60 `Weasyl submission by ${submissionData.owner}`; 61 61 62 62 // Process and cache the media 63 - const processed = await mediaCache.processMediaUrl(sourceImageUrl, isVideo); 63 + const processed = await mediaCache.processMediaUrl(sourceImageUrl, isVideo, url); 64 64 65 65 // Return the data in the format expected by baseScraper 66 66 if (isVideo) {
+116
test-image-hash.js
#!/usr/bin/env node

/**
 * Smoke test for the Image Hash Manager.
 *
 * Instantiates ImageHashManager, runs a handful of read-only checks against
 * the hash database and the image cache directory, and verifies that the
 * shared mediaCache module exposes a hash manager. The process exits with
 * code 0 only when no check failed; warnings (⚠) do not fail the run.
 */

const ImageHashManager = require('./utils/imageHashManager');
const fs = require('fs-extra');
const path = require('path');

console.log('=== Image Hash Manager Test ===\n');

// Set to 1 by any failing check; used as the process exit code.
let exitCode = 0;

/**
 * Run every check in order. Hard failures throw and are reported by the
 * catch block; the Test 8 integration check records its failure without
 * aborting the remaining output.
 * @returns {Promise<void>}
 */
async function runTests() {
  let hashManager;

  try {
    // Test 1: reaching this point means the top-level require succeeded.
    console.log('✓ Test 1: Module can be imported');

    // Test 2: Initialize hash manager
    hashManager = new ImageHashManager();
    console.log('✓ Test 2: ImageHashManager can be initialized');

    // Test 3: Database connection. getStats() reports failure by returning
    // null, so turn that into a readable error instead of a TypeError below.
    const stats = hashManager.getStats();
    if (!stats) {
      throw new Error('Could not read statistics from the hash database');
    }
    console.log('✓ Test 3: Database connection works');
    console.log(`  - Database path: ${stats.databasePath}`);
    console.log(`  - Total images: ${stats.totalImages}`);

    // Test 4: Check if database file exists
    if (await fs.pathExists(stats.databasePath)) {
      console.log('✓ Test 4: Database file exists');
    } else {
      console.log(`⚠ Test 4: Database file not found at ${stats.databasePath}`);
    }

    // Test 5: Query operations
    const recentHashes = hashManager.getAllHashes(1);
    if (!Array.isArray(recentHashes)) {
      throw new Error('getAllHashes did not return an array');
    }
    console.log('✓ Test 5: Can query database');
    console.log(`  - Records in database: ${stats.totalImages}`);

    // Test 6: Check cache directory
    const cacheDir = path.join(__dirname, 'cache', 'images');
    if (await fs.pathExists(cacheDir)) {
      const imageExtensions = ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp'];
      const files = await fs.readdir(cacheDir);
      const imageFiles = files.filter((file) =>
        imageExtensions.includes(path.extname(file).toLowerCase())
      );
      console.log('✓ Test 6: Cache directory accessible');
      console.log(`  - Images in cache: ${imageFiles.length}`);

      if (imageFiles.length > 0 && stats.totalImages === 0) {
        console.log('\n💡 Tip: Run "node image-hash-tool.js scan-cache" to hash existing images');
      }
    } else {
      console.log('⚠ Test 6: Cache directory does not exist yet (will be created when needed)');
    }

    // Test 7: Hamming distance calculation. Check the identical case, a
    // differing pair, and the documented Infinity for mismatched lengths.
    const sameDistance = hashManager.hammingDistance('abc123', 'abc123');
    const differentDistance = hashManager.hammingDistance('abc123', 'abc124');
    const mismatchedDistance = hashManager.hammingDistance('abc123', 'abc');
    if (sameDistance === 0 && differentDistance > 0 && mismatchedDistance === Infinity) {
      console.log('✓ Test 7: Hamming distance calculation works');
    } else {
      throw new Error('Hamming distance calculation failed');
    }

    // Test 8: MediaCache integration
    try {
      const mediaCache = require('./utils/mediaCache');
      // `!= null` also rejects undefined, which a strict null check would
      // have reported as a successful integration.
      if (mediaCache.imageHashManager != null) {
        console.log('✓ Test 8: MediaCache integration successful');
      } else {
        console.log('⚠ Test 8: MediaCache initialized but ImageHashManager is null');
        console.log('  This might be due to missing dependencies');
      }
    } catch (error) {
      console.log('✗ Test 8: MediaCache integration failed:', error.message);
      exitCode = 1;
    }

    // Only claim success when nothing above recorded a failure.
    if (exitCode === 0) {
      console.log('\n=== All Tests Passed ===');
      console.log('\nThe Image Hash Manager is ready to use!');
      console.log('\nNext steps:');
      console.log('  1. Start your bot - images will be automatically hashed');
      console.log('  2. Run: node image-hash-tool.js scan-cache');
      console.log('  3. Run: node image-hash-tool.js stats');
      console.log('  4. See docs/image-hashing.md for full documentation');
    } else {
      console.log('\n=== Some Tests Failed ===');
    }
  } catch (error) {
    console.error('\n✗ Test failed:', error.message);
    console.error('\nError details:', error);
    exitCode = 1;

    console.log('\n🔧 Troubleshooting:');
    console.log('  1. Make sure dependencies are installed:');
    console.log('     npm install imghash better-sqlite3');
    console.log('  2. Check file permissions for data/ directory');
    console.log('  3. See docs/image-hashing.md for more help');
  } finally {
    if (hashManager) {
      hashManager.close();
      console.log('\n✓ Database connection closed');
    }
  }
}

runTests()
  .catch((error) => {
    // Reached only if cleanup in the finally block itself throws.
    console.error('\n✗ Unexpected error while running tests:', error);
    exitCode = 1;
  })
  .finally(() => {
    process.exit(exitCode);
  });
+412
utils/imageHashManager.js
const fs = require('fs-extra');
const path = require('path');
const Database = require('better-sqlite3');
const imghash = require('imghash');
const config = require('../config');

// Number of set bits in each 4-bit value, indexed by that value. Used to
// count differing bits between two hexadecimal digits.
const NIBBLE_BIT_COUNTS = [0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4];

// Matches strings made up solely of hexadecimal digits.
const HEX_PATTERN = /^[0-9a-f]+$/i;

/**
 * Decode the JSON text columns of a row read from image_hashes.
 * The row object is modified in place, as the lookup methods always did.
 * @param {Object|undefined} row - Row returned by a statement's get()
 * @returns {Object|null} - The row with metadata/alternate_urls parsed, or null when there is no row
 */
function parseRecord(row) {
  if (!row) {
    return null;
  }
  if (row.metadata) {
    row.metadata = JSON.parse(row.metadata);
  }
  if (row.alternate_urls) {
    row.alternate_urls = JSON.parse(row.alternate_urls);
  }
  return row;
}

/**
 * ImageHashManager - Manages perceptual hashing of images and stores them in a database
 * This helps detect duplicate or similar images across different URLs
 */
class ImageHashManager {
  constructor() {
    // Database path
    const dbDir = path.join(__dirname, '..', 'data');
    this.dbPath = path.join(dbDir, 'image_hashes.db');

    // Initialize database
    this.initDatabase();
  }

  /**
   * Initialize the SQLite database and create tables if they don't exist
   * @throws {Error} - Re-throws any error raised while opening or preparing the database
   */
  initDatabase() {
    try {
      // Ensure data directory exists (synchronously)
      fs.ensureDirSync(path.dirname(this.dbPath));

      // Open database connection
      this.db = new Database(this.dbPath);

      // Create tables
      this.db.exec(`
        CREATE TABLE IF NOT EXISTS image_hashes (
          id INTEGER PRIMARY KEY AUTOINCREMENT,
          url TEXT NOT NULL,
          perceptual_hash TEXT NOT NULL,
          file_path TEXT,
          cached_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
          metadata TEXT,
          alternate_urls TEXT,
          UNIQUE(url)
        );

        CREATE INDEX IF NOT EXISTS idx_perceptual_hash ON image_hashes(perceptual_hash);
        CREATE INDEX IF NOT EXISTS idx_url ON image_hashes(url);
      `);

      console.log('Image hash database initialized');
    } catch (error) {
      console.error('Error initializing image hash database:', error);
      throw error;
    }
  }

  /**
   * Calculate perceptual hash for an image file
   * @param {string} filePath - Path to the image file
   * @returns {Promise<string>} - Perceptual hash string
   * @throws {Error} - Re-throws whatever imghash raises, after logging it
   */
  async calculateHash(filePath) {
    try {
      // Use imghash to generate a perceptual hash
      // This creates a hash that is similar for similar-looking images
      return await imghash.hash(filePath);
    } catch (error) {
      console.error(`Error calculating hash for ${filePath}:`, error);
      throw error;
    }
  }

  /**
   * Store image hash in the database. An existing row for the same URL is
   * overwritten (hash, file path, metadata) and its cached_at is refreshed.
   * @param {string} url - Original URL of the image
   * @param {string} hash - Perceptual hash of the image
   * @param {string} filePath - Path to cached file
   * @param {Object} metadata - Optional metadata to store (serialized as JSON)
   * @returns {boolean} - Whether the operation was successful
   */
  storeHash(url, hash, filePath, metadata = null) {
    try {
      const stmt = this.db.prepare(`
        INSERT INTO image_hashes (url, perceptual_hash, file_path, metadata)
        VALUES (?, ?, ?, ?)
        ON CONFLICT(url) DO UPDATE SET
          perceptual_hash = excluded.perceptual_hash,
          file_path = excluded.file_path,
          cached_at = CURRENT_TIMESTAMP,
          metadata = excluded.metadata
      `);

      const metadataJson = metadata ? JSON.stringify(metadata) : null;
      stmt.run(url, hash, filePath, metadataJson);

      console.log(`Stored hash for URL: ${url.substring(0, 50)}...`);
      return true;
    } catch (error) {
      console.error('Error storing image hash:', error);
      return false;
    }
  }

  /**
   * Process an image file and store its hash
   * @param {string} url - Original URL of the image
   * @param {string} filePath - Path to the cached image file
   * @param {Object} metadata - Optional metadata to store
   * @returns {Promise<{hash: string, stored: boolean, isDuplicate: boolean, primaryUrl?: string}>}
   *   primaryUrl is only present when the image duplicates an existing record.
   * @throws {Error} - When the file is missing or hashing fails
   */
  async processImage(url, filePath, metadata = null) {
    try {
      // Check if file exists
      if (!await fs.pathExists(filePath)) {
        throw new Error(`File not found: ${filePath}`);
      }

      // Calculate perceptual hash
      const hash = await this.calculateHash(filePath);

      // Check if this URL already exists
      if (this.getHashByUrl(url)) {
        console.log(`URL already in database: ${url.substring(0, 50)}...`);
        return { hash, stored: true, isDuplicate: false };
      }

      // Check if this hash already exists with a different URL
      const existingByHash = this.getHashByHash(hash);
      if (existingByHash) {
        // Same image, different URL - add as alternate URL
        console.log(`Found duplicate image with hash ${hash}, adding alternate URL`);
        const added = this.addAlternateUrl(existingByHash.url, url);
        return { hash, stored: added, isDuplicate: true, primaryUrl: existingByHash.url };
      }

      // New image - store in database
      const stored = this.storeHash(url, hash, filePath, metadata);

      return { hash, stored, isDuplicate: false };
    } catch (error) {
      console.error(`Error processing image ${url}:`, error);
      throw error;
    }
  }

  /**
   * Get hash for a specific URL
   * @param {string} url - URL to look up
   * @returns {Object|null} - Hash data or null if not found (or on error)
   */
  getHashByUrl(url) {
    try {
      const stmt = this.db.prepare(`
        SELECT * FROM image_hashes WHERE url = ?
      `);

      return parseRecord(stmt.get(url));
    } catch (error) {
      console.error('Error getting hash by URL:', error);
      return null;
    }
  }

  /**
   * Get hash record by perceptual hash value. When several rows share the
   * hash, whichever row the query yields first is returned.
   * @param {string} hash - Perceptual hash to look up
   * @returns {Object|null} - Hash data or null if not found (or on error)
   */
  getHashByHash(hash) {
    try {
      const stmt = this.db.prepare(`
        SELECT * FROM image_hashes WHERE perceptual_hash = ?
      `);

      return parseRecord(stmt.get(hash));
    } catch (error) {
      console.error('Error getting hash by hash value:', error);
      return null;
    }
  }

  /**
   * Add an alternate URL to an existing hash record
   * @param {string} primaryUrl - The primary URL of the existing record. A
   *   perceptual hash is also accepted and resolved to the record that owns it.
   * @param {string} newUrl - The new URL to add as an alternate
   * @returns {boolean} - Whether the operation was successful
   */
  addAlternateUrl(primaryUrl, newUrl) {
    try {
      // Get existing record. Some callers identify the record by its
      // perceptual hash rather than by URL, so fall back to a hash lookup
      // instead of silently failing to record the alternate.
      const existing = this.getHashByUrl(primaryUrl) || this.getHashByHash(primaryUrl);
      if (!existing) {
        console.error(`Primary URL not found: ${primaryUrl}`);
        return false;
      }

      const recordUrl = existing.url;

      // Use the already-parsed alternate URLs or start a new array
      const alternateUrls = Array.isArray(existing.alternate_urls) ? existing.alternate_urls : [];

      // Check if this URL is already in alternates or is the primary
      if (newUrl === recordUrl || alternateUrls.includes(newUrl)) {
        console.log(`URL already tracked: ${newUrl.substring(0, 50)}...`);
        return true;
      }

      // Add new URL to alternates
      alternateUrls.push(newUrl);

      // Update database
      const stmt = this.db.prepare(`
        UPDATE image_hashes
        SET alternate_urls = ?
        WHERE url = ?
      `);

      stmt.run(JSON.stringify(alternateUrls), recordUrl);

      console.log(`Added alternate URL to ${recordUrl.substring(0, 50)}...`);
      console.log(`  -> ${newUrl.substring(0, 50)}...`);
      return true;
    } catch (error) {
      console.error('Error adding alternate URL:', error);
      return false;
    }
  }

  /**
   * Find similar images by comparing perceptual hashes. Every stored row is
   * compared against the given hash.
   * @param {string} hash - Perceptual hash to compare
   * @param {number} maxDistance - Maximum hamming distance for similarity (default: 5)
   * @returns {Array} - Matching records (metadata parsed, alternate_urls as
   *   stored) with an added `distance` field, most similar first; empty on error
   */
  findSimilarImages(hash, maxDistance = 5) {
    try {
      const stmt = this.db.prepare(`
        SELECT * FROM image_hashes
      `);

      const similar = [];

      // Calculate hamming distance for each hash
      for (const record of stmt.all()) {
        const distance = this.hammingDistance(hash, record.perceptual_hash);
        if (distance <= maxDistance) {
          similar.push({
            ...record,
            distance,
            metadata: record.metadata ? JSON.parse(record.metadata) : null
          });
        }
      }

      // Sort by distance (most similar first)
      similar.sort((a, b) => a.distance - b.distance);

      return similar;
    } catch (error) {
      console.error('Error finding similar images:', error);
      return [];
    }
  }

  /**
   * Calculate hamming distance between two hash strings.
   *
   * When both strings are hexadecimal the distance is the number of differing
   * bits, so the result does not depend on letter case and one differing hex
   * digit contributes between 1 and 4. For any other strings the distance is
   * the number of differing character positions.
   * @param {string} hash1 - First hash
   * @param {string} hash2 - Second hash
   * @returns {number} - Hamming distance, or Infinity when either hash is
   *   empty/missing or the lengths differ
   */
  hammingDistance(hash1, hash2) {
    if (!hash1 || !hash2 || hash1.length !== hash2.length) {
      return Infinity;
    }

    const compareBits = HEX_PATTERN.test(hash1) && HEX_PATTERN.test(hash2);

    let distance = 0;
    for (let i = 0; i < hash1.length; i++) {
      if (hash1[i] === hash2[i]) {
        continue;
      }
      if (compareBits) {
        // XOR leaves a 1 in every bit position where the two digits differ.
        const differingBits = Number.parseInt(hash1[i], 16) ^ Number.parseInt(hash2[i], 16);
        distance += NIBBLE_BIT_COUNTS[differingBits];
      } else {
        distance++;
      }
    }

    return distance;
  }

  /**
   * Get all stored hashes, newest first
   * @param {number} limit - Maximum number of results (optional). Values that
   *   are not a positive integer are ignored and all rows are returned.
   * @returns {Array} - Array of hash records with metadata parsed (or null)
   *   and alternate_urls parsed (or an empty array); empty on error
   */
  getAllHashes(limit = null) {
    try {
      let query = 'SELECT * FROM image_hashes ORDER BY cached_at DESC';
      const params = [];

      // Bind the limit as a parameter rather than splicing it into the SQL.
      const rowLimit = Number(limit);
      if (limit && Number.isInteger(rowLimit) && rowLimit > 0) {
        query += ' LIMIT ?';
        params.push(rowLimit);
      }

      const results = this.db.prepare(query).all(...params);

      return results.map((record) => ({
        ...record,
        metadata: record.metadata ? JSON.parse(record.metadata) : null,
        alternate_urls: record.alternate_urls ? JSON.parse(record.alternate_urls) : []
      }));
    } catch (error) {
      console.error('Error getting all hashes:', error);
      return [];
    }
  }

  /**
   * Delete a hash record by URL
   * @param {string} url - URL to delete
   * @returns {boolean} - Whether a row was actually deleted
   */
  deleteHashByUrl(url) {
    try {
      const stmt = this.db.prepare('DELETE FROM image_hashes WHERE url = ?');
      const result = stmt.run(url);
      return result.changes > 0;
    } catch (error) {
      console.error('Error deleting hash:', error);
      return false;
    }
  }

  /**
   * Clean up hash records for files that no longer exist. Records without a
   * file_path are left alone.
   * @returns {Promise<number>} - Number of records cleaned up (0 on error)
   */
  async cleanupOrphanedHashes() {
    try {
      let cleaned = 0;

      for (const record of this.getAllHashes()) {
        if (record.file_path && !await fs.pathExists(record.file_path)) {
          if (this.deleteHashByUrl(record.url)) {
            cleaned++;
          }
        }
      }

      console.log(`Cleaned up ${cleaned} orphaned hash records`);
      return cleaned;
    } catch (error) {
      console.error('Error cleaning up orphaned hashes:', error);
      return 0;
    }
  }

  /**
   * Get statistics about the hash database
   * @returns {{totalImages: number, imagesLastWeek: number, databasePath: string}|null}
   *   Database statistics, or null if a query failed
   */
  getStats() {
    try {
      const totalStmt = this.db.prepare('SELECT COUNT(*) as count FROM image_hashes');
      const total = totalStmt.get().count;

      const recentStmt = this.db.prepare(`
        SELECT COUNT(*) as count FROM image_hashes
        WHERE cached_at > datetime('now', '-7 days')
      `);
      const recentWeek = recentStmt.get().count;

      return {
        totalImages: total,
        imagesLastWeek: recentWeek,
        databasePath: this.dbPath
      };
    } catch (error) {
      console.error('Error getting stats:', error);
      return null;
    }
  }

  /**
   * Close the database connection
   */
  close() {
    if (this.db) {
      this.db.close();
      console.log('Image hash database connection closed');
    }
  }
}

module.exports = ImageHashManager;
+138 -19
utils/mediaCache.js
··· 5 5 const ffmpeg = require('fluent-ffmpeg'); 6 6 const ffmpegPath = require('@ffmpeg-installer/ffmpeg').path; 7 7 const config = require('../config'); 8 + const ImageHashManager = require('./imageHashManager'); 8 9 9 10 // Set ffmpeg path 10 11 ffmpeg.setFfmpegPath(ffmpegPath); ··· 19 20 20 21 // Maximum cache age in days (15 days by default) 21 22 this.maxCacheAgeDays = config.maxCacheAgeDays || 15; 23 + 24 + // Initialize image hash manager 25 + try { 26 + this.imageHashManager = new ImageHashManager(); 27 + } catch (error) { 28 + console.error('Failed to initialize ImageHashManager:', error); 29 + this.imageHashManager = null; 30 + } 22 31 23 32 // Initialize cache directories 24 33 this.initCacheDirs(); ··· 92 101 await cleanDir(this.imageDir); 93 102 await cleanDir(this.videoDir); 94 103 await cleanDir(this.transcodedDir); 104 + 105 + // Clean up orphaned hash records 106 + if (this.imageHashManager) { 107 + try { 108 + await this.imageHashManager.cleanupOrphanedHashes(); 109 + } catch (error) { 110 + console.error('Error cleaning orphaned hashes:', error); 111 + } 112 + } 95 113 96 114 console.log('Cache cleanup completed'); 97 115 } ··· 213 231 * Download media (image or video) and cache it 214 232 * @param {string} url - URL to download 215 233 * @param {boolean} isVideo - Whether URL is known to be a video 234 + * @param {string} sourceUrl - Original source URL (for hash database) 216 235 * @returns {Promise<{filePath: string, contentType: string, isVideo: boolean}>} - Path to cached file 217 236 */ 218 - async downloadMedia(url, isVideo = false) { 237 + async downloadMedia(url, isVideo = false, sourceUrl = null) { 219 238 try { 220 239 let contentType = null; 221 240 ··· 252 271 isVideo = this.isVideoUrl(url, contentType); 253 272 } 254 273 255 - // Generate unique filename 256 - const hash = this.getHashedFilename(url); 274 + // For videos, use URL hash as before 275 + // For images, we'll calculate perceptual hash after download 276 + const 
storageDir = isVideo ? this.videoDir : this.imageDir; 257 277 const ext = this.getFileExtension(url, contentType); 258 - const filename = `${hash}${ext}`; 259 278 260 - // Determine storage directory and full path 261 - const storageDir = isVideo ? this.videoDir : this.imageDir; 262 - const filePath = path.join(storageDir, filename); 279 + let finalFilePath; 263 280 264 - // Check if already cached and valid 265 - if (await this.isValidCacheFile(filePath)) { 266 - console.log(`Using cached ${isVideo ? 'video' : 'image'}: ${filename}`); 267 - return { filePath, contentType, isVideo }; 281 + if (isVideo) { 282 + // Videos use URL hash as before 283 + const hash = this.getHashedFilename(url); 284 + const filename = `${hash}${ext}`; 285 + finalFilePath = path.join(storageDir, filename); 286 + 287 + // Check if already cached and valid 288 + if (await this.isValidCacheFile(finalFilePath)) { 289 + console.log(`Using cached video: ${filename}`); 290 + return { filePath: finalFilePath, contentType, isVideo }; 291 + } 292 + } else { 293 + // For images, check if we already have this URL in the hash database 294 + if (this.imageHashManager && sourceUrl) { 295 + const existing = this.imageHashManager.getHashByUrl(sourceUrl); 296 + if (existing && existing.file_path && await this.isValidCacheFile(existing.file_path)) { 297 + console.log(`Using cached image from hash database: ${path.basename(existing.file_path)}`); 298 + return { filePath: existing.file_path, contentType, isVideo }; 299 + } 300 + } 268 301 } 269 302 270 - // Download the file 303 + // Download the file to a temporary location first 271 304 console.log(`Downloading ${isVideo ? 
'video' : 'image'} from ${url}`); 272 305 const response = await axios({ 273 306 method: 'GET', ··· 286 319 contentType = response.headers['content-type']; 287 320 } 288 321 289 - // Save to cache 290 - const writer = fs.createWriteStream(filePath); 322 + // For videos, save directly with URL hash 323 + // For images, save to temp then rename with perceptual hash 324 + let tempFilePath = null; 325 + if (isVideo) { 326 + // Videos use URL hash as before 327 + const hash = this.getHashedFilename(url); 328 + const filename = `${hash}${ext}`; 329 + finalFilePath = path.join(storageDir, filename); 330 + } else { 331 + // Images: download to temp file first 332 + const tempHash = this.getHashedFilename(url + Date.now()); // Unique temp name 333 + tempFilePath = path.join(storageDir, `temp_${tempHash}${ext}`); 334 + } 335 + 336 + // Save to cache (temp location for images, final for videos) 337 + const downloadPath = isVideo ? finalFilePath : tempFilePath; 338 + const writer = fs.createWriteStream(downloadPath); 291 339 response.data.pipe(writer); 292 340 293 341 return new Promise((resolve, reject) => { 294 - writer.on('finish', () => { 295 - resolve({ filePath, contentType, isVideo }); 342 + writer.on('finish', async () => { 343 + try { 344 + // If it's an image, calculate perceptual hash and rename 345 + if (!isVideo && this.imageHashManager) { 346 + try { 347 + // Use sourceUrl if provided, otherwise fall back to download url 348 + const urlForHash = sourceUrl || url; 349 + console.log(`[ImageHash] Processing image: ${urlForHash.substring(0, 80)}...`); 350 + 351 + // Calculate perceptual hash 352 + const perceptualHash = await this.imageHashManager.calculateHash(tempFilePath); 353 + console.log(`[ImageHash] Calculated perceptual hash: ${perceptualHash}`); 354 + 355 + // Rename file to use perceptual hash 356 + const hashFilename = `${perceptualHash}${ext}`; 357 + finalFilePath = path.join(storageDir, hashFilename); 358 + 359 + // Check if file with this hash already 
exists 360 + if (await this.isValidCacheFile(finalFilePath)) { 361 + console.log(`[ImageHash] File with same perceptual hash already exists: ${hashFilename}`); 362 + // Delete temp file 363 + await fs.unlink(tempFilePath); 364 + 365 + // Add alternate URL if this is a different source 366 + const existing = this.imageHashManager.getHashByHash(perceptualHash); 367 + if (existing && existing.url !== urlForHash) { 368 + console.log(`[ImageHash] Adding alternate URL for existing hash`); 369 + this.imageHashManager.addAlternateUrl(perceptualHash, urlForHash); 370 + } 371 + } else { 372 + // Rename temp file to final hash-based name 373 + await fs.rename(tempFilePath, finalFilePath); 374 + console.log(`[ImageHash] Renamed to: ${hashFilename}`); 375 + } 376 + 377 + // Store hash in database 378 + await this.imageHashManager.storeHash(urlForHash, perceptualHash, finalFilePath, { 379 + contentType, 380 + downloadUrl: url, 381 + downloadedAt: new Date().toISOString() 382 + }); 383 + console.log(`[ImageHash] Successfully stored hash in database`); 384 + } catch (error) { 385 + // If hashing fails, fall back to URL hash naming 386 + console.error('[ImageHash] Failed to hash image, falling back to URL hash:', error); 387 + const fallbackHash = this.getHashedFilename(url); 388 + const fallbackFilename = `${fallbackHash}${ext}`; 389 + finalFilePath = path.join(storageDir, fallbackFilename); 390 + 391 + // Move temp file to fallback name if it doesn't exist 392 + if (!await this.isValidCacheFile(finalFilePath)) { 393 + await fs.rename(tempFilePath, finalFilePath); 394 + } else { 395 + await fs.unlink(tempFilePath); 396 + } 397 + } 398 + } else if (!isVideo && !this.imageHashManager) { 399 + // No hash manager, fall back to URL hash naming 400 + console.log('[ImageHash] Hash manager not initialized, using URL hash'); 401 + const fallbackHash = this.getHashedFilename(url); 402 + const fallbackFilename = `${fallbackHash}${ext}`; 403 + finalFilePath = path.join(storageDir, 
fallbackFilename); 404 + await fs.rename(tempFilePath, finalFilePath); 405 + } 406 + 407 + resolve({ filePath: finalFilePath, contentType, isVideo }); 408 + } catch (error) { 409 + reject(error); 410 + } 296 411 }); 297 412 writer.on('error', reject); 298 413 }); ··· 354 469 * Process a media URL - download, cache, and transcode if needed 355 470 * @param {string} url - Media URL to process 356 471 * @param {boolean} isVideo - Whether URL is known to be a video 472 + * @param {string} sourceUrl - Original source URL (for hash database) 357 473 * @returns {Promise<{localPath: string, isVideo: boolean, contentType: string}>} - Processed media info 358 474 */ 359 - async processMediaUrl(url, isVideo = false) { 475 + async processMediaUrl(url, isVideo = false, sourceUrl = null) { 360 476 // Download and cache the media 361 - const { filePath, contentType, isVideo: detectedVideo } = await this.downloadMedia(url, isVideo); 477 + const { filePath, contentType, isVideo: detectedVideo } = await this.downloadMedia(url, isVideo, sourceUrl); 362 478 363 479 // Determine if it's a video based primarily on MIME type 364 480 const isVideoContent = contentType && contentType.toLowerCase().startsWith('video/'); ··· 392 508 */ 393 509 async shutdown() { 394 510 console.log('MediaCache shutting down...'); 395 - // Perform any necessary cleanup 511 + // Close image hash database connection 512 + if (this.imageHashManager) { 513 + this.imageHashManager.close(); 514 + } 396 515 } 397 516 } 398 517