ATlast — you'll never need to find your favorites on another platform again. Find your favs in the ATmosphere.
atproto
16
fork

Configure Feed

Select the types of activity you want to include in your feed.

test(web): add extractor logic unit tests

byarielm.fyi 64a4bca9 b4ba282a

verified
+232
+232
packages/web/src/lib/parsers/fileExtractor.test.ts
import { describe, it, expect } from "vitest";
import JSZip from "jszip";
import { DataExtractor } from "./fileExtractor";
import type { ParseRule } from "./platformDefinitions";

/** Archive paths used by the Instagram fixtures. */
const INSTAGRAM_FOLLOWING_HTML =
  "connections/followers_and_following/following.html";
const INSTAGRAM_FOLLOWING_JSON =
  "connections/followers_and_following/following.json";

/**
 * Builds an in-memory JSZip archive for a test.
 *
 * @param files - Map of archive path to the text stored at that path.
 * @returns A JSZip instance holding one entry per key of `files`.
 */
async function createTestZip(
  files: Record<string, string>,
): Promise<JSZip> {
  const archive = new JSZip();
  Object.entries(files).forEach(([entryPath, text]) => {
    archive.file(entryPath, text);
  });
  return archive;
}

/**
 * Runs `rules` against `archive` with a fresh extractor constructed from an
 * empty Blob, exactly as every test case in this suite does.
 */
function runRules(archive: JSZip, rules: ParseRule[]) {
  return new DataExtractor(new Blob()).processZipArchive(archive, rules);
}

/** Fresh HTML rule capturing the handle from Instagram profile anchors. */
function instagramHtmlRule(): ParseRule {
  return {
    zipPath: INSTAGRAM_FOLLOWING_HTML,
    format: "HTML",
    rule: '<a target="_blank" href="https://www.instagram.com/_u/([^"]+)"',
  };
}

/** Fresh JSON rule reading `title` under `relationships_following`. */
function instagramJsonRule(): ParseRule {
  return {
    zipPath: INSTAGRAM_FOLLOWING_JSON,
    format: "JSON",
    rule: ["relationships_following", "title"],
  };
}

/** Fresh JSON rule reading `name` under `users` from the file at `zipPath`. */
function usersNameRule(zipPath: string): ParseRule {
  return {
    zipPath,
    format: "JSON",
    rule: ["users", "name"],
  };
}

describe("DataExtractor", () => {
  describe("processZipArchive", () => {
    it("extracts usernames from Instagram HTML in ZIP", async () => {
      const followingPage = `
        <a target="_blank" href="https://www.instagram.com/_u/user_one">user_one</a>
        <a target="_blank" href="https://www.instagram.com/_u/user_two">user_two</a>
      `;
      const archive = await createTestZip({
        [INSTAGRAM_FOLLOWING_HTML]: followingPage,
      });

      const { uniqueUsernames } = await runRules(archive, [
        instagramHtmlRule(),
      ]);

      expect(uniqueUsernames).toEqual(["user_one", "user_two"]);
    });

    it("extracts usernames from Instagram JSON in ZIP", async () => {
      const followingJson = JSON.stringify({
        relationships_following: [
          { title: "alice" },
          { title: "bob" },
          { title: "charlie" },
        ],
      });
      const archive = await createTestZip({
        [INSTAGRAM_FOLLOWING_JSON]: followingJson,
      });

      const { uniqueUsernames } = await runRules(archive, [
        instagramJsonRule(),
      ]);

      expect(uniqueUsernames).toEqual(["alice", "bob", "charlie"]);
    });

    it("extracts usernames from TikTok JSON in ZIP", async () => {
      const tiktokExport = JSON.stringify({
        "Your Activity": {
          Following: {
            Following: [
              { UserName: "tiktoker1" },
              { UserName: "tiktoker2" },
            ],
          },
        },
      });
      const archive = await createTestZip({
        "user_data_tiktok.json": tiktokExport,
      });
      const tiktokRules: ParseRule[] = [
        {
          zipPath: "user_data_tiktok.json",
          format: "JSON",
          rule: ["Your Activity", "Following", "Following", "UserName"],
        },
      ];

      const { uniqueUsernames } = await runRules(archive, tiktokRules);

      expect(uniqueUsernames).toEqual(["tiktoker1", "tiktoker2"]);
    });

    it("skips rules where ZIP file is not found", async () => {
      // The archive holds an unrelated file; the rule targets a path that is absent.
      const archive = await createTestZip({
        "other/file.txt": "content",
      });
      const missingFileRules: ParseRule[] = [
        {
          zipPath: "missing/file.html",
          format: "HTML",
          rule: "(.+)",
        },
      ];

      const outcome = await runRules(archive, missingFileRules);

      expect(outcome.uniqueUsernames).toEqual([]);
      expect(Object.keys(outcome.allExtracted)).toHaveLength(0);
    });

    it("deduplicates usernames across multiple rules", async () => {
      // "overlap_user" appears in both files; the other two names appear once each.
      const followingPage = `
        <a target="_blank" href="https://www.instagram.com/_u/overlap_user">overlap_user</a>
        <a target="_blank" href="https://www.instagram.com/_u/html_only">html_only</a>
      `;
      const followingJson = JSON.stringify({
        relationships_following: [
          { title: "overlap_user" },
          { title: "json_only" },
        ],
      });
      const archive = await createTestZip({
        [INSTAGRAM_FOLLOWING_HTML]: followingPage,
        [INSTAGRAM_FOLLOWING_JSON]: followingJson,
      });

      const { uniqueUsernames } = await runRules(archive, [
        instagramHtmlRule(),
        instagramJsonRule(),
      ]);

      // Should be sorted and deduplicated
      expect(uniqueUsernames).toEqual([
        "html_only",
        "json_only",
        "overlap_user",
      ]);
    });

    it("returns sorted unique usernames", async () => {
      // Names are deliberately stored out of alphabetical order.
      const unsortedUsers = JSON.stringify({
        users: [
          { name: "charlie" },
          { name: "alice" },
          { name: "bob" },
        ],
      });
      const archive = await createTestZip({
        "data.json": unsortedUsers,
      });

      const { uniqueUsernames } = await runRules(archive, [
        usersNameRule("data.json"),
      ]);

      expect(uniqueUsernames).toEqual(["alice", "bob", "charlie"]);
    });

    it("stores results keyed by rule ID", async () => {
      const singleUser = JSON.stringify({
        users: [{ name: "user1" }],
      });
      const archive = await createTestZip({
        "data.json": singleUser,
      });

      const { allExtracted } = await runRules(archive, [
        usersNameRule("data.json"),
      ]);

      // The first (and only) rule's matches are expected under this key.
      expect(allExtracted["Rule_1_data.json"]).toEqual(["user1"]);
    });

    it("handles empty ZIP gracefully", async () => {
      const emptyArchive = new JSZip();

      const { uniqueUsernames } = await runRules(emptyArchive, [
        usersNameRule("missing.json"),
      ]);

      expect(uniqueUsernames).toEqual([]);
    });

    it("handles empty rules array", async () => {
      const archive = await createTestZip({ "file.txt": "data" });

      const outcome = await runRules(archive, []);

      expect(outcome.uniqueUsernames).toEqual([]);
      expect(Object.keys(outcome.allExtracted)).toHaveLength(0);
    });
  });
});