Convert Google Takeout Maps data to GeoJSON
0
takeout_to_geojson.ts
221 lines 8.6 kB view raw

/**
 * Convert Google Takeout Maps data to GeoJSON files, so that I can finally
 * escape from google maps.
 *
 * Setup: download a Google Takeout export including "Maps (your places)" and
 * "Saved", place the export folder next to this script and use it as --input
 *
 *   deno run ./takeout_to_geojson.ts --input ./Takeout
 *
 * NOTE: For the first usage, you will probably need to accept Google T&C manually
 * in the browser before url redirects work.
 */

import { Command } from 'jsr:@cliffy/command'
import { colors } from 'jsr:@cliffy/ansi/colors'
import { parse } from 'jsr:@std/csv'
import { ensureDir } from 'jsr:@std/fs'
import { launch } from 'jsr:@astral/astral'

// ── Browser ───────────────────────────────────────────────────────────────────

// Single browser instance shared by every geocoding call; created on first use.
let _browser: Awaited<ReturnType<typeof launch>> | null = null
// Passed to the browser as --user-data-dir when it is launched.
let _profileDir = './browser-profile'

/** Returns the shared browser, launching it (non-headless) on the first call. */
async function getBrowser() {
  _browser ??= await launch({
    headless: false,
    args: [`--user-data-dir=${_profileDir}`],
  })
  return _browser
}

/** Closes the shared browser if one was launched and forgets the instance. */
async function closeBrowser() {
  if (_browser) {
    await _browser.close()
  }
  _browser = null
}

// ── Geocoding ─────────────────────────────────────────────────────────────────

// Matches the "@lat,lon" fragment of a URL; group 1 is latitude, group 2 longitude.
const matchLatLon = /@([-+]?\d{1,2}(?:\.\d+)?),([-+]?\d{1,3}(?:\.\d+)?)/

/** Extracts numeric { lat, lon } from a URL containing "@lat,lon", or null if absent. */
function coordsFromUrl(url: string) {
  const found = matchLatLon.exec(url)
  if (found === null) return null
  const lat = parseFloat(found[1])
  const lon = parseFloat(found[2])
  return { lat, lon }
}

// Opens the Google Maps URL in a real browser so JS executes and the page
// redirects to a URL containing @lat,lon, then reverse-geocodes with Nominatim.
async function geocodeGoogleMapsUrl(url: string) {
  // Resolves to the Nominatim response fields merged with numeric `lat`/`lon`
  // parsed from the redirected URL, or null when no "@lat,lon" ever appears.
  const browser = await getBrowser()
  const page = await browser.newPage()
  try {
    await page.goto(url)
    let finalUrl = url
    for (let attempt = 0; attempt < REDIRECT_POLL_ATTEMPTS; attempt++) {
      finalUrl = await page.evaluate(() => window.location.href)
      if (matchLatLon.test(finalUrl)) break
      await new Promise(r => setTimeout(r, REDIRECT_POLL_INTERVAL_MS))
    }
    const coords = coordsFromUrl(finalUrl)
    if (!coords) return null

    // Coordinates parsed from the URL are spread last so they take precedence
    // over any `lat`/`lon` keys present in the Nominatim response.
    const nominatim = await reverseGeocode(coords.lat, coords.lon)
    return { ...nominatim, ...coords }
  } finally {
    // Always release the tab, even when navigation or evaluation throws.
    await page.close()
  }
}

// How many times, and how often, the page URL is polled for the "@lat,lon" redirect.
const REDIRECT_POLL_ATTEMPTS = 15
const REDIRECT_POLL_INTERVAL_MS = 1000

/**
 * Reverse-geocodes a coordinate with Nominatim.
 *
 * @returns the parsed JSON object, or null on network failure, a non-OK HTTP
 *          status, or a body that is not a JSON object.
 */
async function reverseGeocode(lat: number, lon: number): Promise<Record<string, unknown> | null> {
  try {
    const res = await fetch(
      `https://nominatim.openstreetmap.org/reverse?lat=${lat}&lon=${lon}&format=json`
    )
    if (!res.ok) {
      // Discard the unread body so an error payload is never merged into results.
      await res.body?.cancel()
      return null
    }
    const body: unknown = await res.json()
    return typeof body === 'object' && body !== null ? body as Record<string, unknown> : null
  } catch {
    // Best-effort: coordinates alone are still useful without an address.
    return null
  }
}

// ── Types ─────────────────────────────────────────────────────────────────────

interface PlaceProperties {
  location?: { name?: string; address?: string; [key: string]: unknown }
  google_maps_url?: string
  date?: string
  timestamp?: string
  Name?: string
  description?: string
  [key: string]: unknown
}

interface PlaceFeature {
  type: string
  geometry?: { type: string; coordinates?: [number, number] }
  properties: PlaceProperties
}

interface FeatureCollection {
  name: string
  type: string
  features: PlaceFeature[]
}

interface UnfoundEntry {
  list: string
  title: string
  url: string
}

// ── Helpers ───────────────────────────────────────────────────────────────────

/** Extracts the decoded `q=` query value from a URL, or null when there is none. */
function urlName(url?: string) {
  if (!url) return null
  const q = url.match(/[?&]q=([^&]+)/)?.[1]
  if (!q) return null
  const spaced = q.replace(/\+/g, ' ')
  try {
    return decodeURIComponent(spaced)
  } catch {
    // Malformed percent-escape: fall back to the raw value instead of aborting the run.
    return spaced
  }
}

/** Formats a GeoJSON [lon, lat] pair as the display string "lat, lon". */
function coordName(coords?: [number, number]) {
  if (!coords) return null
  const [lon, lat] = coords
  return `${lat}, ${lon}`
}

/**
 * Reads and parses a JSON file, returning `fallback` when it cannot be used.
 * A missing file is the expected first-run case and is silent; any other
 * failure (unreadable file, invalid JSON) is reported before falling back.
 */
async function readJsonOr<T>(path: string, fallback: T): Promise<T> {
  try {
    return JSON.parse(await Deno.readTextFile(path)) as T
  } catch (err) {
    if (!(err instanceof Deno.errors.NotFound)) {
      const reason = err instanceof Error ? err.message : String(err)
      console.warn(colors.yellow(` ! could not read ${path}, starting fresh: ${reason}`))
    }
    return fallback
  }
}

// ── CLI ───────────────────────────────────────────────────────────────────────

await new Command()
  .name('takeout-to-geojson')
  .description('Convert Google Takeout Maps data to GeoJSON files.')
  .option('-i, --input <dir:string>', 'Path to the takeout directory.', { default: './takeout' })
  .option('-o, --output <dir:string>', 'Path to write results.', { default: './results' })
  .action(async ({ input, output }) => {
    _profileDir = `${output}/browser-profile`
    await ensureDir(`${output}/lists`)

    const decoder = new TextDecoder()

    // Index Saved Places by name for fast lookup. A Map (rather than a plain
    // object) keeps titles such as "constructor" from hitting prototype members.
    const savedPlaces: { features?: Partial<PlaceFeature>[] } = JSON.parse(decoder.decode(
      await Deno.readFile(`${input}/Maps (your places)/Saved Places.json`)
    ))
    const placesByName = new Map<string, PlaceFeature>()
    for (const raw of savedPlaces.features ?? []) {
      const properties = raw.properties ?? {}
      const name = properties.location?.name
        ?? urlName(properties.google_maps_url)
        ?? coordName(raw.geometry?.coordinates)
      // A place with no derivable name can never be matched against a CSV title.
      if (name == null) continue
      placesByName.set(name, {
        ...raw,
        type: raw.type ?? 'Feature',
        properties: { ...properties, location: { ...properties.location, name } },
      })
    }

    // Load any previously recorded unfound entries so reruns skip them
    const unfoundPath = `${output}/unfound.json`
    const unfound = await readJsonOr<UnfoundEntry[]>(unfoundPath, [])
    const unfoundKeys = new Set(unfound.map(e => `${e.list}::${e.title}`))

    async function recordUnfound(entry: UnfoundEntry) {
      unfound.push(entry)
      unfoundKeys.add(`${entry.list}::${entry.title}`)
      await Deno.writeTextFile(unfoundPath, JSON.stringify(unfound, null, 2))
    }

    // Rate-limit geocoding to stay within Nominatim's 1 req/sec policy
    const GEOCODE_DELAY_MS = 1500
    let lastGeocodeTime = 0

    async function geocodeStraggler(title: string, url: string): Promise<PlaceFeature | null> {
      const wait = GEOCODE_DELAY_MS - (Date.now() - lastGeocodeTime)
      if (wait > 0) await new Promise(r => setTimeout(r, wait))

      const geo = await geocodeGoogleMapsUrl(url).catch(() => null)
      lastGeocodeTime = Date.now()
      // Check for finite numbers rather than truthiness: 0 is a valid latitude/longitude.
      if (!geo || !Number.isFinite(geo.lat) || !Number.isFinite(geo.lon)) return null

      return {
        type: 'Feature',
        geometry: { type: 'Point', coordinates: [geo.lon, geo.lat] },
        properties: {
          ...geo,
          location: {
            name: title,
            address: typeof geo.display_name === 'string' ? geo.display_name : '',
          },
          date: new Date().toISOString(),
          google_maps_url: url,
        },
      }
    }

    try {
      // Process each saved list CSV
      for await (const dirEntry of Deno.readDir(`${input}/Saved`)) {
        if (dirEntry.isDirectory || !/\.csv$/i.test(dirEntry.name)) continue
        // Strip only the extension so list names containing dots stay intact and distinct.
        const listName = dirEntry.name.replace(/\.csv$/i, '')
        const outputPath = `${output}/lists/${listName}.json`

        const existing = await readJsonOr<FeatureCollection>(
          outputPath,
          { name: listName, type: 'FeatureCollection', features: [] },
        )
        const features = Array.isArray(existing.features) ? existing.features : []
        const completedTitles = new Set(features.map(f => f.properties?.Name))
        const isHandled = (title: string) =>
          completedTitles.has(title) || unfoundKeys.has(`${listName}::${title}`)

        const rawCsv = decoder.decode(await Deno.readFile(`${input}/Saved/${dirEntry.name}`))
        // Skip any preamble lines that precede the real header row.
        const lines = rawCsv.split('\n')
        const headerIndex = lines.findIndex(l => l.startsWith('Title,'))
        const csvToParse = headerIndex > 0 ? lines.slice(headerIndex).join('\n') : rawCsv
        const rows = parse(csvToParse, { skipFirstRow: true }) as Record<string, string>[]

        const pendingCount = rows.filter(r => r.Title && !isHandled(r.Title)).length
        console.log(colors.bold(`\n${listName} `) + colors.dim(`(${pendingCount} remaining of ${rows.length})`))

        // Re-check inside the loop: recordUnfound grows unfoundKeys as we go.
        for (const { Title: title, Note: note, URL: url } of rows) {
          if (!title || isHandled(title)) continue

          let place: PlaceFeature | null = placesByName.get(title) ?? null

          if (!place && url) {
            console.log(colors.dim(` geocoding: ${title}`))
            place = await geocodeStraggler(title, url)
          }

          if (!place) {
            await recordUnfound({ list: listName, title, url })
            console.log(colors.red(title))
            continue
          }

          // Build a fresh feature instead of mutating the indexed place, so a place
          // that appears in several lists keeps its original `date` for each of them.
          const { date, ...rest } = place.properties
          const feature: PlaceFeature = {
            ...place,
            properties: {
              ...rest,
              timestamp: date,
              Name: rest.location?.name ?? title,
              description: note,
            },
          }

          features.push(feature)
          // Written after every place so an interrupted run can resume where it stopped.
          await Deno.writeTextFile(outputPath, JSON.stringify(
            { name: listName, type: 'FeatureCollection', features }, null, 2
          ))
          console.log(colors.green(title))
        }
      }
    } finally {
      // Close the browser even when processing fails part-way through.
      await closeBrowser()
    }

    // Copy supplementary GeoJSON files as-is
    console.log('\nCopying supplementary files...')
    const supplementary: [string, string][] = [
      [`${input}/Maps (your places)/Saved Places.json`, `${output}/saved_places.json`],
      [`${input}/Maps (your places)/Reviews.json`, `${output}/reviews.json`],
    ]
    for (const [src, dest] of supplementary) {
      try {
        await Deno.copyFile(src, dest)
        console.log(colors.green(`${dest}`))
      } catch (err) {
        if (err instanceof Deno.errors.NotFound) {
          console.log(colors.dim(` - skipped (not found): ${src}`))
        } else {
          const reason = err instanceof Error ? err.message : String(err)
          console.log(colors.red(` - failed to copy ${src}: ${reason}`))
        }
      }
    }

    console.log(colors.bold.green(`\nDone! Results written to ${output}/`))
  })
  .parse(Deno.args)