Convert Google Takeout Maps data to GeoJSON
0
takeout_to_geojson.ts
1
/**
 * Convert Google Takeout Maps data to GeoJSON files, so that I can finally
 * escape from Google Maps.
 *
 * Setup: download a Google Takeout export that includes "Maps (your places)"
 * and "Saved", place the export folder next to this script, and pass it as
 * --input:
 *
 *   deno run ./takeout_to_geojson.ts --input ./Takeout
 *
 * NOTE: On first use you will probably need to accept Google's T&C manually
 * in the browser window before URL redirects work.
 */
14
15import { Command } from 'jsr:@cliffy/command'
16import { colors } from 'jsr:@cliffy/ansi/colors'
17import { parse } from 'jsr:@std/csv'
18import { ensureDir } from 'jsr:@std/fs'
19import { launch } from 'jsr:@astral/astral'
20
21// ── Browser ───────────────────────────────────────────────────────────────────
22
// Lazily created browser instance shared by every geocoding call.
let _browser: Awaited<ReturnType<typeof launch>> | null = null
// Directory passed to the browser as --user-data-dir; read when the browser launches.
let _profileDir = './browser-profile'

/** Return the shared browser, launching it (non-headless) on first use. */
async function getBrowser() {
  if (_browser) return _browser

  const userDataDirFlag = `--user-data-dir=${_profileDir}`
  _browser = await launch({ headless: false, args: [userDataDirFlag] })
  return _browser
}
33
/** Close the shared browser if one was launched, then forget the instance. */
async function closeBrowser() {
  if (_browser !== null) await _browser.close()
  _browser = null
}
38
39// ── Geocoding ─────────────────────────────────────────────────────────────────
40
// Matches the "@<lat>,<lon>" segment of a URL; group 1 is the latitude text,
// group 2 the longitude text.
const matchLatLon = /@([-+]?\d{1,2}(?:\.\d+)?),([-+]?\d{1,3}(?:\.\d+)?)/

/**
 * Parse the first "@lat,lon" pair found in `url`.
 *
 * @returns The coordinates as numbers, or null when the URL contains no such pair.
 */
function coordsFromUrl(url: string) {
  const found = matchLatLon.exec(url)
  if (found === null) return null

  const [, latText, lonText] = found
  return { lat: parseFloat(latText), lon: parseFloat(lonText) }
}
48
// Opens the Google Maps URL in a real browser so JS executes and the page
// redirects to a URL containing @lat,lon, then reverse-geocodes with Nominatim.
//
// Returns null when no @lat,lon pair shows up in the page URL within the
// polling window. Otherwise returns the Nominatim response fields (if the
// lookup succeeded) merged with numeric `lat` / `lon` taken from the URL; the
// URL-derived coordinates always win over Nominatim's own lat/lon fields.
// The page is closed on every path, including errors.
async function geocodeGoogleMapsUrl(url: string) {
  const MAX_POLLS = 15
  const POLL_INTERVAL_MS = 1000

  const browser = await getBrowser()
  const page = await browser.newPage()
  try {
    await page.goto(url)

    // Poll the address bar until the redirect lands on a URL with coordinates.
    let finalUrl = url
    for (let attempt = 0; attempt < MAX_POLLS; attempt++) {
      finalUrl = await page.evaluate(() => window.location.href)
      if (matchLatLon.test(finalUrl)) break
      // No point sleeping after the last attempt: nothing will be re-checked.
      if (attempt < MAX_POLLS - 1) {
        await new Promise(r => setTimeout(r, POLL_INTERVAL_MS))
      }
    }
    const coords = coordsFromUrl(finalUrl)
    if (!coords) return null

    // Reverse geocoding is best-effort: on any failure the coordinates alone
    // are returned.
    let nominatim = null
    try {
      const response = await fetch(
        `https://nominatim.openstreetmap.org/reverse?lat=${coords.lat}&lon=${coords.lon}&format=json`,
        // Nominatim's usage policy asks for a User-Agent that identifies the
        // application instead of the HTTP library's stock value.
        { headers: { 'User-Agent': 'takeout-to-geojson (Google Takeout to GeoJSON converter script)' } },
      )
      // Only merge successful responses; error bodies are not place data.
      if (response.ok) nominatim = await response.json()
    } catch {
      nominatim = null
    }

    return { ...nominatim, ...coords }
  } finally {
    await page.close()
  }
}
74
75// ── Helpers ───────────────────────────────────────────────────────────────────
76
/**
 * Extract a readable name from the `q` query parameter of a URL.
 *
 * `+` is treated as a space and percent-escapes are decoded. If the value
 * contains a malformed percent-escape (for example a bare `%`), the text is
 * returned with only the `+` substitution applied instead of throwing.
 *
 * @param url URL to inspect; may be undefined or empty.
 * @returns The decoded `q` value, or null when there is no URL or no non-empty `q`.
 */
function urlName(url?: string): string | null {
  if (!url) return null

  // Stop at '&' (next parameter) and '#' (fragment) so neither leaks into the name.
  const q = url.match(/[?&]q=([^&#]+)/)?.[1]
  if (!q) return null

  const spaced = q.replace(/\+/g, ' ')
  try {
    return decodeURIComponent(spaced)
  } catch {
    // decodeURIComponent throws URIError on malformed escapes; keep the raw text.
    return spaced
  }
}
82
/**
 * Build a "lat, lon" display name from a [lon, lat] coordinate pair.
 *
 * @returns The formatted string, or null when no coordinates are given.
 */
function coordName(coords?: [number, number]) {
  return coords ? `${coords[1]}, ${coords[0]}` : null
}
88
89// ── CLI ───────────────────────────────────────────────────────────────────────
90
// Entry point: parses CLI options, then converts every saved-list CSV under
// `<input>/Saved` into a GeoJSON FeatureCollection under `<output>/lists`.
// Progress is written to disk after every place, so an interrupted run can be
// resumed: titles already present in a list's output file, and entries
// recorded in `<output>/unfound.json`, are skipped.
await new Command()
  .name('takeout-to-geojson')
  .description('Convert Google Takeout Maps data to GeoJSON files.')
  .option('-i, --input <dir:string>', 'Path to the takeout directory.', { default: './takeout' })
  .option('-o, --output <dir:string>', 'Path to write results.', { default: './results' })
  .action(async ({ input, output }) => {
    // Must be set before the first getBrowser() call, which reads it at launch.
    _profileDir = `${output}/browser-profile`
    await ensureDir(`${output}/lists`)

    const decoder = new TextDecoder()
    const CSV_EXTENSION = /\.csv$/i

    // Index Saved Places by name for fast lookup. A Map (not a plain object)
    // keeps titles such as "constructor" from resolving to prototype members.
    const savedPlaces = JSON.parse(decoder.decode(
      await Deno.readFile(`${input}/Maps (your places)/Saved Places.json`)
    ))
    const placesByName = new Map<string, any>()
    for (const place of savedPlaces.features ?? []) {
      const name = place.properties?.location?.name
        ?? urlName(place.properties?.google_maps_url)
        ?? coordName(place.geometry?.coordinates)
      if (name == null) continue // nothing to key this place by
      place.properties ??= {}
      place.properties.location ??= {}
      place.properties.location.name = name
      placesByName.set(name, place)
    }

    // Load any previously recorded unfound entries so reruns skip them
    const unfoundPath = `${output}/unfound.json`
    let unfound: { list: string; title: string; url: string }[] = []
    try { unfound = JSON.parse(await Deno.readTextFile(unfoundPath)) } catch { /* first run */ }
    const unfoundKeys = new Set(unfound.map(e => `${e.list}::${e.title}`))

    // Remember an entry that could not be resolved and persist the list immediately.
    async function recordUnfound(entry: { list: string; title: string; url: string }) {
      unfound.push(entry)
      unfoundKeys.add(`${entry.list}::${entry.title}`)
      await Deno.writeTextFile(unfoundPath, JSON.stringify(unfound, null, 2))
    }

    // Rate-limit geocoding to stay within Nominatim's 1 req/sec policy
    const GEOCODE_DELAY_MS = 1500
    let lastGeocodeTime = 0

    // Resolve a list entry that is missing from Saved Places by geocoding its
    // URL. Returns a GeoJSON Feature, or null when no coordinates were found.
    async function geocodeStraggler(title: string, url: string) {
      const wait = GEOCODE_DELAY_MS - (Date.now() - lastGeocodeTime)
      if (wait > 0) await new Promise(r => setTimeout(r, wait))

      // Still best-effort, but say why it failed: the entry is about to be
      // recorded as unfound and skipped on every later run.
      const geo = await geocodeGoogleMapsUrl(url).catch((err: unknown) => {
        console.log(colors.red(` geocoding error: ${err instanceof Error ? err.message : String(err)}`))
        return null
      })
      lastGeocodeTime = Date.now()
      // Compare against null rather than truthiness: 0 is a valid coordinate.
      if (geo?.lat == null || geo?.lon == null) return null

      return {
        type: 'Feature',
        geometry: { type: 'Point', coordinates: [geo.lon, geo.lat] },
        properties: {
          ...geo,
          location: { name: title, address: geo.display_name ?? '' },
          date: new Date().toISOString(),
          google_maps_url: url,
        },
      }
    }

    try {
      // Process each saved list CSV
      for await (const dirEntry of Deno.readDir(`${input}/Saved`)) {
        // Only CSV files are lists; skip directories and stray files.
        if (dirEntry.isDirectory || !CSV_EXTENSION.test(dirEntry.name)) continue
        // Strip only the extension so list names containing dots stay intact.
        const listName = dirEntry.name.replace(CSV_EXTENSION, '')
        const outputPath = `${output}/lists/${listName}.json`

        let existing = { name: listName, type: 'FeatureCollection', features: [] as any[] }
        try { existing = JSON.parse(await Deno.readTextFile(outputPath)) } catch { /* first run */ }
        const features = existing.features
        const completedTitles = new Set(features.map((f: any) => f.properties?.Name))

        // Skip any preamble lines that precede the "Title,..." header row.
        const rawCsv = decoder.decode(await Deno.readFile(`${input}/Saved/${dirEntry.name}`))
        const lines = rawCsv.split('\n')
        const headerIndex = lines.findIndex(l => l.startsWith('Title,'))
        const csvToParse = headerIndex > 0 ? lines.slice(headerIndex).join('\n') : rawCsv
        const rows = parse(csvToParse, { skipFirstRow: true }) as Record<string, string>[]

        const pending = rows.filter(r =>
          r.Title && !completedTitles.has(r.Title) && !unfoundKeys.has(`${listName}::${r.Title}`)
        )
        console.log(colors.bold(`\n${listName} `) + colors.dim(`(${pending.length} remaining of ${rows.length})`))

        // `url` / `title` / `note` avoid shadowing the global URL constructor.
        for (const { Title: title, Note: note, URL: url } of rows) {
          if (!title) continue
          // Re-checked per row because unfoundKeys grows while this loop runs.
          if (completedTitles.has(title) || unfoundKeys.has(`${listName}::${title}`)) continue

          // Work on a copy: the same saved place may appear in several lists,
          // and the edits below (notably deleting `date`) must not leak into
          // the next list that uses it.
          const saved = placesByName.get(title)
          let place = saved ? structuredClone(saved) : null

          if (!place && url) {
            console.log(colors.dim(` geocoding: ${title}`))
            place = await geocodeStraggler(title, url)
          }

          if (!place) {
            await recordUnfound({ list: listName, title, url })
            console.log(colors.red(` ✗ ${title}`))
            continue
          }

          // Reshape properties: date -> timestamp, plus Name and description.
          place.properties.timestamp = place.properties.date
          place.properties.Name = place.properties.location?.name ?? title
          place.properties.description = note
          delete place.properties.date

          features.push(place)
          await Deno.writeTextFile(outputPath, JSON.stringify(
            { name: listName, type: 'FeatureCollection', features }, null, 2
          ))
          console.log(colors.green(` ✓ ${title}`))
        }
      }
    } finally {
      // Close the browser even when processing fails part-way through.
      await closeBrowser()
    }

    // Copy supplementary GeoJSON files as-is
    console.log('\nCopying supplementary files...')
    const supplementary: [string, string][] = [
      [`${input}/Maps (your places)/Saved Places.json`, `${output}/saved_places.json`],
      [`${input}/Maps (your places)/Reviews.json`, `${output}/reviews.json`],
    ]
    for (const [src, dest] of supplementary) {
      try {
        await Deno.copyFile(src, dest)
        console.log(colors.green(` ✓ ${dest}`))
      } catch {
        console.log(colors.dim(` - skipped (not found): ${src}`))
      }
    }

    console.log(colors.bold.green(`\nDone! Results written to ${output}/`))
  })
  .parse(Deno.args)