···28282929 return inner
30303131-3231def register_multi(filename):
3332 def inner(fn):
3433 transformers[filename] = (fn, None)
···197196198197@register_each("tweet", pk="id")
199198@register_each("tweets", pk="id")
199199+@register_each("tweets-part1", pk="id")
200200def tweet(item):
201201 # Older versions of the archive have the tweet data at the top level of the
202202 # item; newer versions have it all in a 'tweet' sub-key.
···208208 item[key] = int(item[key])
209209210210 # Handle some columns that are sometimes missing
211211- optional_columns = ["possibly_sensitive", "coordinates", "geo", "extended_entities"]
211211+ optional_columns = ["possibly_sensitive", "coordinates", "geo", "extended_entities", "withheld_in_countries"]
212212 for col in optional_columns:
213213 item.setdefault(col, None)
214214···239239240240def import_from_file(db, filename, content):
241241 assert filename.endswith(".js"), "{} does not end with .js".format(filename)
242242- existing_tables = set(db.table_names())
243242 filename = filename[: -len(".js")]
244243 if filename not in transformers:
245244 if filename not in IGNORE:
···249248 data = extract_json(content)
250249 to_insert = transformer(data)
251250 for table, rows in to_insert.items():
252252- table_name = "archive_{}".format(table.replace("-", "_"))
253253- # Drop and re-create if it already exists
254254- if table_name in existing_tables:
255255- db[table_name].drop()
251251+ table_name = "{}".format(table.replace("-", "_"))
252252+ if table_name == "tweets_part1":
253253+ table_name = "tweets"
256254 if pk is not None:
257255 db[table_name].insert_all(rows, pk=pk, replace=True)
258256 else: