this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Fix importing of split tweets files; remove the archive_ prefix from table names

alice 5f2f1f74 f5f224a3

+5 -7
+5 -7
twitter_to_sqlite/archive.py
··· 28 28 29 29 return inner 30 30 31 - 32 31 def register_multi(filename): 33 32 def inner(fn): 34 33 transformers[filename] = (fn, None) ··· 197 196 198 197 @register_each("tweet", pk="id") 199 198 @register_each("tweets", pk="id") 199 + @register_each("tweets-part1", pk="id") 200 200 def tweet(item): 201 201 # Older versions of the archive have the tweet data at the top level of the 202 202 # item; newer versions have it all in a 'tweet' sub-key. ··· 208 208 item[key] = int(item[key]) 209 209 210 210 # Handle some columns that are sometimes missing 211 - optional_columns = ["possibly_sensitive", "coordinates", "geo", "extended_entities"] 211 + optional_columns = ["possibly_sensitive", "coordinates", "geo", "extended_entities", "withheld_in_countries"] 212 212 for col in optional_columns: 213 213 item.setdefault(col, None) 214 214 ··· 239 239 240 240 def import_from_file(db, filename, content): 241 241 assert filename.endswith(".js"), "{} does not end with .js".format(filename) 242 - existing_tables = set(db.table_names()) 243 242 filename = filename[: -len(".js")] 244 243 if filename not in transformers: 245 244 if filename not in IGNORE: ··· 249 248 data = extract_json(content) 250 249 to_insert = transformer(data) 251 250 for table, rows in to_insert.items(): 252 - table_name = "archive_{}".format(table.replace("-", "_")) 253 - # Drop and re-create if it already exists 254 - if table_name in existing_tables: 255 - db[table_name].drop() 251 + table_name = "{}".format(table.replace("-", "_")) 252 + if table_name == "tweets_part1": 253 + table_name = "tweets" 256 254 if pk is not None: 257 255 db[table_name].insert_all(rows, pk=pk, replace=True) 258 256 else: