this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

followers-ids and friends-ids subcommands

Closes #9

+135
+17
README.md
··· 91 91 92 92 $ twitter-to-sqlite list-members members.db 927913322841653248 --ids 93 93 94 + ## Retrieving just follower and friend IDs 95 + 96 + It's also possible to retrieve just the numeric Twitter IDs of the accounts that specific users are following ("friends" in Twitter's API terminology) or followed-by: 97 + 98 + $ twitter-to-sqlite followers-ids members.db simonw cleopaws 99 + 100 + This will populate the `following` table with `followed_id`/`follower_id` pairs for the two specified accounts, listing every account ID that is following either of those two accounts. 101 + 102 + $ twitter-to-sqlite friends-ids members.db simonw cleopaws 103 + 104 + This will do the same thing but pull the IDs that those accounts are following. 105 + 106 + Both of these commands also support `--sql` and `--attach` as an alternative to passing screen names as direct command-line arguments. You can use `--ids` to process the inputs as user IDs rather than screen names. 107 + 108 + The underlying Twitter APIs have a rate limit of 15 requests every 15 minutes - though they do return up to 5,000 IDs in each call. By default both of these subcommands will wait for 61 seconds between API calls in order to stay within the rate limit - you can adjust this behaviour down to just one second delay if you know you will not be making many calls using `--sleep=1`. 109 + 94 110 ## Design notes 95 111 96 112 * Tweet IDs are stored as integers, to afford sorting by ID in a sensible way 113 + * While we configure foreign key relationships between tables, we do not ask SQLite to enforce them. This is used by the `following` table to allow the `followers-ids` and `friends-ids` commands to populate it with user IDs even if the user accounts themselves are not yet present in the `users` table.
+101
twitter_to_sqlite/cli.py
··· 1 1 import click 2 + import datetime 2 3 import os 3 4 import sqlite_utils 4 5 import json ··· 243 244 db = sqlite_utils.Database(db_path) 244 245 for identifier in identifiers: 245 246 utils.fetch_and_save_list(db, session, identifier, ids) 247 + 248 + 249 + @cli.command(name="followers-ids") 250 + @click.argument( 251 + "db_path", 252 + type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 253 + required=True, 254 + ) 255 + @add_identifier_options 256 + @click.option( 257 + "-a", 258 + "--auth", 259 + type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True), 260 + default="auth.json", 261 + help="Path to auth.json token file", 262 + ) 263 + @click.option( 264 + "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings" 265 + ) 266 + @click.option( 267 + "--sleep", type=int, default=61, help="Seconds to sleep between API calls" 268 + ) 269 + def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep): 270 + "Populate followers table with IDs of account followers" 271 + _shared_friends_ids_followers_ids( 272 + db_path, 273 + identifiers, 274 + attach, 275 + sql, 276 + auth, 277 + ids, 278 + sleep, 279 + api_url="https://api.twitter.com/1.1/followers/ids.json", 280 + first_key="followed_id", 281 + second_key="follower_id", 282 + ) 283 + 284 + 285 + @cli.command(name="friends-ids") 286 + @click.argument( 287 + "db_path", 288 + type=click.Path(file_okay=True, dir_okay=False, allow_dash=False), 289 + required=True, 290 + ) 291 + @add_identifier_options 292 + @click.option( 293 + "-a", 294 + "--auth", 295 + type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True), 296 + default="auth.json", 297 + help="Path to auth.json token file", 298 + ) 299 + @click.option( 300 + "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings" 301 + ) 302 + @click.option( 303 + "--sleep", type=int, default=61, help="Seconds to sleep between API calls" 304 + ) 305 + def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep): 306 + "Populate followers table with IDs of account friends" 307 + _shared_friends_ids_followers_ids( 308 + db_path, 309 + identifiers, 310 + attach, 311 + sql, 312 + auth, 313 + ids, 314 + sleep, 315 + api_url="https://api.twitter.com/1.1/friends/ids.json", 316 + first_key="follower_id", 317 + second_key="followed_id", 318 + ) 319 + 320 + 321 + def _shared_friends_ids_followers_ids( 322 + db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key 323 + ): 324 + auth = json.load(open(auth)) 325 + session = utils.session_for_auth(auth) 326 + db = sqlite_utils.Database(db_path) 327 + identifiers = utils.resolve_identifiers(db, identifiers, attach, sql) 328 + for identifier in identifiers: 329 + # Make sure this user is saved 330 + arg_user_id = identifier if ids else None 331 + arg_screen_name = None if ids else identifier 332 + profile = utils.get_profile(session, arg_user_id, arg_screen_name) 333 + user_id = profile["id"] 334 + utils.save_users(db, [profile]) 335 + args = {("user_id" if ids else "screen_name"): identifier} 336 + for id_batch in utils.cursor_paginate( 337 + session, api_url, args, "ids", 5000, sleep 338 + ): 339 + first_seen = datetime.datetime.utcnow().isoformat() 340 + db["following"].insert_all( 341 + ( 342 + {first_key: user_id, second_key: other_id, "first_seen": first_seen} 343 + for other_id in id_batch 344 + ), 345 + ignore=True, 346 + )
+17
twitter_to_sqlite/utils.py
··· 309 309 if not cursor: 310 310 break 311 311 time.sleep(1) # Rate limit = 900 per 15 minutes 312 + 313 + 314 + def cursor_paginate(session, url, args, key, page_size=200, sleep=None): 315 + "Execute cursor pagination, yelding 'key' for each page" 316 + args = dict(args) 317 + args["page_size"] = page_size 318 + cursor = -1 319 + while cursor: 320 + args["cursor"] = cursor 321 + r = session.get(url, params=args) 322 + body = r.json() 323 + yield body[key] 324 + cursor = body["next_cursor"] 325 + if not cursor: 326 + break 327 + if sleep is not None: 328 + time.sleep(sleep)