···91919292 $ twitter-to-sqlite list-members members.db 927913322841653248 --ids
93939494+## Retrieving just follower and friend IDs
9595+9696+It's also possible to retrieve just the numeric Twitter IDs of the accounts that specific users are following ("friends" in Twitter's API terminology) or are followed by ("followers"):
9797+9898+ $ twitter-to-sqlite followers-ids members.db simonw cleopaws
9999+100100+This will populate the `following` table with `followed_id`/`follower_id` pairs for the two specified accounts, listing every account ID that is following either of those two accounts.
101101+102102+ $ twitter-to-sqlite friends-ids members.db simonw cleopaws
103103+104104+This will do the same thing but pull the IDs that those accounts are following.
105105+106106+Both of these commands also support `--sql` and `--attach` as an alternative to passing screen names as direct command-line arguments. You can use `--ids` to process the inputs as user IDs rather than screen names.
107107+108108+The underlying Twitter APIs have a rate limit of 15 requests every 15 minutes - though they do return up to 5,000 IDs in each call. By default both of these subcommands will wait for 61 seconds between API calls in order to stay within the rate limit - if you know you will not be making many calls, you can reduce this to a one-second delay using `--sleep=1`.
109109+94110## Design notes
9511196112* Tweet IDs are stored as integers, to afford sorting by ID in a sensible way
113113+* While we configure foreign key relationships between tables, we do not ask SQLite to enforce them. This is used by the `following` table to allow the `followers-ids` and `friends-ids` commands to populate it with user IDs even if the user accounts themselves are not yet present in the `users` table.
+101
twitter_to_sqlite/cli.py
···11import click
22+import datetime
23import os
34import sqlite_utils
45import json
···243244 db = sqlite_utils.Database(db_path)
244245 for identifier in identifiers:
245246 utils.fetch_and_save_list(db, session, identifier, ids)
@cli.command(name="followers-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as user IDs, not screen names"
)
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account followers"
    # The docstring above doubles as the command's --help text, so it is kept
    # to a single line; implementation notes live in comments instead.
    #
    # Each specified account is the one being followed, so it is written to
    # the "followed_id" column and every ID returned by the followers/ids
    # endpoint is written to "follower_id".
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/followers/ids.json",
        first_key="followed_id",
        second_key="follower_id",
    )
@cli.command(name="friends-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as user IDs, not screen names"
)
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account friends"
    # The docstring above doubles as the command's --help text, so it is kept
    # to a single line; implementation notes live in comments instead.
    #
    # Each specified account is the one doing the following, so it is written
    # to the "follower_id" column and every ID returned by the friends/ids
    # endpoint is written to "followed_id".
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/friends/ids.json",
        first_key="follower_id",
        second_key="followed_id",
    )
def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """Shared implementation behind the followers-ids and friends-ids commands.

    For every resolved identifier this fetches and saves the account's
    profile, then pages through ``api_url`` and records one row in the
    ``following`` table per returned ID.

    db_path -- path of the SQLite database to write to
    identifiers, attach, sql -- passed to utils.resolve_identifiers() to
        produce the accounts to process
    auth -- path to the JSON file holding the API credentials
    ids -- when true, identifiers are sent as "user_id"; otherwise as
        "screen_name"
    sleep -- delay in seconds handed to utils.cursor_paginate()
    api_url -- cursor-paginated endpoint that returns an "ids" list
    first_key -- column that receives the ID of the account being processed
    second_key -- column that receives each ID returned by the API
    """
    # Use a context manager so the credentials file is closed promptly
    # instead of being left for the garbage collector.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        utils.save_users(db, [profile])
        args = {("user_id" if ids else "screen_name"): identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            # One naive-UTC timestamp is shared by every row of this page.
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {first_key: user_id, second_key: other_id, "first_seen": first_seen}
                    for other_id in id_batch
                ),
                # NOTE(review): presumably skips pairs that are already
                # stored so their original first_seen survives - confirm
                # against the table's primary key.
                ignore=True,
            )
+17
twitter_to_sqlite/utils.py
···309309 if not cursor:
310310 break
311311 time.sleep(1) # Rate limit = 900 per 15 minutes
def cursor_paginate(session, url, args, key, page_size=200, sleep=None):
    """Execute cursor pagination, yielding ``body[key]`` for each page.

    session -- object whose ``get(url, params=...)`` returns a response
        exposing ``.json()``
    url -- endpoint to request
    args -- base query parameters; copied, never mutated
    key -- field of each decoded response to yield
    page_size -- value sent as the "page_size" query parameter
    sleep -- seconds to pause between successive requests, or None for no
        pause; no pause follows the final page

    Raises KeyError (including the URL and decoded body in its message) when
    a response lacks ``key``, e.g. because the API returned an error payload
    instead of a page of results.
    """
    params = dict(args)
    # NOTE(review): Twitter's cursored v1.1 endpoints appear to document this
    # parameter as "count"; the name is left unchanged here so existing
    # request behaviour is preserved - confirm before renaming.
    params["page_size"] = page_size
    cursor = -1  # starting cursor value; a falsy next_cursor ends the loop
    while cursor:
        params["cursor"] = cursor
        body = session.get(url, params=params).json()
        if key not in body:
            # Surface the actual response rather than a bare KeyError(key).
            raise KeyError(
                "Response from {} has no {!r} field: {!r}".format(url, key, body)
            )
        yield body[key]
        cursor = body["next_cursor"]
        if cursor and sleep is not None:
            time.sleep(sleep)