···59596060 $ twitter-to-sqlite users-lookup users.db simonw cleopaws
61616262-You can pass user IDs instead usincg the `--ids` option:
6262+You can pass user IDs instead using the `--ids` option:
63636464 $ twitter-to-sqlite users-lookup users.db 12497 3166449535 --ids
6565+6666+This command also accepts `--sql` and `--attach` options, documented below.
6767+6868+## Retrieve tweets in bulk
6969+7070+If you have a list of tweet IDs you can bulk fetch them using the `statuses-lookup` command:
7171+7272+ $ twitter-to-sqlite statuses-lookup tweets.db 1122154819815239680 1122154178493575169
7373+7474+The `--sql` and `--attach` options are supported.
7575+7676+Here's a recipe to retrieve any tweets that your stored tweets are replies to, but which have not yet been stored in your database:
7777+7878+ $ twitter-to-sqlite statuses-lookup tweets.db \
7979+ --sql='
8080+ select in_reply_to_status_id
8181+ from tweets
8282+ where in_reply_to_status_id is not null' \
8383+ --skip-existing
8484+8585+The `--skip-existing` option means that tweets that have already been stored in the database will not be fetched again.
65866687## Retrieving Twitter followers
6788···111132112133This option is available for some subcommands - run `twitter-to-sqlite command-name --help` to check.
113134114114-You can provide Twitter screen names (or user IDs) directly as command-line arguments, or you can provide those screen names or IDs by executing a SQL query.
135135+You can provide Twitter screen names (or user IDs or tweet IDs) directly as command-line arguments, or you can provide those screen names or IDs by executing a SQL query.
115136116137For example: consider a SQLite database with an `attendees` table listing names and Twitter accounts - something like this:
117138
+44
twitter_to_sqlite/cli.py
···221221 utils.save_users(db, batch)
@cli.command(name="statuses-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--skip-existing", is_flag=True, help="Skip tweets that are already in the DB"
)
@click.option("--silent", is_flag=True, help="Disable progress bar")
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    # NOTE: the one-line docstring above is kept verbatim because click uses
    # it as the command's --help text.
    #
    # Parameters (all supplied by the click decorators):
    #   db_path       - path to the SQLite database file to write tweets into
    #   identifiers   - tweet IDs given on the command line
    #   attach, sql   - passed through to utils.resolve_identifiers() together
    #                   with identifiers to produce the final list of IDs
    #   auth          - path to the JSON file holding the API credentials
    #   skip_existing - drop IDs already present in the tweets table
    #   silent        - suppress the progress bar

    # Use a context manager so the credentials file is closed promptly
    # instead of relying on garbage collection.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if skip_existing:
        # On a fresh database the tweets table does not exist yet; querying it
        # directly would raise, so check sqlite_master first. With no table
        # there is nothing to skip.
        tweets_table = db.conn.execute(
            "select 1 from sqlite_master where type = 'table' and name = 'tweets'"
        ).fetchone()
        if tweets_table:
            existing_ids = {
                row[0] for row in db.conn.execute("select id from tweets").fetchall()
            }
            identifiers = [i for i in identifiers if int(i) not in existing_ids]

    batches = utils.fetch_status_batches(session, identifiers)

    if silent:
        for batch in batches:
            utils.save_tweets(db, batch)
        return

    # Do it with a progress bar
    count = len(identifiers)
    with click.progressbar(
        length=count,
        label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
    ) as bar:
        for batch in batches:
            utils.save_tweets(db, batch)
            # Progress advances by the number of tweets actually returned,
            # which may be lower than the number of IDs requested.
            bar.update(len(batch))
266266+267267+224268@cli.command(name="list-members")
225269@click.argument(
226270 "db_path",
+19
twitter_to_sqlite/utils.py
···255255 time.sleep(sleep)
def fetch_status_batches(session, tweet_ids, sleep=1):
    """Yield the decoded JSON response for each batch of up to 100 tweet IDs.

    Args:
        session: object exposing ``get(url, params=...)`` whose return value
            has a ``.json()`` method.
        tweet_ids: iterable of tweet IDs (anything ``str()`` can render).
        sleep: seconds to pause between consecutive requests.

    Yields:
        Whatever ``.json()`` returns for each request - presumably a list of
        tweet dicts on success; confirm error payload handling in callers.
    """
    url = "https://api.twitter.com/1.1/statuses/lookup.json"
    batch_size = 100
    # Materialise once so any iterable (generator, range, ...) can be sliced.
    ids = list(tweet_ids)
    for start in range(0, len(ids), batch_size):
        if start:
            # Pause only *between* requests; sleeping after the final batch
            # would just delay the caller for no benefit.
            time.sleep(sleep)
        chunk = ids[start : start + batch_size]
        args = {"id": ",".join(map(str, chunk)), "tweet_mode": "extended"}
        yield session.get(url, params=args).json()
275275+276276+258277def resolve_identifiers(db, identifiers, attach, sql):
259278 if sql:
260279 if attach: