this repo has no description
1import datetime
2import hashlib
3import json
4import os
5import pathlib
6import time
7
8import click
9
10from twitter_to_sqlite import archive
11from twitter_to_sqlite import utils
12
13
def add_identifier_options(subcommand):
    """
    Decorate a click command callback with the shared identifier inputs.

    Adds a variadic ``identifiers`` argument plus the ``--attach`` and
    ``--sql`` options, then returns the decorated callback.
    """
    identifiers_argument = click.argument("identifiers", type=str, nargs=-1)
    attach_option = click.option(
        "--attach",
        type=click.Path(file_okay=True, dir_okay=False, allow_dash=False, exists=True),
        multiple=True,
        help="Additional database file to attach",
    )
    sql_option = click.option("--sql", help="SQL query to fetch identifiers to use")
    # Applied innermost-first, which is the same as stacking the three
    # decorators (argument, --attach, --sql) top-to-bottom above the function.
    return identifiers_argument(attach_option(sql_option(subcommand)))
31
32
# Root command group; every subcommand in this module registers on it.
@click.group()
@click.version_option()
def cli():
    """Save data from Twitter to a SQLite database"""
37
38
@cli.command()
@click.argument("url")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def fetch(url, auth):
    "Make an authenticated request to the Twitter API"
    # url:  endpoint to GET with the authenticated session.
    # auth: path to the JSON credentials file.
    # Read the credentials inside a context manager so the file handle is
    # closed immediately instead of whenever the garbage collector runs.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    # Pretty-print the decoded JSON response to stdout.
    click.echo(json.dumps(session.get(url).json(), indent=4))
53
54
@cli.command()
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    default="auth.json",
    help="Path to save tokens to, defaults to auth.json",
)
def auth(auth):
    "Save authentication credentials to a JSON file"
    # auth: destination path for the credentials file.
    click.echo("Create an app here: https://developer.twitter.com/en/apps")
    click.echo("Then navigate to 'Keys and tokens' and paste in the following:")
    click.echo()
    credentials = {
        "api_key": click.prompt("API key"),
        "api_secret_key": click.prompt("API secret key"),
        "access_token": click.prompt("Access token"),
        "access_token_secret": click.prompt("Access token secret"),
    }
    # Write through a context manager so the data is flushed and the handle
    # closed deterministically before the command returns.
    with open(auth, "w") as fp:
        fp.write(json.dumps(credentials, indent=4) + "\n")
84
85
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def followers(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save followers for specified users (defaults to authenticated user)"
    # Thin wrapper: all the work happens in the helper shared with `friends`.
    _shared_friends_followers(
        db_path=db_path,
        identifiers=identifiers,
        attach=attach,
        sql=sql,
        auth=auth,
        ids=ids,
        silent=silent,
        noun="followers",
    )
107
108
def _shared_friends_followers(
    db_path, identifiers, attach, sql, auth, ids, silent, noun
):
    """
    Shared implementation behind the ``followers`` and ``friends`` commands.

    For each identifier (or the authenticated user when none are given) the
    user's profile is looked up, then the follower/friend accounts are
    fetched in chunks and saved with ``utils.save_users``.

    db_path     -- path to the SQLite database
    identifiers -- screen names (or user IDs when ``ids`` is true)
    attach, sql -- forwarded to ``utils.resolve_identifiers``
    auth        -- path to the JSON credentials file
    ids         -- treat identifiers as user IDs instead of screen names
    silent      -- suppress the progress bar
    noun        -- "friends" or "followers"

    Raises ValueError if ``noun`` is anything else.
    """
    # Explicit check rather than ``assert`` so it still runs under ``python -O``.
    if noun not in ("friends", "followers"):
        raise ValueError("noun must be 'friends' or 'followers'")
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        # No explicit users: fall back to the authenticated account.
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        # Tell save_users which side of the relationship this user is on.
        if noun == "followers":
            save_users_kwargs = {"followed_id": user_id}
        else:
            save_users_kwargs = {"follower_id": user_id}

        def go(update):
            # Chunks are saved as they arrive and not retained, so memory
            # use does not grow with the size of the account.
            for users_chunk in utils.fetch_user_list_chunks(
                session, user_id, screen_name, noun=noun
            ):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if silent:
            go(lambda x: None)
        else:
            # The profile's count gives the progress bar its length.
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                length=count,
                label="Importing {:,} {} for @{}".format(count, noun, screen_name),
            ) as bar:
                go(bar.update)
160
161
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def friends(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save friends for specified users (defaults to authenticated user)"
    # Thin wrapper: all the work happens in the helper shared with `followers`.
    _shared_friends_followers(
        db_path=db_path,
        identifiers=identifiers,
        attach=attach,
        sql=sql,
        auth=auth,
        ids=ids,
        silent=silent,
        noun="friends",
    )
183
184
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
@click.option("--stop_after", type=int, help="Stop after this many")
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    # The profile supplies the ID recorded as `favorited_by` for each tweet.
    profile = utils.get_profile(db, session, user_id, screen_name)
    with click.progressbar(
        utils.fetch_favorites(session, db, user_id, screen_name, stop_after),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        # The progress bar wraps the favorites iterable, so consuming it in
        # save_tweets advances the display.
        utils.save_tweets(db, bar, favorited_by=profile["id"])
213
214
@cli.command(name="user-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--stop_after", type=int, help="Only pull this number of recent tweets")
@click.option("--user_id", help="Numeric user ID", hidden=True)
@click.option("--screen_name", help="Screen name", hidden=True)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # A lone --user_id is a numeric ID; without this it would be
            # looked up below as if it were a screen name.
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every label to the longest identifier so the progress bars line up.
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was fetched above; re-read the saved row from the database.
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        # With --since / --since_id only part of the timeline is fetched,
        # so the total is unknown.
        if since or since_id:
            expected_length = None

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            if chunk:
                utils.save_tweets(db, chunk)
321
322
@cli.command(name="home-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def home_timeline(db_path, auth, since, since_id):
    "Save tweets from timeline for authenticated user"
    # Endpoint and link table for this command; the shared helper does the rest.
    timeline_url = "https://api.twitter.com/1.1/statuses/home_timeline.json"
    _shared_timeline(
        db_path=db_path,
        auth=auth,
        since=since,
        since_id=since_id,
        table="timeline_tweets",
        api_url=timeline_url,
        since_type="home",
    )
353
354
@cli.command(name="mentions-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved mention",
)
@click.option("--since_id", type=str, help="Pull mentions since this Tweet ID")
def mentions_timeline(db_path, auth, since, since_id):
    "Save tweets that mention the authenticated user"
    # Endpoint, link table and pause for this command; the shared helper
    # does the rest.
    mentions_url = "https://api.twitter.com/1.1/statuses/mentions_timeline.json"
    _shared_timeline(
        db_path=db_path,
        auth=auth,
        since=since,
        since_id=since_id,
        table="mentions_tweets",
        api_url=mentions_url,
        sleep=10,
        since_type="mentions",
    )
386
387
def _shared_timeline(
    db_path, auth, since, since_id, table, api_url, sleep=1, since_type=None
):
    """
    Shared implementation behind ``home-timeline`` and ``mentions-timeline``.

    Fetches tweets from ``api_url`` for the authenticated user, saves them,
    and records a (user, tweet) row in ``table`` for each one.

    db_path    -- path to the SQLite database
    auth       -- path to the JSON credentials file
    since      -- flag forwarded to ``utils.fetch_timeline``
    since_id   -- tweet ID forwarded to ``utils.fetch_timeline``
    table      -- name of the table linking the user to the fetched tweets
    api_url    -- timeline endpoint to read
    sleep      -- forwarded to ``utils.fetch_timeline``
    since_type -- forwarded to ``utils.fetch_timeline``
    """
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session)
    # Fixed length used only to size the progress bar.
    expected_length = 800
    since_key = profile["id"]

    def save_chunk(db, chunk):
        utils.save_tweets(db, chunk)
        # Record whose timeline they came from
        db[table].insert_all(
            [{"user": profile["id"], "tweet": tweet["id"]} for tweet in chunk],
            pk=("user", "tweet"),
            foreign_keys=("user", "tweet"),
            replace=True,
        )

    with click.progressbar(
        utils.fetch_timeline(
            session,
            api_url,
            db,
            sleep=sleep,
            since=since,
            since_id=since_id,
            since_type=since_type,
            since_key=since_key,
        ),
        length=expected_length,
        label="Importing tweets",
        show_pos=True,
    ) as bar:
        # Save them 100 at a time
        chunk = []
        for tweet in bar:
            chunk.append(tweet)
            if len(chunk) >= 100:
                save_chunk(db, chunk)
                chunk = []
        if chunk:
            save_chunk(db, chunk)
432
433
@cli.command(name="users-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Users are fetched in batches and each batch is saved as it arrives.
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
457
458
@cli.command(name="statuses-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--skip-existing", is_flag=True, help="Skip tweets that are already in the DB"
)
@click.option("--silent", is_flag=True, help="Disable progress bar")
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Only filter when the tweets table exists; if it does not, nothing has
    # been saved yet and there is nothing to skip.
    if skip_existing and db["tweets"].exists():
        existing_ids = {
            row[0] for row in db.conn.execute("select id from tweets").fetchall()
        }
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    def save_batches(update):
        # Save each batch as it arrives and report its size to `update`.
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            update(len(batch))

    if silent:
        save_batches(lambda n: None)
    else:
        # Do it with a progress bar
        count = len(identifiers)
        with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
        ) as bar:
            save_batches(bar.update)
501
502
@cli.command(name="lists")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen_names")
@click.option("--members", is_flag=True, help="Retrieve members for each list")
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    for index, identifier in enumerate(identifiers):
        if index:
            # Rate limit is one per minute, so pause before every user after
            # the first. The previous `first` flag was never cleared, so
            # this sleep never ran.
            time.sleep(60)
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # Strip a leading "@" so the value has the screen_name/slug
                # shape that list-members passes to the same helper.
                # NOTE(review): assumes full_name looks like "@user/slug" —
                # confirm against the API payload.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].lstrip("@")
                )
544
545
@cli.command(name="list-members")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("identifiers", type=str, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
def list_members(db_path, identifiers, auth, ids):
    "Fetch lists - accepts one or more screen_name/list_slug identifiers"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    # One fetch-and-save call per list; `ids` says whether each identifier
    # is a list ID rather than a screen_name/list_slug string.
    for identifier in identifiers:
        utils.fetch_and_save_list(db, session, identifier, ids)
570
571
@cli.command(name="followers-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account followers"
    # The --ids help text used to describe list IDs, but the shared helper
    # sends identifiers as user_id / screen_name. The help string above also
    # named a "followers" table, while rows are written to "following".
    # Here the looked-up user is the one being followed; each returned ID is
    # a follower.
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/followers/ids.json",
        first_key="followed_id",
        second_key="follower_id",
    )
606
607
@cli.command(name="friends-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account friends"
    # The --ids help text used to describe list IDs, but the shared helper
    # sends identifiers as user_id / screen_name. The help string above also
    # named a "followers" table, while rows are written to "following".
    # Here the looked-up user is the follower; each returned ID is an
    # account they follow.
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/friends/ids.json",
        first_key="follower_id",
        second_key="followed_id",
    )
642
643
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block.
        with db.conn:
            utils.save_tweets(db, [tweet])
669
670
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block.
        with db.conn:
            utils.save_tweets(db, [tweet])
707
708
def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """
    Shared implementation behind ``followers-ids`` and ``friends-ids``.

    For each identifier the user's profile is looked up, then ID batches
    are paged from ``api_url`` and inserted into the ``following`` table as
    ``{first_key: <user id>, second_key: <other id>, "first_seen": <UTC ISO time>}``
    rows, with ``ignore=True`` passed to the insert.

    db_path     -- path to the SQLite database
    identifiers -- screen names (or user IDs when ``ids`` is true)
    attach, sql -- forwarded to ``utils.resolve_identifiers``
    auth        -- path to the JSON credentials file
    ids         -- treat identifiers as user IDs instead of screen names
    sleep       -- seconds passed to the paginator and slept between users
    api_url     -- the ids endpoint to page through
    first_key   -- column that receives the looked-up user's ID
    second_key  -- column that receives each ID returned by the API
    """
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    lookup_key = "user_id" if ids else "screen_name"
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        args = {lookup_key: identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            # One timestamp per batch, shared by every row in it.
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {first_key: user_id, second_key: other_id, "first_seen": first_seen}
                    for other_id in id_batch
                ),
                ignore=True,
            )
        # Pause before moving on to the next user; the paginator is handed
        # the same `sleep` value for its own use between pages.
        time.sleep(sleep)
735
736
@cli.command(name="import")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False),
    required=True,
)
@click.argument(
    "paths",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False, exists=True),
    required=True,
    nargs=-1,
)
def import_(db_path, paths):
    """
    Import data from a Twitter exported archive. Input can be the path to a zip
    file, a directory full of .js files or one or more direct .js files.
    """
    db = utils.open_database(db_path)
    for filepath in paths:
        path = pathlib.Path(filepath)
        if path.suffix == ".zip":
            for filename, content in utils.read_archive_js(filepath):
                archive.import_from_file(db, filename, content)
        elif path.is_dir():
            # Import every .js file in this directory
            for js_path in path.glob("*.js"):
                # read_bytes() opens, reads and closes the file in one call,
                # so no handle is left for the garbage collector.
                archive.import_from_file(db, js_path.name, js_path.read_bytes())
        elif path.suffix == ".js":
            archive.import_from_file(db, path.name, path.read_bytes())
        else:
            raise click.ClickException("Path must be a .js or .zip file or a directory")
768
769
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("q")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--geocode",
    type=str,
    help="latitude,longitude,radius - where radius is a number followed by mi or km",
)
@click.option("--lang", type=str, help="ISO 639-1 language code")
@click.option("--locale", type=str, help="Locale: only 'ja' is currently effective")
@click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"]))
@click.option("--count", type=int, default=100, help="Number of results per page")
@click.option("--stop_after", type=int, help="Stop after this many")
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def search(db_path, q, auth, since, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    # These two control fetching and are not search API parameters, so
    # remove them before the rest of kwargs becomes the search arguments.
    since_id = kwargs.pop("since_id", None)
    stop_after = kwargs.pop("stop_after", None)
    # Close the credentials file as soon as it has been parsed.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)

    search_args = {"q": q}
    search_args.update(
        (key, value) for key, value in kwargs.items() if value is not None
    )

    # Hash of the search arguments, used as the since_key and stored on the
    # search_runs row.
    args_hash = hashlib.sha1(
        json.dumps(search_args, sort_keys=True, separators=(",", ":")).encode("utf8")
    ).hexdigest()

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        db,
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
        since_id=since_id,
        # Forward the --since flag; it was accepted but previously dropped,
        # so the option had no effect for searches.
        since=since,
        since_type="search",
        since_key=args_hash,
    )

    if not db["search_runs"].exists():
        db["search_runs"].create(
            {"id": int, "name": str, "args": str, "started": str, "hash": str}, pk="id"
        )

    def save_chunk(db, search_run_id, chunk):
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].insert_all(
            [{"search_run": search_run_id, "tweet": tweet["id"]} for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=(
                ("search_run", "search_runs", "id"),
                ("tweet", "tweets", "id"),
            ),
            replace=True,
        )

    chunk = []
    search_run_id = None
    for tweet in tweets:
        if search_run_id is None:
            # Create the search_runs row only once the first tweet arrives,
            # so a search with no results records no run.
            search_run_id = (
                db["search_runs"]
                .insert(
                    {
                        "name": search_args["q"],
                        "args": {
                            key: value
                            for key, value in search_args.items()
                            if key not in {"q", "count"}
                        },
                        "started": datetime.datetime.utcnow().isoformat(),
                        "hash": args_hash,
                    },
                    alter=True,
                )
                .last_pk
            )
        chunk.append(tweet)
        # Save them 10 at a time
        if len(chunk) >= 10:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    if chunk:
        save_chunk(db, search_run_id, chunk)