this repo has no description
1import datetime
2import hashlib
3import json
4import os
5import pathlib
6import time
7
8import click
9
10from twitter_to_sqlite import archive
11from twitter_to_sqlite import utils
12
13
def add_identifier_options(subcommand):
    """Attach the shared identifier parameters to a click command.

    Wraps ``subcommand`` with a variadic ``identifiers`` argument, a
    repeatable ``--attach`` option and a ``--sql`` option, and returns the
    wrapped object.
    """
    attach_path_type = click.Path(
        file_okay=True, dir_okay=False, allow_dash=False, exists=True
    )
    with_identifiers = click.argument("identifiers", type=str, nargs=-1)
    with_attach = click.option(
        "--attach",
        type=attach_path_type,
        multiple=True,
        help="Additional database file to attach",
    )
    with_sql = click.option("--sql", help="SQL query to fetch identifiers to use")
    # Applied innermost first (--sql, --attach, identifiers): the same order
    # as stacking the three decorators, in that listing order, on a function.
    return with_identifiers(with_attach(with_sql(subcommand)))
31
32
# Root command group: the subcommands in this module register on it
# through @cli.command().
@click.group()
@click.version_option()
def cli():
    """Save data from Twitter to a SQLite database"""
37
38
@cli.command()
@click.argument("url")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def fetch(url, auth):
    "Make an authenticated request to the Twitter API"
    # Read the credentials inside a context manager so the file handle is
    # closed as soon as the JSON has been parsed.
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    # Echo the decoded JSON response, pretty-printed with a 4 space indent
    response = session.get(url)
    click.echo(json.dumps(response.json(), indent=4))
53
54
@cli.command()
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    default="auth.json",
    help="Path to save tokens to, defaults to auth.json",
)
def auth(auth):
    "Save authentication credentials to a JSON file"
    click.echo("Create an app here: https://developer.twitter.com/en/apps")
    click.echo("Then navigate to 'Keys and tokens' and paste in the following:")
    click.echo()
    # Prompt for the four values in the order they are written to the file
    credentials = {
        "api_key": click.prompt("API key"),
        "api_secret_key": click.prompt("API secret key"),
        "access_token": click.prompt("Access token"),
        "access_token_secret": click.prompt("Access token secret"),
    }
    # The context manager guarantees the tokens are flushed and the handle
    # is closed even if the write fails part way through.
    with open(auth, "w") as auth_file:
        auth_file.write(json.dumps(credentials, indent=4) + "\n")
84
85
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def followers(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save followers for specified users (defaults to authenticated user)"
    # Thin wrapper: the helper does the work, selected by noun="followers"
    _shared_friends_followers(
        db_path=db_path,
        identifiers=identifiers,
        attach=attach,
        sql=sql,
        auth=auth,
        ids=ids,
        silent=silent,
        noun="followers",
    )
107
108
def _shared_friends_followers(
    db_path, identifiers, attach, sql, auth, ids, silent, noun
):
    """Fetch and save the friends or followers of one or more users.

    ``noun`` must be ``"friends"`` or ``"followers"``. It selects the user
    list that is fetched, the ``<noun>_count`` profile field used to size
    the progress bar, and the keyword (``follower_id`` or ``followed_id``)
    under which the profiled user's ID is passed to ``utils.save_users``.

    When no identifiers are resolved, the screen name from
    ``utils.get_profile(db, session)`` is used instead. ``silent``
    disables the progress bar.
    """
    assert noun in ("friends", "followers")
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]
        # The fallback is a screen name, so it must never be looked up as a
        # user ID even when --ids was passed.
        ids = False

    # Keyword under which the profiled user's ID is handed to save_users
    relationship_key = "followed_id" if noun == "followers" else "follower_id"

    for identifier in identifiers:
        kwargs = {"user_id": identifier} if ids else {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]
        save_users_kwargs = {relationship_key: user_id}

        def go(update):
            # Save each chunk as it arrives and report its size to `update`;
            # nothing is accumulated in memory across chunks.
            for users_chunk in utils.fetch_user_list_chunks(
                session, user_id, screen_name, noun=noun
            ):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if silent:
            go(lambda x: None)
        else:
            # Use the count from the profile so we can have a progress bar
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                length=count,
                label="Importing {:,} {} for @{}".format(count, noun, screen_name),
            ) as bar:
                go(bar.update)
160
161
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def friends(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save friends for specified users (defaults to authenticated user)"
    # Thin wrapper: the helper does the work, selected by noun="friends"
    _shared_friends_followers(
        db_path=db_path,
        identifiers=identifiers,
        attach=attach,
        sql=sql,
        auth=auth,
        ids=ids,
        silent=silent,
        noun="friends",
    )
183
184
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
@click.option("--stop_after", type=int, help="Stop after this many")
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    # The profile supplies the ID recorded as `favorited_by` on saved tweets
    profile = utils.get_profile(db, session, user_id, screen_name)
    favorited_tweets = utils.fetch_favorites(
        session, db, user_id, screen_name, stop_after
    )
    with click.progressbar(
        favorited_tweets,
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar, favorited_by=profile["id"])
213
214
@cli.command(name="user-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--stop_after", type=int, help="Only pull this number of recent tweets")
@click.option("--user_id", help="Numeric user ID", hidden=True)
@click.option("--screen_name", help="Screen name", hidden=True)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # The only identifier is a numeric ID, so it has to be looked up
            # as one rather than as a screen name.
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every progress bar label to the width of the longest identifier
    label_width = max(len(str(identifier)) for identifier in identifiers)
    format_string = "@{:" + str(label_width) + "}"

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was already fetched above; re-read its row from the DB
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        # With --since / --since_id the progress bar is given no length
        if since or since_id:
            expected_length = None

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            if chunk:
                utils.save_tweets(db, chunk)
321
322
@cli.command(name="home-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID"
)
def home_timeline(db_path, auth, since, since_id):
    "Save tweets from timeline for authenticated user"
    # Settings that make the shared timeline helper target the home timeline
    home_settings = {
        "table": "timeline_tweets",
        "api_url": "https://api.twitter.com/1.1/statuses/home_timeline.json",
        "since_type": "home",
    }
    _shared_timeline(db_path, auth, since, since_id, **home_settings)
356
357
@cli.command(name="mentions-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved mention",
)
@click.option(
    "--since_id", type=str, default=False, help="Pull mentions since this Tweet ID"
)
def mentions_timeline(db_path, auth, since, since_id):
    "Save tweets that mention the authenticated user"
    # Settings that make the shared timeline helper target the mentions
    # timeline; note the sleep of 10 instead of the helper's default of 1.
    mentions_settings = {
        "table": "mentions_tweets",
        "api_url": "https://api.twitter.com/1.1/statuses/mentions_timeline.json",
        "sleep": 10,
        "since_type": "mentions",
    }
    _shared_timeline(db_path, auth, since, since_id, **mentions_settings)
392
393
def _shared_timeline(
    db_path, auth, since, since_id, table, api_url, sleep=1, since_type=None
):
    """Fetch a timeline from ``api_url`` and save its tweets.

    Tweets are saved 100 at a time with ``utils.save_tweets``. For every
    saved tweet a ``(user, tweet)`` row is also written to ``table``, where
    ``user`` is the ID of the profile returned by
    ``utils.get_profile(db, session)``. ``sleep``, ``since``, ``since_id``
    and ``since_type`` are forwarded to ``utils.fetch_timeline``; the
    profile ID is passed as its ``since_key``.
    """
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session)
    # NOTE(review): 800 is presumably the most tweets these endpoints will
    # return - confirm against the Twitter API documentation.
    expected_length = 800
    since_key = profile["id"]

    def save_chunk(db, chunk):
        utils.save_tweets(db, chunk)
        # Record whose timeline they came from
        db[table].insert_all(
            [{"user": profile["id"], "tweet": tweet["id"]} for tweet in chunk],
            pk=("user", "tweet"),
            foreign_keys=("user", "tweet"),
            replace=True,
        )

    timeline = utils.fetch_timeline(
        session,
        api_url,
        db,
        sleep=sleep,
        since=since,
        since_id=since_id,
        since_type=since_type,
        since_key=since_key,
    )
    with click.progressbar(
        timeline,
        length=expected_length,
        label="Importing tweets",
        show_pos=True,
    ) as bar:
        # Save them 100 at a time
        chunk = []
        for tweet in bar:
            chunk.append(tweet)
            if len(chunk) >= 100:
                save_chunk(db, chunk)
                chunk = []
        if chunk:
            save_chunk(db, chunk)
438
439
@cli.command(name="users-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Save each batch of users as soon as it has been fetched
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
463
464
@cli.command(name="statuses-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--skip-existing", is_flag=True, help="Skip tweets that are already in the DB"
)
@click.option("--silent", is_flag=True, help="Disable progress bar")
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # If there is no tweets table yet there is nothing to skip, and querying
    # it would fail, so only filter when the table exists.
    if skip_existing and db["tweets"].exists():
        existing_ids = {
            row[0] for row in db.conn.execute("select id from tweets").fetchall()
        }
        identifiers = [i for i in identifiers if int(i) not in existing_ids]

    def save_batches(update):
        # Save each batch as it arrives and report its size to `update`
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
            update(len(batch))

    if silent:
        save_batches(lambda n: None)
    else:
        # Do it with a progress bar
        count = len(identifiers)
        with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
        ) as bar:
            save_batches(bar.update)
507
508
@cli.command(name="lists")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen_names")
@click.option("--members", is_flag=True, help="Retrieve members for each list")
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    for index, identifier in enumerate(identifiers):
        if index:
            # Rate limit is one per minute, so wait before every user after
            # the first. The previous `first` flag was never cleared, so
            # this sleep never ran.
            time.sleep(60)
        kwargs = {"user_id": identifier} if ids else {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # NOTE(review): rstrip only removes a trailing "@"; if
                # full_name starts with "@", lstrip may have been intended -
                # confirm against utils.fetch_and_save_list.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].rstrip("@")
                )
550
551
@cli.command(name="list-members")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("identifiers", type=str, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
def list_members(db_path, identifiers, auth, ids):
    "Fetch lists - accepts one or more screen_name/list_slug identifiers"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    # Each identifier is handed to the helper together with the --ids flag
    for identifier in identifiers:
        utils.fetch_and_save_list(db, session, identifier, ids)
576
577
@cli.command(name="followers-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account followers"
    # The --ids help text previously described list IDs, but the shared
    # helper passes the identifiers on as user IDs when the flag is set.
    # Key order: the looked-up account is the one being followed, and each
    # ID returned by the API is one of its followers.
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/followers/ids.json",
        first_key="followed_id",
        second_key="follower_id",
    )
612
613
@cli.command(name="friends-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account friends"
    # The --ids help text previously described list IDs, but the shared
    # helper passes the identifiers on as user IDs when the flag is set.
    # Key order: the looked-up account is the follower, and each ID returned
    # by the API is an account it follows.
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/friends/ids.json",
        first_key="follower_id",
        second_key="followed_id",
    )
648
649
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block
        with db.conn:
            utils.save_tweets(db, [tweet])
675
676
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names. A distinct local name is
    # used so the command function itself is not shadowed.
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Each tweet is saved inside its own `with db.conn:` block
        with db.conn:
            utils.save_tweets(db, [tweet])
713
714
def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """Record follower/friend relationships as ID pairs in ``following``.

    For each identifier, the user's profile is fetched with
    ``utils.get_profile`` and the IDs paginated from ``api_url`` are
    inserted as rows of ``{first_key: <profile id>, second_key: <other id>,
    "first_seen": <UTC ISO timestamp>}``. The insert uses ``ignore=True``.
    ``sleep`` is passed to ``utils.cursor_paginate`` and is also slept
    after each identifier has been processed.
    """
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        args = {("user_id" if ids else "screen_name"): identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            # One timestamp per batch, shared by every row in that batch
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {first_key: user_id, second_key: other_id, "first_seen": first_seen}
                    for other_id in id_batch
                ),
                ignore=True,
            )
        time.sleep(sleep)
741
742
@cli.command(name="import")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False),
    required=True,
)
@click.argument(
    "paths",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False, exists=True),
    required=True,
    nargs=-1,
)
def import_(db_path, paths):
    """
    Import data from a Twitter exported archive. Input can be the path to a zip
    file, a directory full of .js files or one or more direct .js files.
    """
    db = utils.open_database(db_path)
    for filepath in paths:
        path = pathlib.Path(filepath)
        if path.suffix == ".zip":
            for filename, content in utils.read_archive_js(filepath):
                archive.import_from_file(db, filename, content)
        elif path.is_dir():
            # Import every .js file in this directory. read_bytes() opens
            # and closes each file, so no handles are left open, and a
            # separate loop name keeps the outer `filepath` intact.
            for js_path in path.glob("*.js"):
                archive.import_from_file(db, js_path.name, js_path.read_bytes())
        elif path.suffix == ".js":
            archive.import_from_file(db, path.name, path.read_bytes())
        else:
            raise click.ClickException("Path must be a .js or .zip file or a directory")
774
775
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("q")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--geocode",
    type=str,
    help="latitude,longitude,radius - where radius is a number followed by mi or km",
)
@click.option("--lang", type=str, help="ISO 639-1 language code")
@click.option("--locale", type=str, help="Locale: only 'ja' is currently effective")
@click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"]))
@click.option("--count", type=int, default=100, help="Number of results per page")
@click.option("--stop_after", type=int, help="Stop after this many")
@click.option(
    "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID"
)
def search(db_path, q, auth, since, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    # These two control the fetch itself and are not search arguments
    since_id = kwargs.pop("since_id", None)
    stop_after = kwargs.pop("stop_after", None)
    # Close the credentials file as soon as it has been parsed
    with open(auth) as auth_file:
        credentials = json.load(auth_file)
    session = utils.session_for_auth(credentials)
    db = utils.open_database(db_path)

    # Only options that were actually supplied become search arguments
    search_args = {"q": q}
    search_args.update(
        (key, value) for key, value in kwargs.items() if value is not None
    )

    # Hash of the canonical JSON form of the arguments; it is stored on the
    # search run and passed to fetch_timeline as the since_key.
    args_hash = hashlib.sha1(
        json.dumps(search_args, sort_keys=True, separators=(",", ":")).encode("utf8")
    ).hexdigest()

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        db,
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
        since_id=since_id,
        # --since was previously accepted but never forwarded, so the flag
        # had no effect on searches.
        since=since,
        since_type="search",
        since_key=args_hash,
    )

    if not db["search_runs"].exists():
        db["search_runs"].create(
            {"id": int, "name": str, "args": str, "started": str, "hash": str}, pk="id"
        )

    def save_chunk(db, search_run_id, chunk):
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].insert_all(
            [{"search_run": search_run_id, "tweet": tweet["id"]} for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=(
                ("search_run", "search_runs", "id"),
                ("tweet", "tweets", "id"),
            ),
            replace=True,
        )

    chunk = []
    first = True
    search_run_id = None
    for tweet in tweets:
        if first:
            # Create the search_runs row lazily, so a search that returns
            # nothing records no run.
            first = False
            search_run_id = (
                db["search_runs"]
                .insert(
                    {
                        "name": search_args["q"],
                        "args": {
                            key: value
                            for key, value in search_args.items()
                            if key not in {"q", "count"}
                        },
                        "started": datetime.datetime.utcnow().isoformat(),
                        "hash": args_hash,
                    },
                    alter=True,
                )
                .last_pk
            )
        chunk.append(tweet)
        # Save them 10 at a time
        if len(chunk) >= 10:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    if chunk:
        save_chunk(db, search_run_id, chunk)