this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

import datetime
import hashlib
import json
import os
import pathlib
import time

import click

from twitter_to_sqlite import archive
from twitter_to_sqlite import utils


def add_identifier_options(subcommand):
    """Decorator: add the shared IDENTIFIERS argument plus --attach/--sql options.

    Applied to every subcommand that accepts a list of users, so the three
    click decorators stay consistent across commands. reversed() keeps the
    argument/option order identical to stacking the decorators by hand.
    """
    for decorator in reversed(
        (
            click.argument("identifiers", type=str, nargs=-1),
            click.option(
                "--attach",
                type=click.Path(
                    file_okay=True, dir_okay=False, allow_dash=False, exists=True
                ),
                multiple=True,
                help="Additional database file to attach",
            ),
            click.option("--sql", help="SQL query to fetch identifiers to use"),
        )
    ):
        subcommand = decorator(subcommand)
    return subcommand


@click.group()
@click.version_option()
def cli():
    "Save data from Twitter to a SQLite database"


@cli.command()
@click.argument("url")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def fetch(url, auth):
    "Make an authenticated request to the Twitter API"
    # Fix: close the auth file instead of leaking the handle
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    click.echo(json.dumps(session.get(url).json(), indent=4))


@cli.command()
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    default="auth.json",
    help="Path to save tokens to, defaults to auth.json",
)
def auth(auth):
    "Save authentication credentials to a JSON file"
    click.echo("Create an app here: https://developer.twitter.com/en/apps")
    click.echo("Then navigate to 'Keys and tokens' and paste in the following:")
    click.echo()
    api_key = click.prompt("API key")
    api_secret_key = click.prompt("API secret key")
    access_token = click.prompt("Access token")
    access_token_secret = click.prompt("Access token secret")
    # Fix: write via a context manager so the file is flushed and closed
    with open(auth, "w") as fp:
        fp.write(
            json.dumps(
                {
                    "api_key": api_key,
                    "api_secret_key": api_secret_key,
                    "access_token": access_token,
                    "access_token_secret": access_token_secret,
                },
                indent=4,
            )
            + "\n"
        )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def followers(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save followers for specified users (defaults to authenticated user)"
    _shared_friends_followers(
        db_path, identifiers, attach, sql, auth, ids, silent, "followers"
    )


def _shared_friends_followers(
    db_path, identifiers, attach, sql, auth, ids, silent, noun
):
    """Shared implementation for the `followers` and `friends` commands.

    noun is "friends" or "followers" and selects both the API list to fetch
    and which side of the relationship each saved user is recorded on.
    """
    assert noun in ("friends", "followers")
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        # No users specified: default to the authenticated user
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        # Record the relationship direction alongside each saved user
        save_users_kwargs = {}
        if noun == "followers":
            save_users_kwargs["followed_id"] = user_id
        elif noun == "friends":
            save_users_kwargs["follower_id"] = user_id

        def go(update):
            # update() is called with each chunk size to drive the progress bar
            for users_chunk in utils.fetch_user_list_chunks(
                session, user_id, screen_name, noun=noun
            ):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if not silent:
            # The profile's follower/friend count sizes the progress bar
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                length=count,
                label="Importing {:,} {} for @{}".format(count, noun, screen_name),
            ) as bar:
                go(bar.update)
        else:
            go(lambda x: None)


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def friends(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save friends for specified users (defaults to authenticated user)"
    _shared_friends_followers(
        db_path, identifiers, attach, sql, auth, ids, silent, "friends"
    )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
@click.option("--stop_after", type=int, help="Stop after this many")
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session, user_id, screen_name)
    with click.progressbar(
        utils.fetch_favorites(session, db, user_id, screen_name, stop_after),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar, favorited_by=profile["id"])


@cli.command(name="user-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--stop_after", type=int, help="Only pull this number of recent tweets")
@click.option("--user_id", help="Numeric user ID", hidden=True)
@click.option("--screen_name", help="Screen name", hidden=True)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # Fix: previously the numeric ID fell through as a screen_name
            # lookup; a lone --user_id must be treated as an ID
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad screen names so the per-user progress bar labels line up
    format_string = (
        "@{:" + str(max(len(str(identifier)) for identifier in identifiers)) + "}"
    )

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Authenticated-user case: profile was fetched above, re-read row
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        if since or since_id:
            # Incremental pulls fetch an unknown number of tweets
            expected_length = None

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            if chunk:
                utils.save_tweets(db, chunk)


@cli.command(name="home-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID"
)
def home_timeline(db_path, auth, since, since_id):
    "Save tweets from timeline for authenticated user"
    _shared_timeline(
        db_path,
        auth,
        since,
        since_id,
        table="timeline_tweets",
        api_url="https://api.twitter.com/1.1/statuses/home_timeline.json",
        since_type="home",
    )


@cli.command(name="mentions-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved mention",
)
@click.option(
    "--since_id", type=str, default=False, help="Pull mentions since this Tweet ID"
)
def mentions_timeline(db_path, auth, since, since_id):
    "Save tweets that mention the authenticated user"
    _shared_timeline(
        db_path,
        auth,
        since,
        since_id,
        table="mentions_tweets",
        api_url="https://api.twitter.com/1.1/statuses/mentions_timeline.json",
        sleep=10,
        since_type="mentions",
    )


def _shared_timeline(
    db_path, auth, since, since_id, table, api_url, sleep=1, since_type=None
):
    """Shared implementation for home-timeline and mentions-timeline.

    Saves fetched tweets and records which user's timeline produced each
    one in `table`, keyed on (user, tweet).
    """
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session)
    # Rough upper bound used only to size the progress bar
    expected_length = 800
    since_key = profile["id"]

    with click.progressbar(
        utils.fetch_timeline(
            session,
            api_url,
            db,
            sleep=sleep,
            since=since,
            since_id=since_id,
            since_type=since_type,
            since_key=since_key,
        ),
        length=expected_length,
        label="Importing tweets",
        show_pos=True,
    ) as bar:
        # Save them 100 at a time
        def save_chunk(db, chunk):
            utils.save_tweets(db, chunk)
            # Record whose timeline they came from
            db[table].insert_all(
                [{"user": profile["id"], "tweet": tweet["id"]} for tweet in chunk],
                pk=("user", "tweet"),
                foreign_keys=("user", "tweet"),
                replace=True,
            )

        chunk = []
        for tweet in bar:
            chunk.append(tweet)
            if len(chunk) >= 100:
                save_chunk(db, chunk)
                chunk = []
        if chunk:
            save_chunk(db, chunk)


@cli.command(name="users-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)


@cli.command(name="statuses-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--skip-existing", is_flag=True, help="Skip tweets that are already in the DB"
)
@click.option("--silent", is_flag=True, help="Disable progress bar")
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    if skip_existing:
        existing_ids = {
            r[0] for r in db.conn.execute("select id from tweets").fetchall()
        }
        # identifiers are strings; the tweets.id column stores integers
        identifiers = [i for i in identifiers if int(i) not in existing_ids]
    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
    else:
        # Do it with a progress bar
        count = len(identifiers)
        with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
        ) as bar:
            for batch in utils.fetch_status_batches(session, identifiers):
                utils.save_tweets(db, batch)
                bar.update(len(batch))


@cli.command(name="lists")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen_names")
@click.option("--members", is_flag=True, help="Retrieve members for each list")
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    first = True
    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                if not first:
                    # Rate limit is one per minute.
                    # Fix: `first = False` previously sat inside this branch,
                    # so it was unreachable and the sleep never ran.
                    time.sleep(60)
                first = False
                # Fix: full_name is "@owner/slug" - the "@" is leading,
                # so lstrip (rstrip was a no-op on the wrong end)
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].lstrip("@")
                )


@cli.command(name="list-members")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("identifiers", type=str, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
def list_members(db_path, identifiers, auth, ids):
    "Fetch lists - accepts one or more screen_name/list_slug identifiers"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    for identifier in identifiers:
        utils.fetch_and_save_list(db, session, identifier, ids)


@cli.command(name="followers-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account followers"
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/followers/ids.json",
        first_key="followed_id",
        second_key="follower_id",
    )


@cli.command(name="friends-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account friends"
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/friends/ids.json",
        first_key="follower_id",
        second_key="followed_id",
    )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    # Streams indefinitely; each tweet is committed in its own transaction
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        with db.conn:
            utils.save_tweets(db, [tweet])


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names
    if ids:
        follow = identifiers
    else:
        follow = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow):
        if verbose:
            print(json.dumps(tweet, indent=2))
        with db.conn:
            utils.save_tweets(db, [tweet])


def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """Shared implementation for followers-ids and friends-ids.

    Pages through the IDs endpoint 5,000 at a time and records each pair
    in the `following` table under (first_key, second_key).
    """
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        args = {("user_id" if ids else "screen_name"): identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {first_key: user_id, second_key: other_id, "first_seen": first_seen}
                    for other_id in id_batch
                ),
                ignore=True,
            )
        # Pause between users to stay under the rate limit
        time.sleep(sleep)


@cli.command(name="import")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False),
    required=True,
)
@click.argument(
    "paths",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False, exists=True),
    required=True,
    nargs=-1,
)
def import_(db_path, paths):
    """
    Import data from a Twitter exported archive. Input can be the path to a zip
    file, a directory full of .js files or one or more direct .js files.
    """
    db = utils.open_database(db_path)
    for filepath in paths:
        path = pathlib.Path(filepath)
        if path.suffix == ".zip":
            for filename, content in utils.read_archive_js(filepath):
                archive.import_from_file(db, filename, content)
        elif path.is_dir():
            # Import every .js file in this directory
            # (read_bytes also closes the file, unlike bare open().read())
            for js_path in path.glob("*.js"):
                archive.import_from_file(db, js_path.name, js_path.read_bytes())
        elif path.suffix == ".js":
            archive.import_from_file(db, path.name, path.read_bytes())
        else:
            raise click.ClickException("Path must be a .js or .zip file or a directory")


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("q")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    default=False,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--geocode",
    type=str,
    help="latitude,longitude,radius - where radius is a number followed by mi or km",
)
@click.option("--lang", type=str, help="ISO 639-1 language code")
@click.option("--locale", type=str, help="Locale: only 'ja' is currently effective")
@click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"]))
@click.option("--count", type=int, default=100, help="Number of results per page")
@click.option("--stop_after", type=int, help="Stop after this many")
@click.option(
    "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID"
)
def search(db_path, q, auth, since, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    since_id = kwargs.pop("since_id", None)
    stop_after = kwargs.pop("stop_after", None)
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    # Only forward options the user actually supplied
    search_args = {"q": q}
    for key, value in kwargs.items():
        if value is not None:
            search_args[key] = value

    # Stable hash of the search arguments, used as since_key so --since
    # resumes each distinct search independently
    args_hash = hashlib.sha1(
        json.dumps(search_args, sort_keys=True, separators=(",", ":")).encode("utf8")
    ).hexdigest()

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        db,
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
        since_id=since_id,
        # Fix: --since was accepted by the command but never passed through
        since=since,
        since_type="search",
        since_key=args_hash,
    )
    chunk = []
    first = True

    if not db["search_runs"].exists():
        db["search_runs"].create(
            {"id": int, "name": str, "args": str, "started": str, "hash": str}, pk="id"
        )

    def save_chunk(db, search_run_id, chunk):
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].insert_all(
            [{"search_run": search_run_id, "tweet": tweet["id"]} for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=(
                ("search_run", "search_runs", "id"),
                ("tweet", "tweets", "id"),
            ),
            replace=True,
        )

    search_run_id = None
    for tweet in tweets:
        if first:
            # Create the search_runs row lazily, only once the search has
            # returned at least one tweet
            first = False
            search_run_id = (
                db["search_runs"]
                .insert(
                    {
                        "name": search_args["q"],
                        "args": {
                            key: value
                            for key, value in search_args.items()
                            if key not in {"q", "count"}
                        },
                        "started": datetime.datetime.utcnow().isoformat(),
                        "hash": args_hash,
                    },
                    alter=True,
                )
                .last_pk
            )
        chunk.append(tweet)
        if len(chunk) >= 10:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    if chunk:
        save_chunk(db, search_run_id, chunk)