this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

at main 880 lines 28 kB view raw
import datetime
import hashlib
import json
import os
import pathlib
import time

import click

from twitter_to_sqlite import archive
from twitter_to_sqlite import utils


def _load_auth(auth_path):
    """Return the parsed JSON credentials stored in the file at ``auth_path``.

    The file is opened with a context manager so the handle is closed as soon
    as the JSON has been read, rather than whenever it is garbage collected.
    """
    with open(auth_path) as fp:
        return json.load(fp)


def _read_bytes(path):
    "Return the full binary content of ``path``, closing the file afterwards."
    with open(path, "rb") as fp:
        return fp.read()


def add_identifier_options(subcommand):
    """Decorate ``subcommand`` with the shared identifier-selection parameters.

    Adds an ``identifiers`` variadic argument plus ``--attach`` and ``--sql``
    options. The decorators are applied in reverse so they behave as if they
    had been stacked on the command in the order they are listed here.
    """
    for decorator in reversed(
        (
            click.argument("identifiers", type=str, nargs=-1),
            click.option(
                "--attach",
                type=click.Path(
                    file_okay=True, dir_okay=False, allow_dash=False, exists=True
                ),
                multiple=True,
                help="Additional database file to attach",
            ),
            click.option("--sql", help="SQL query to fetch identifiers to use"),
        )
    ):
        subcommand = decorator(subcommand)
    return subcommand


@click.group()
@click.version_option()
def cli():
    "Save data from Twitter to a SQLite database"


@cli.command()
@click.argument("url")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def fetch(url, auth):
    "Make an authenticated request to the Twitter API"
    session = utils.session_for_auth(_load_auth(auth))
    click.echo(json.dumps(session.get(url).json(), indent=4))


@cli.command()
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    default="auth.json",
    help="Path to save tokens to, defaults to auth.json",
)
def auth(auth):
    "Save authentication credentials to a JSON file"
    click.echo("Create an app here: https://developer.twitter.com/en/apps")
    click.echo("Then navigate to 'Keys and tokens' and paste in the following:")
    click.echo()
    api_key = click.prompt("API key")
    api_secret_key = click.prompt("API secret key")
    access_token = click.prompt("Access token")
    access_token_secret = click.prompt("Access token secret")
    # Use a context manager so the credentials are flushed and the handle
    # closed before the command returns.
    with open(auth, "w") as fp:
        fp.write(
            json.dumps(
                {
                    "api_key": api_key,
                    "api_secret_key": api_secret_key,
                    "access_token": access_token,
                    "access_token_secret": access_token_secret,
                },
                indent=4,
            )
            + "\n"
        )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def followers(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save followers for specified users (defaults to authenticated user)"
    _shared_friends_followers(
        db_path, identifiers, attach, sql, auth, ids, silent, "followers"
    )


def _shared_friends_followers(
    db_path, identifiers, attach, sql, auth, ids, silent, noun
):
    """Shared implementation of the ``followers`` and ``friends`` commands.

    ``noun`` must be ``"followers"`` or ``"friends"``; it selects which user
    list is fetched, which ``*_count`` profile field sizes the progress bar and
    which relationship keyword is passed to ``utils.save_users``. When no
    identifiers are resolved, the profile returned by ``utils.get_profile(db,
    session)`` is used instead.
    """
    assert noun in ("friends", "followers")
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)

    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    if not identifiers:
        profile = utils.get_profile(db, session)
        identifiers = [profile["screen_name"]]

    for identifier in identifiers:
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}

        profile = utils.get_profile(db, session, **kwargs)
        screen_name = profile["screen_name"]
        user_id = profile["id"]

        # Record the direction of the relationship being saved
        save_users_kwargs = {}
        if noun == "followers":
            save_users_kwargs["followed_id"] = user_id
        elif noun == "friends":
            save_users_kwargs["follower_id"] = user_id

        def go(update):
            # Save each chunk as it arrives and report its size to ``update``
            for users_chunk in utils.fetch_user_list_chunks(
                session, user_id, screen_name, noun=noun
            ):
                utils.save_users(db, users_chunk, **save_users_kwargs)
                update(len(users_chunk))

        if silent:
            go(lambda x: None)
        else:
            # Use the follower/friend count from the profile to size the bar
            count = profile["{}_count".format(noun)]
            with click.progressbar(
                length=count,
                label="Importing {:,} {} for @{}".format(count, noun, screen_name),
            ) as bar:
                go(bar.update)


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--silent", is_flag=True, help="Disable progress bar")
def friends(db_path, identifiers, attach, sql, auth, ids, silent):
    "Save friends for specified users (defaults to authenticated user)"
    _shared_friends_followers(
        db_path, identifiers, attach, sql, auth, ids, silent, "friends"
    )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
@click.option("--stop_after", type=int, help="Stop after this many")
def favorites(db_path, auth, user_id, screen_name, stop_after):
    "Save tweets favorited by specified user"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session, user_id, screen_name)
    with click.progressbar(
        utils.fetch_favorites(session, db, user_id, screen_name, stop_after),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar, favorited_by=profile["id"])


@cli.command(name="user-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option("--stop_after", type=int, help="Only pull this number of recent tweets")
@click.option("--user_id", help="Numeric user ID", hidden=True)
@click.option("--screen_name", help="Screen name", hidden=True)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def user_timeline(
    db_path,
    identifiers,
    attach,
    sql,
    auth,
    ids,
    stop_after,
    user_id,
    screen_name,
    since,
    since_id,
):
    "Save tweets posted by specified user"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)

    # Backwards compatible support for old --user_id and --screen_name options
    if screen_name:
        if ids:
            raise click.ClickException("Cannot use --screen_name with --ids")
        identifiers.append(screen_name)

    if user_id:
        if not identifiers:
            identifiers = [user_id]
            # The only identifier is a numeric ID, so it must be sent to the
            # API as user_id rather than as a screen name
            ids = True
        else:
            if not ids:
                raise click.ClickException("Use --user_id with --ids")
            identifiers.append(user_id)

    # If identifiers is empty, fetch the authenticated user
    fetch_profiles = True
    if not identifiers:
        fetch_profiles = False
        profile = utils.get_profile(db, session, user_id, screen_name)
        identifiers = [profile["screen_name"]]
        ids = False

    # Pad every progress bar label to the width of the longest identifier
    format_string = (
        "@{:" + str(max(len(str(identifier)) for identifier in identifiers)) + "}"
    )

    for identifier in identifiers:
        kwargs = {}
        if ids:
            kwargs["user_id"] = identifier
        else:
            kwargs["screen_name"] = identifier
        if fetch_profiles:
            profile = utils.get_profile(db, session, **kwargs)
        else:
            # Profile was already fetched above; re-read the saved row
            profile = db["users"].get(profile["id"])
        expected_length = profile["statuses_count"]

        # An incremental fetch returns an unknown number of tweets
        if since or since_id:
            expected_length = None

        with click.progressbar(
            utils.fetch_user_timeline(
                session,
                db,
                stop_after=stop_after,
                since_id=since_id,
                since=since,
                **kwargs
            ),
            length=expected_length,
            label=format_string.format(profile["screen_name"]),
            show_pos=True,
        ) as bar:
            # Save them 100 at a time
            chunk = []
            for tweet in bar:
                chunk.append(tweet)
                if len(chunk) >= 100:
                    utils.save_tweets(db, chunk)
                    chunk = []
            if chunk:
                utils.save_tweets(db, chunk)


@cli.command(name="home-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def home_timeline(db_path, auth, since, since_id):
    "Save tweets from timeline for authenticated user"
    _shared_timeline(
        db_path,
        auth,
        since,
        since_id,
        table="timeline_tweets",
        api_url="https://api.twitter.com/1.1/statuses/home_timeline.json",
        since_type="home",
    )


@cli.command(name="mentions-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved mention",
)
@click.option("--since_id", type=str, help="Pull mentions since this Tweet ID")
def mentions_timeline(db_path, auth, since, since_id):
    "Save tweets that mention the authenticated user"
    _shared_timeline(
        db_path,
        auth,
        since,
        since_id,
        table="mentions_tweets",
        api_url="https://api.twitter.com/1.1/statuses/mentions_timeline.json",
        sleep=10,
        since_type="mentions",
    )


def _shared_timeline(
    db_path, auth, since, since_id, table, api_url, sleep=1, since_type=None
):
    """Shared implementation of the home and mentions timeline commands.

    Fetches tweets from ``api_url`` via ``utils.fetch_timeline``, saves them in
    chunks of 100, and records a ``(user, tweet)`` row in ``table`` for each
    one, where ``user`` is the ID from ``utils.get_profile(db, session)``.
    ``sleep``, ``since``, ``since_id`` and ``since_type`` are forwarded to
    ``utils.fetch_timeline`` unchanged.
    """
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    profile = utils.get_profile(db, session)
    expected_length = 800
    since_key = profile["id"]

    def save_chunk(db, chunk):
        utils.save_tweets(db, chunk)
        # Record whose timeline they came from
        db[table].insert_all(
            [{"user": profile["id"], "tweet": tweet["id"]} for tweet in chunk],
            pk=("user", "tweet"),
            foreign_keys=("user", "tweet"),
            replace=True,
        )

    with click.progressbar(
        utils.fetch_timeline(
            session,
            api_url,
            db,
            sleep=sleep,
            since=since,
            since_id=since_id,
            since_type=since_type,
            since_key=since_key,
        ),
        length=expected_length,
        label="Importing tweets",
        show_pos=True,
    ) as bar:
        # Save them 100 at a time
        chunk = []
        for tweet in bar:
            chunk.append(tweet)
            if len(chunk) >= 100:
                save_chunk(db, chunk)
                chunk = []
        if chunk:
            save_chunk(db, chunk)


@cli.command(name="users-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
def users_lookup(db_path, identifiers, attach, sql, auth, ids):
    "Fetch user accounts"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)


@cli.command(name="statuses-lookup")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--skip-existing", is_flag=True, help="Skip tweets that are already in the DB"
)
@click.option("--silent", is_flag=True, help="Disable progress bar")
def statuses_lookup(db_path, identifiers, attach, sql, auth, skip_existing, silent):
    "Fetch tweets by their IDs"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    if skip_existing:
        existing_ids = {
            r[0] for r in db.conn.execute("select id from tweets").fetchall()
        }
        identifiers = [i for i in identifiers if int(i) not in existing_ids]
    if silent:
        for batch in utils.fetch_status_batches(session, identifiers):
            utils.save_tweets(db, batch)
    else:
        # Do it with a progress bar
        count = len(identifiers)
        with click.progressbar(
            length=count,
            label="Importing {:,} tweet{}".format(count, "" if count == 1 else "s"),
        ) as bar:
            for batch in utils.fetch_status_batches(session, identifiers):
                utils.save_tweets(db, batch)
                bar.update(len(batch))


@cli.command(name="lists")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen_names")
@click.option("--members", is_flag=True, help="Retrieve members for each list")
def lists(db_path, identifiers, attach, sql, auth, ids, members):
    "Fetch lists belonging to specified users"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    for index, identifier in enumerate(identifiers):
        if index:
            # Rate limit is one per minute, so pause before every user after
            # the first. (Previously the guarding flag was never cleared, so
            # this sleep could not run.)
            time.sleep(60)
        if ids:
            kwargs = {"user_id": identifier}
        else:
            kwargs = {"screen_name": identifier}
        fetched_lists = utils.fetch_lists(db, session, **kwargs)
        if members:
            for new_list in fetched_lists:
                # NOTE(review): rstrip only removes a trailing "@"; if
                # full_name carries a leading "@" this may have been meant to
                # be lstrip - confirm against utils.fetch_and_save_list.
                utils.fetch_and_save_list(
                    db, session, new_list["full_name"].rstrip("@")
                )


@cli.command(name="list-members")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("identifiers", type=str, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--ids", is_flag=True, help="Treat input as list IDs, not user/slug strings"
)
def list_members(db_path, identifiers, auth, ids):
    "Fetch lists - accepts one or more screen_name/list_slug identifiers"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    for identifier in identifiers:
        utils.fetch_and_save_list(db, session, identifier, ids)


@cli.command(name="followers-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def followers_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account followers"
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/followers/ids.json",
        first_key="followed_id",
        second_key="follower_id",
    )


@cli.command(name="friends-ids")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "--sleep", type=int, default=61, help="Seconds to sleep between API calls"
)
def friends_ids(db_path, identifiers, attach, sql, auth, ids, sleep):
    "Populate following table with IDs of account friends"
    _shared_friends_ids_followers_ids(
        db_path,
        identifiers,
        attach,
        sql,
        auth,
        ids,
        sleep,
        api_url="https://api.twitter.com/1.1/friends/ids.json",
        first_key="follower_id",
        second_key="followed_id",
    )


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Save each streamed tweet inside its own transaction
        with db.conn:
            utils.save_tweets(db, [tweet])


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@add_identifier_options
@click.option("--ids", is_flag=True, help="Treat input as user IDs, not screen names")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def follow(db_path, identifiers, attach, sql, ids, auth, verbose):
    "Experimental: Follow these Twitter users and save tweets in real-time"
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    # Make sure we have saved these users to the database
    for batch in utils.fetch_user_batches(session, identifiers, ids):
        utils.save_users(db, batch)
    # Ensure we have user IDs, not screen names
    if ids:
        follow_ids = identifiers
    else:
        follow_ids = utils.user_ids_for_screen_names(db, identifiers)
    # Start streaming:
    for tweet in utils.stream_filter(session, follow=follow_ids):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Save each streamed tweet inside its own transaction
        with db.conn:
            utils.save_tweets(db, [tweet])


def _shared_friends_ids_followers_ids(
    db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
):
    """Shared implementation of the ``followers-ids`` and ``friends-ids`` commands.

    For each identifier, pages through ``api_url`` with
    ``utils.cursor_paginate`` and inserts one row per returned ID into the
    ``following`` table. ``first_key`` names the column that receives the
    looked-up user's ID and ``second_key`` the column that receives each ID
    from the API. Rows that already exist are ignored. ``sleep`` is passed to
    the paginator and is also slept after each identifier.
    """
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)
    identifiers = utils.resolve_identifiers(db, identifiers, attach, sql)
    for identifier in identifiers:
        # Make sure this user is saved
        arg_user_id = identifier if ids else None
        arg_screen_name = None if ids else identifier
        profile = utils.get_profile(db, session, arg_user_id, arg_screen_name)
        user_id = profile["id"]
        args = {("user_id" if ids else "screen_name"): identifier}
        for id_batch in utils.cursor_paginate(
            session, api_url, args, "ids", 5000, sleep
        ):
            first_seen = datetime.datetime.utcnow().isoformat()
            db["following"].insert_all(
                (
                    {first_key: user_id, second_key: other_id, "first_seen": first_seen}
                    for other_id in id_batch
                ),
                ignore=True,
            )
        time.sleep(sleep)


@cli.command(name="import")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False),
    required=True,
)
@click.argument(
    "paths",
    type=click.Path(file_okay=True, dir_okay=True, allow_dash=False, exists=True),
    required=True,
    nargs=-1,
)
def import_(db_path, paths):
    """
    Import data from a Twitter exported archive. Input can be the path to a zip
    file, a directory full of .js files or one or more direct .js files.
    """
    db = utils.open_database(db_path)
    for filepath in paths:
        path = pathlib.Path(filepath)
        if path.suffix == ".zip":
            for filename, content in utils.read_archive_js(filepath):
                archive.import_from_file(db, filename, content)
        elif path.is_dir():
            # Import every .js file in this directory
            for js_path in path.glob("*.js"):
                archive.import_from_file(db, js_path.name, _read_bytes(js_path))
        elif path.suffix == ".js":
            archive.import_from_file(db, path.name, _read_bytes(path))
        else:
            raise click.ClickException("Path must be a .js or .zip file or a directory")


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("q")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--since",
    is_flag=True,
    help="Pull tweets since last retrieved tweet",
)
@click.option(
    "--geocode",
    type=str,
    help="latitude,longitude,radius - where radius is a number followed by mi or km",
)
@click.option("--lang", type=str, help="ISO 639-1 language code")
@click.option("--locale", type=str, help="Locale: only 'ja' is currently effective")
@click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"]))
@click.option("--count", type=int, default=100, help="Number of results per page")
@click.option("--stop_after", type=int, help="Stop after this many")
@click.option("--since_id", type=str, help="Pull tweets since this Tweet ID")
def search(db_path, q, auth, since, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    # These two control the fetch loop rather than the search query itself
    since_id = kwargs.pop("since_id", None)
    stop_after = kwargs.pop("stop_after", None)
    session = utils.session_for_auth(_load_auth(auth))
    db = utils.open_database(db_path)

    search_args = {"q": q}
    for key, value in kwargs.items():
        if value is not None:
            search_args[key] = value

    # Stable hash of the query arguments, passed on as the since_key
    args_hash = hashlib.sha1(
        json.dumps(search_args, sort_keys=True, separators=(",", ":")).encode("utf8")
    ).hexdigest()

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        db,
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
        # Forward --since, which was previously accepted but never used
        since=since,
        since_id=since_id,
        since_type="search",
        since_key=args_hash,
    )
    chunk = []
    first = True

    if not db["search_runs"].exists():
        db["search_runs"].create(
            {"id": int, "name": str, "args": str, "started": str, "hash": str}, pk="id"
        )

    def save_chunk(db, search_run_id, chunk):
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].insert_all(
            [{"search_run": search_run_id, "tweet": tweet["id"]} for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=(
                ("search_run", "search_runs", "id"),
                ("tweet", "tweets", "id"),
            ),
            replace=True,
        )

    search_run_id = None
    for tweet in tweets:
        if first:
            # Only create the search_runs row once a tweet has been returned
            first = False
            search_run_id = (
                db["search_runs"]
                .insert(
                    {
                        "name": search_args["q"],
                        "args": {
                            key: value
                            for key, value in search_args.items()
                            if key not in {"q", "count"}
                        },
                        "started": datetime.datetime.utcnow().isoformat(),
                        "hash": args_hash,
                    },
                    alter=True,
                )
                .last_pk
            )
        chunk.append(tweet)
        if len(chunk) >= 10:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    if chunk:
        save_chunk(db, search_run_id, chunk)