Initial implementation of search command, refs #3 · alice.mosphere.at/twitter-to-sqlite@a79fbdf

+96 -1

2 changed files

expand all

twitter_to_sqlite

cli.py

utils.py

+93

twitter_to_sqlite/cli.py

@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("q")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option(
    "--geocode",
    type=str,
    help="latitude,longitude,radius - where radius is a number followed by mi or km",
)
@click.option("--lang", type=str, help="ISO 639-1 language code")
@click.option("--locale", type=str, help="Locale: only 'ja' is currently effective")
@click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"]))
@click.option("--count", type=int, default=100, help="Number of results per page")
@click.option("--stop_after", type=int, help="Stop after this many")
def search(db_path, q, auth, **kwargs):
    """
    Save tweets from a search. Full documentation here:

    https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets
    """
    # The docstring above doubles as the command's --help text, so maintainer
    # notes live in comments instead.
    #
    # db_path: SQLite database file to write to.
    # q:       search query, sent as the "q" API parameter.
    # auth:    path to a JSON file with the credentials handed to
    #          utils.session_for_auth.
    # kwargs:  remaining CLI options (geocode, lang, locale, result_type,
    #          count, stop_after); every option that was actually supplied,
    #          except stop_after, is forwarded as an API parameter.
    stop_after = kwargs.pop("stop_after", None)

    # Read the credentials inside a context manager so the file handle is
    # closed promptly instead of being left to the garbage collector.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = utils.open_database(db_path)

    # Only forward options the user set; unset options arrive as None.
    search_args = {"q": q}
    search_args.update(
        {name: value for name, value in kwargs.items() if value is not None}
    )
    # NOTE(review): the fetch_timeline helper shown alongside this change
    # assigns its own "count" after copying the args, which would override
    # --count -- confirm.

    tweets = utils.fetch_timeline(
        session,
        "https://api.twitter.com/1.1/search/tweets.json",
        search_args,
        sleep=6,
        key="statuses",
        stop_after=stop_after,
    )

    # NOTE(review): `.exists` is read as an attribute here; confirm that this
    # matches the sqlite-utils version the project pins.
    if not db["search_runs"].exists:
        db["search_runs"].create(
            {"id": int, "name": str, "args": str, "started": str}, pk="id"
        )

    def save_chunk(db, search_run_id, chunk):
        """Store a batch of tweets and link each one to its search run."""
        utils.save_tweets(db, chunk)
        # Record which search run produced them
        db["search_runs_tweets"].upsert_all(
            [{"search_run": search_run_id, "tweet": tweet["id"]} for tweet in chunk],
            pk=("search_run", "tweet"),
            foreign_keys=("search_run", "tweet"),
        )

    chunk_size = 10  # tweets written per batch
    chunk = []
    search_run_id = None
    for tweet in tweets:
        if search_run_id is None:
            # Create the search_runs row lazily, on the first tweet, so a
            # search that returns nothing leaves no empty run behind.
            extra_args = {
                name: value
                for name, value in search_args.items()
                if name not in {"q", "count"}
            }
            search_run_id = (
                db["search_runs"]
                .insert(
                    {
                        "name": search_args["q"],
                        "args": extra_args,
                        "started": datetime.datetime.utcnow().isoformat(),
                    },
                    alter=True,
                )
                .last_pk
            )
        chunk.append(tweet)
        if len(chunk) >= chunk_size:
            save_chunk(db, search_run_id, chunk)
            chunk = []
    # Flush whatever is left over after the final full batch.
    if chunk:
        save_chunk(db, search_run_id, chunk)

+3 -1

twitter_to_sqlite/utils.py

··· 89 89 return profile 90 90 91 91 92 - def fetch_timeline(session, url, args, sleep=1, stop_after=None): 92 + def fetch_timeline(session, url, args, sleep=1, stop_after=None, key=None): 93 93 # See https://developer.twitter.com/en/docs/tweets/timelines/guides/working-with-timelines 94 94 args = dict(args) 95 95 args["count"] = 200 ··· 117 117 continue 118 118 else: 119 119 raise Exception(str(tweets["errors"])) 120 + if key is not None: 121 + tweets = tweets[key] 120 122 if not tweets: 121 123 break 122 124 for tweet in tweets:

Configure Feed

Configure Feed