Added --since and --since_id to search, refs #3 · alice.mosphere.at/twitter-to-sqlite@4f86a46

+36 -2

1 changed file

expand all

twitter_to_sqlite

cli.py

+36 -2

twitter_to_sqlite/cli.py

··· 1 1 import datetime 2 + import hashlib 2 3 import json 3 4 import os 4 5 import pathlib ··· 649 650 help="Path to auth.json token file", 650 651 ) 651 652 @click.option( 653 + "--since", 654 + is_flag=True, 655 + default=False, 656 + help="Pull tweets since last retrieved tweet", 657 + ) 658 + @click.option( 652 659 "--geocode", 653 660 type=str, 654 661 help="latitude,longitude,radius - where radius is a number followed by mi or km", ··· 658 665 @click.option("--result_type", type=click.Choice(["mixed", "recent", "popular"])) 659 666 @click.option("--count", type=int, default=100, help="Number of results per page") 660 667 @click.option("--stop_after", type=int, help="Stop after this many") 661 - def search(db_path, q, auth, **kwargs): 668 + @click.option( 669 + "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID" 670 + ) 671 + def search(db_path, q, auth, since, **kwargs): 662 672 """ 663 673 Save tweets from a search. Full documentation here: 664 674 665 675 https://developer.twitter.com/en/docs/tweets/search/api-reference/get-search-tweets 666 676 """ 677 + since_id = kwargs.pop("since_id", None) 678 + if since and since_id: 679 + raise click.ClickException("Use either --since or --since_id, not both") 667 680 stop_after = kwargs.pop("stop_after", None) 668 681 auth = json.load(open(auth)) 669 682 session = utils.session_for_auth(auth) ··· 674 687 if value is not None: 675 688 search_args[key] = value 676 689 690 + args_hash = hashlib.sha1( 691 + json.dumps(search_args, sort_keys=True, separators=(",", ":")).encode( 692 + "utf8" 693 + ) 694 + ).hexdigest() 695 + 696 + if since and db["search_runs_tweets"].exists: 697 + # Find the maximum tweet ID from previous runs of this search 698 + try: 699 + since_id = db.conn.execute( 700 + """ 701 + select max(tweet) from search_runs_tweets where search_run in ( 702 + select id from search_runs where hash = ? 703 + ) 704 + """, [args_hash] 705 + ).fetchall()[0][0] 706 + except IndexError: 707 + pass 708 + 677 709 tweets = utils.fetch_timeline( 678 710 session, 679 711 "https://api.twitter.com/1.1/search/tweets.json", ··· 681 713 sleep=6, 682 714 key="statuses", 683 715 stop_after=stop_after, 716 + since_id=since_id, 684 717 ) 685 718 chunk = [] 686 719 first = True 687 720 688 721 if not db["search_runs"].exists: 689 722 db["search_runs"].create( 690 - {"id": int, "name": str, "args": str, "started": str}, pk="id" 723 + {"id": int, "name": str, "args": str, "started": str, "hash": str}, pk="id" 691 724 ) 692 725 693 726 def save_chunk(db, search_run_id, chunk): ··· 714 747 if key not in {"q", "count"} 715 748 }, 716 749 "started": datetime.datetime.utcnow().isoformat(), 750 + "hash": args_hash, 717 751 }, 718 752 alter=True, 719 753 )

Configure Feed

Configure Feed