this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

--since and --since_id options for home-timeline, closes #19

Refs #20

Also added some initial rate limit error handling code.

+54 -6
+2
README.md
··· 137 137 138 138 The tweets are stored in the `tweets` table, and a record is added to the `timeline_tweets` table noting that this tweet came in due to being spotted in the timeline of your user. 139 139 140 + You can use `--since` to retrieve just tweets that have been posted since the last time this command was run, or `--since_id=xxx` to explicitly pass in a tweet ID to use as the last position. 141 + 140 142 You can then view your timeline in Datasette using the following URL: 141 143 142 144 `/tweets/tweets?_where=id+in+(select+tweet+from+[timeline_tweets])&_sort_desc=id&_facet=user`
+24 -3
twitter_to_sqlite/cli.py
··· 214 214 default="auth.json", 215 215 help="Path to auth.json token file", 216 216 ) 217 - def home_timeline(db_path, auth): 217 + @click.option( 218 + "--since", 219 + is_flag=True, 220 + default=False, 221 + help="Pull tweets since last retrieved tweet", 222 + ) 223 + @click.option( 224 + "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID" 225 + ) 226 + def home_timeline(db_path, auth, since, since_id): 218 227 "Save tweets from timeline for authenticated user" 228 + if since and since_id: 229 + raise click.ClickException("Use either --since or --since_id, not both ") 219 230 auth = json.load(open(auth)) 220 231 session = utils.session_for_auth(auth) 221 232 profile = utils.get_profile(session) 222 233 db = sqlite_utils.Database(db_path) 234 + expected_length = 800 235 + if since: 236 + # Set since_id to highest value for this timeline 237 + try: 238 + since_id = db.conn.execute( 239 + "select max(tweet) from timeline_tweets where user = ?", [profile["id"]] 240 + ).fetchall()[0][0] 241 + expected_length = None 242 + except IndexError: 243 + pass 223 244 with click.progressbar( 224 - utils.fetch_home_timeline(session), 225 - length=800, 245 + utils.fetch_home_timeline(session, since_id=since_id), 246 + length=expected_length, 226 247 label="Importing timeline", 227 248 show_pos=True, 228 249 ) as bar:
+28 -3
twitter_to_sqlite/utils.py
··· 9 9 from dateutil import parser 10 10 from requests_oauthlib import OAuth1Session 11 11 12 + # Twitter API error codes 13 + RATE_LIMIT_ERROR_CODE = 88 14 + 12 15 13 16 def session_for_auth(auth): 14 17 return OAuth1Session( ··· 61 64 args["count"] = stop_after 62 65 args["tweet_mode"] = "extended" 63 66 min_seen_id = None 67 + num_rate_limit_errors = 0 64 68 while True: 65 69 if min_seen_id is not None: 66 70 args["max_id"] = min_seen_id - 1 67 - tweets = session.get(url, params=args).json() 71 + response = session.get(url, params=args) 72 + tweets = response.json() 73 + if "errors" in tweets: 74 + # Was it a rate limit error? If so sleep and try again 75 + if RATE_LIMIT_ERROR_CODE == tweets["errors"][0]["code"]: 76 + num_rate_limit_errors += 1 77 + assert num_rate_limit_errors < 5, "More than 5 rate limit errors" 78 + print( 79 + "Rate limit exceeded - will sleep 15s and try again {}".format( 80 + repr(response.headers) 81 + ) 82 + ) 83 + time.sleep(15) 84 + continue 85 + else: 86 + raise Exception(str(tweets["errors"])) 68 87 if not tweets: 69 88 break 70 89 for tweet in tweets: ··· 86 105 ) 87 106 88 107 89 - def fetch_home_timeline(session): 108 + def fetch_home_timeline(session, since_id=None): 109 + args = {} 110 + if since_id is not None: 111 + args["since_id"] = since_id 90 112 yield from fetch_timeline( 91 - session, "https://api.twitter.com/1.1/statuses/home_timeline.json", {}, sleep=1 113 + session, 114 + "https://api.twitter.com/1.1/statuses/home_timeline.json", 115 + args, 116 + sleep=1, 92 117 ) 93 118 94 119