···137137138138The tweets are stored in the `tweets` table, and a record is added to the `timeline_tweets` table to note that each tweet was seen in your user's timeline.
139139140140+You can use `--since` to retrieve only the tweets posted since the last time this command was run, or `--since_id=xxx` to explicitly pass in a tweet ID to use as the last position.
141141+140142You can then view your timeline in Datasette using the following URL:
141143142144`/tweets/tweets?_where=id+in+(select+tweet+from+[timeline_tweets])&_sort_desc=id&_facet=user`
+24-3
twitter_to_sqlite/cli.py
···214214 default="auth.json",
215215 help="Path to auth.json token file",
216216)
217217-def home_timeline(db_path, auth):
217217+@click.option(
218218+ "--since",
219219+ is_flag=True,
220220+ default=False,
221221+ help="Pull tweets since last retrieved tweet",
222222+)
223223+@click.option(
224224+ "--since_id", type=str, default=False, help="Pull tweets since this Tweet ID"
225225+)
226226+def home_timeline(db_path, auth, since, since_id):
218227 "Save tweets from timeline for authenticated user"
228228+ if since and since_id:
229229+ raise click.ClickException("Use either --since or --since_id, not both ")
219230 auth = json.load(open(auth))
220231 session = utils.session_for_auth(auth)
221232 profile = utils.get_profile(session)
222233 db = sqlite_utils.Database(db_path)
234234+ expected_length = 800
235235+ if since:
236236+ # Set since_id to highest value for this timeline
237237+ try:
238238+ since_id = db.conn.execute(
239239+ "select max(tweet) from timeline_tweets where user = ?", [profile["id"]]
240240+ ).fetchall()[0][0]
241241+ expected_length = None
242242+ except IndexError:
243243+ pass
223244 with click.progressbar(
224224- utils.fetch_home_timeline(session),
225225- length=800,
245245+ utils.fetch_home_timeline(session, since_id=since_id),
246246+ length=expected_length,
226247 label="Importing timeline",
227248 show_pos=True,
228249 ) as bar:
+28-3
twitter_to_sqlite/utils.py
···99from dateutil import parser
1010from requests_oauthlib import OAuth1Session
11111212+# Twitter API error codes
1313+RATE_LIMIT_ERROR_CODE = 88
1414+12151316def session_for_auth(auth):
1417 return OAuth1Session(
···6164 args["count"] = stop_after
6265 args["tweet_mode"] = "extended"
6366 min_seen_id = None
6767+ num_rate_limit_errors = 0
6468 while True:
6569 if min_seen_id is not None:
6670 args["max_id"] = min_seen_id - 1
6767- tweets = session.get(url, params=args).json()
7171+ response = session.get(url, params=args)
7272+ tweets = response.json()
7373+ if "errors" in tweets:
7474+ # Was it a rate limit error? If so sleep and try again
7575+ if RATE_LIMIT_ERROR_CODE == tweets["errors"][0]["code"]:
7676+ num_rate_limit_errors += 1
7777+ assert num_rate_limit_errors < 5, "More than 5 rate limit errors"
7878+ print(
7979+ "Rate limit exceeded - will sleep 15s and try again {}".format(
8080+ repr(response.headers)
8181+ )
8282+ )
8383+ time.sleep(15)
8484+ continue
8585+ else:
8686+ raise Exception(str(tweets["errors"]))
6887 if not tweets:
6988 break
7089 for tweet in tweets:
···86105 )
87106881078989-def fetch_home_timeline(session):
def fetch_home_timeline(session, since_id=None):
    """Yield tweets from the home timeline endpoint, optionally bounded below.

    Args:
        session: Session object passed straight through to ``fetch_timeline``,
            which issues the HTTP requests.
        since_id: Optional tweet ID sent to the API as the ``since_id``
            request argument. Any falsy value (``None``, ``False``, ``""``)
            means "no lower bound" and the argument is omitted entirely.

    Yields:
        Whatever ``fetch_timeline`` yields for the home timeline URL.
    """
    # Test truthiness rather than ``is not None``: the ``--since_id`` CLI
    # option is declared with ``default=False``, and forwarding that value
    # would put a bogus ``since_id=False`` parameter on every request.
    args = {"since_id": since_id} if since_id else {}
    yield from fetch_timeline(
        session,
        "https://api.twitter.com/1.1/statuses/home_timeline.json",
        args,
        sleep=1,
    )
9311894119