···22import datetime
33import os
44import sqlite_utils
55+import time
56import json
67from twitter_to_sqlite import utils
78···318319 )
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    "Experimental: Save tweets matching these keywords in real-time"
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes live in comments instead.
    #
    # db_path: SQLite database file to write to.
    # track:   one or more keywords (variadic argument, arrives as a tuple).
    # auth:    path to the JSON credentials file.
    # verbose: when set, pretty-print every received tweet to stdout.

    # Read the credentials inside a context manager so the file handle is
    # closed right away; this command then runs until it is interrupted.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    for tweet in utils.stream_filter(session, track=track):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Save one tweet per `with db.conn` block so each tweet is written
        # as it arrives (presumably its own transaction -- TODO confirm
        # db.conn is a sqlite3 connection).
        with db.conn:
            utils.save_tweets(db, [tweet])
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("follow", type=str, required=True, nargs=-1)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def follow(db_path, follow, auth, verbose):
    "Experimental: Follow these Twitter users (numeric user IDs required) and save tweets in real-time"
    # NOTE: the docstring above doubles as the command's --help text, so
    # implementation notes live in comments instead.
    #
    # db_path: SQLite database file to write to.
    # follow:  one or more user IDs (variadic argument, arrives as a tuple).
    # auth:    path to the JSON credentials file.
    # verbose: when set, pretty-print every received tweet to stdout.

    # Read the credentials inside a context manager so the file handle is
    # closed right away; this command then runs until it is interrupted.
    with open(auth) as auth_file:
        auth = json.load(auth_file)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    for tweet in utils.stream_filter(session, follow=follow):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # Save one tweet per `with db.conn` block so each tweet is written
        # as it arrives (presumably its own transaction -- TODO confirm
        # db.conn is a sqlite3 connection).
        with db.conn:
            utils.save_tweets(db, [tweet])
374374+375375+321376def _shared_friends_ids_followers_ids(
322377 db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key
323378):
···344399 ),
345400 ignore=True,
346401 )
402402+ time.sleep(sleep)
+44
twitter_to_sqlite/utils.py
def raise_if_error(r):
    """Raise TwitterApiError if the JSON body of response ``r`` reports errors.

    ``r`` must provide ``json()`` and ``headers``. Returns ``None`` when the
    decoded body has no top-level ``"errors"`` key.

    Raises:
        TwitterApiError: built from the response headers and the value of
            the ``"errors"`` key.
    """
    # Decode the body once instead of re-parsing it for the lookup.
    payload = r.json()
    # Only a JSON object can carry an "errors" key. Without this guard a
    # list or string body that merely contains "errors" would pass the
    # membership test and then fail with TypeError on the lookup.
    if isinstance(payload, dict) and "errors" in payload:
        raise TwitterApiError(r.headers, payload["errors"])
def stream_filter(session, track=None, follow=None, locations=None, language=None):
    """Yield tweets from the statuses/filter streaming endpoint, indefinitely.

    This is a generator that never finishes on its own: whenever the stream
    ends it waits one second and reconnects.

    Args:
        session: HTTP session whose ``post()`` returns a response offering
            ``iter_lines()`` and ``close()`` (presumably a requests-style
            session -- confirm against ``session_for_auth``). Its ``stream``
            attribute is set to ``True`` as a side effect.
        track, follow, locations, language: optional filter values. Each may
            be a string, sent as-is, or an iterable, whose items are
            stringified and comma-joined. ``None`` omits the parameter.

    Yields:
        Decoded tweet dicts that contain both ``id`` and ``created_at``,
        after normalisation by ``fix_streaming_tweet``. Any other JSON
        object received on the stream is printed rather than yielded.
    """
    session.stream = True
    args = {"tweet_mode": "extended"}
    for key, value in (
        ("track", track),
        ("follow", follow),
        ("locations", locations),
        ("language", language),
    ):
        if value is None:
            continue
        if not isinstance(value, str):
            value = ",".join(map(str, value))
        args[key] = value
    while True:
        response = session.post(
            "https://stream.twitter.com/1.1/statuses/filter.json", params=args
        )
        try:
            for line in response.iter_lines(chunk_size=10000):
                # Skip lines that are not JSON objects (e.g. blank lines).
                if line.strip().startswith(b"{"):
                    tweet = json.loads(line)
                    # Only yield tweet if it has an 'id' and 'created_at'
                    # - otherwise it's probably a maintenance message, see
                    # https://developer.twitter.com/en/docs/tweets/filter-realtime/overview/statuses-filter
                    if "id" in tweet and "created_at" in tweet:
                        # 'Fix' weird tweets from streaming API
                        fix_streaming_tweet(tweet)
                        yield tweet
                    else:
                        print(tweet)
        finally:
            # Release the streaming connection before reconnecting, and
            # also when the consumer closes the generator or an error
            # propagates; otherwise each reconnect leaks a connection.
            response.close()
        # Brief pause so a dropped stream is not re-requested in a tight loop.
        time.sleep(1)
def fix_streaming_tweet(tweet):
    """Normalise a streaming-API tweet dict in place.

    * Fields under ``extended_tweet`` are merged into the top level and the
      ``extended_tweet`` key is removed.
    * ``full_text`` is filled from ``text`` when it is missing.
    * The same fix is applied recursively to ``retweeted_status`` and
      ``quoted_status`` when present.

    Mutates ``tweet`` and returns ``None``.
    """
    try:
        extended_fields = tweet.pop("extended_tweet")
    except KeyError:
        # Nothing to merge for this tweet.
        pass
    else:
        tweet.update(extended_fields)

    if "full_text" not in tweet:
        tweet["full_text"] = tweet["text"]

    # Embedded tweets need the same treatment, in this order.
    for nested_key in ("retweeted_status", "quoted_status"):
        if nested_key in tweet:
            fix_streaming_tweet(tweet[nested_key])