this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Experimental follow/track commands, refs #11

+100
+56
twitter_to_sqlite/cli.py
# ··· diff context from the top of twitter_to_sqlite/cli.py (file line 1 is not shown) ···
import datetime
import os
import sqlite_utils
import time
import json
from twitter_to_sqlite import utils

# ··· unchanged code elided by the diff view; the visible context ends with a closing ")" ···


def _save_stream(db_path, auth, verbose, **filters):
    """Stream tweets matching ``filters`` and save each one to the database.

    Shared implementation of the ``track`` and ``follow`` commands.

    Args:
        db_path: Path of the SQLite database file to write to.
        auth: Path of the JSON token file to load credentials from.
        verbose: When true, pretty-print every received tweet to stdout.
        **filters: Keyword arguments forwarded unchanged to
            ``utils.stream_filter`` (for example ``track=...`` or
            ``follow=...``).
    """
    # Read the credentials inside a context manager so the file handle is
    # closed immediately instead of being left for the garbage collector.
    with open(auth) as fp:
        credentials = json.load(fp)
    session = utils.session_for_auth(credentials)
    db = sqlite_utils.Database(db_path)
    for tweet in utils.stream_filter(session, **filters):
        if verbose:
            print(json.dumps(tweet, indent=2))
        # One "with db.conn" block per tweet, so every tweet is written in
        # its own transaction as soon as it arrives.
        with db.conn:
            utils.save_tweets(db, [tweet])


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("track", type=str, required=True, nargs=-1)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
def track(db_path, track, auth, verbose):
    """Experimental: Save tweets matching these keywords in real-time"""
    # NOTE: the docstring above is the command's help text; keep its wording.
    _save_stream(db_path, auth, verbose, track=track)


@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.argument("follow", type=str, required=True, nargs=-1)
@click.option("--verbose", is_flag=True, help="Verbose mode: display every tweet")
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
def follow(db_path, follow, auth, verbose):
    """Experimental: Follow these Twitter users (numeric user IDs required) and save tweets in real-time"""
    # NOTE: the docstring above is the command's help text; keep its wording.
    _save_stream(db_path, auth, verbose, follow=follow)


# ··· remaining diff context, kept verbatim because the definition is truncated in this view ···
# 321 376 def _shared_friends_ids_followers_ids( 322 377 db_path, identifiers, attach, sql, auth, ids, sleep, api_url, first_key, second_key 323 378 ): ··· 344 399 ), 345 400 ignore=True, 346 401 ) 402 + time.sleep(sleep)
+44
twitter_to_sqlite/utils.py
# ··· earlier part of twitter_to_sqlite/utils.py elided by the diff view ···


def raise_if_error(r):
    """Raise ``TwitterApiError`` if the JSON body of response ``r`` has an "errors" key.

    The exception receives the response headers and the value stored under
    "errors".
    """
    body = r.json()  # decode once instead of parsing the body twice
    if "errors" in body:
        raise TwitterApiError(r.headers, body["errors"])


def stream_filter(session, track=None, follow=None, locations=None, language=None):
    """Yield tweets from the statuses/filter streaming endpoint, reconnecting forever.

    Args:
        session: Object with a requests-style ``post(url, params=..., stream=...)``
            method whose response offers ``iter_lines``, ``status_code`` and
            ``close``.
        track, follow, locations, language: Optional filter values. A string
            is sent as-is; any other iterable is joined with commas after
            converting each item to ``str``. ``None`` omits the parameter.

    Yields:
        dict: Each streamed JSON object that has both an "id" and a
        "created_at" key, after ``fix_streaming_tweet`` has been applied.
        Other JSON objects are printed instead of being yielded.
    """
    args = {"tweet_mode": "extended"}
    for key, value in (
        ("track", track),
        ("follow", follow),
        ("locations", locations),
        ("language", language),
    ):
        if value is None:
            continue
        if not isinstance(value, str):
            value = ",".join(map(str, value))
        args[key] = value

    delay = 1
    while True:
        # Ask for streaming on this request only; setting session.stream
        # would silently change every later request made with the session.
        response = session.post(
            "https://stream.twitter.com/1.1/statuses/filter.json",
            params=args,
            stream=True,
        )
        try:
            for line in response.iter_lines(chunk_size=10000):
                if line.strip().startswith(b"{"):
                    tweet = json.loads(line)
                    # Only yield tweet if it has an 'id' and 'created_at'
                    # - otherwise it's probably a maintenance message, see
                    # https://developer.twitter.com/en/docs/tweets/filter-realtime/overview/statuses-filter
                    if "id" in tweet and "created_at" in tweet:
                        # 'Fix' weird tweets from streaming API
                        fix_streaming_tweet(tweet)
                        yield tweet
                    else:
                        print(tweet)
            status = response.status_code
        finally:
            # Release the connection before reconnecting, and also when the
            # consumer stops iterating or an exception escapes.
            response.close()

        if status < 400:
            # The stream was accepted and then ended: reconnect quickly.
            delay = 1
        else:
            # Rejected connection: back off exponentially rather than
            # retrying every second. Delays follow Twitter's reconnect
            # guidance (5s for HTTP errors, 60s for HTTP 420, doubling),
            # capped here at 320s for every status.
            floor = 60 if status == 420 else 5
            delay = min(max(delay * 2, floor), 320)
        time.sleep(delay)


def fix_streaming_tweet(tweet):
    """Normalise a streaming-API tweet dict in place.

    * Keys nested under "extended_tweet" are merged into the top level and
      the "extended_tweet" key is removed.
    * "full_text" is filled from "text" when it is missing; a payload with
      neither key is left without "full_text" instead of raising KeyError.
    * The same treatment is applied recursively to "retweeted_status" and
      "quoted_status" when present.

    Returns None; the dict passed in is modified.
    """
    if "extended_tweet" in tweet:
        tweet.update(tweet.pop("extended_tweet"))
    if "full_text" not in tweet and "text" in tweet:
        tweet["full_text"] = tweet["text"]
    for nested_key in ("retweeted_status", "quoted_status"):
        if nested_key in tweet:
            fix_streaming_tweet(tweet[nested_key])