this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

Implemented favorites and user-timeline commands

Plus tests

+1082 -12
+273
tests/test_save_tweets.py
"""Tests for ``utils.save_tweets`` using the recorded ``tweets.json`` fixture."""
import json
import pathlib

import pytest
import sqlite_utils

from twitter_to_sqlite import utils


@pytest.fixture
def tweets():
    """Return the list of tweet dicts stored in ``tweets.json`` next to this file."""
    fixture_path = pathlib.Path(__file__).parent / "tweets.json"
    # Explicit UTF-8 and a context manager: the fixture contains non-ASCII
    # text (curly quotes, ellipsis, emoji), and the handle must be closed
    # rather than left for the garbage collector.
    with fixture_path.open(encoding="utf-8") as fp:
        return json.load(fp)


def test_save_tweets(tweets):
    """Saving the fixture tweets yields exactly the expected rows.

    Checks that ``save_tweets`` creates only the ``tweets`` and ``users``
    tables in a fresh in-memory database, and that the rows of each table
    compare equal to the literal expectations below.
    """
    db = sqlite_utils.Database(memory=True)
    utils.save_tweets(db, tweets)
    assert {"tweets", "users"} == set(db.table_names())
    tweet_rows = list(db["tweets"].rows)
    user_rows = list(db["users"].rows)

    # Nested retweeted/quoted tweets from the fixture appear as their own
    # rows; the referencing row holds only the id of the nested tweet.
    expected_tweets = [
        {
            "id": 861696799362478100,
            "user": 14148390,
            "created_at": "2017-05-08T21:38:21+00:00",
            "full_text": "If you use Photos (mac) &amp; Live Photos, run this command to generate a lovely sound collage of where you’ve been https://gist.github.com/bwhitman/5be2f905556a25145dbac74fe4080739",
            "retweeted_status": None,
            "quoted_status": None,
            "truncated": 0,
            "display_text_range": "[0, 139]",
            "source": '<a href="http://itunes.apple.com/us/app/twitter/id409789998?mt=12" rel="nofollow">Twitter for Mac</a>',
            "in_reply_to_status_id": None,
            "in_reply_to_user_id": None,
            "in_reply_to_screen_name": None,
            "geo": None,
            "coordinates": None,
            "place": None,
            "contributors": None,
            "is_quote_status": 0,
            "retweet_count": 14,
            "favorite_count": 57,
            "favorited": 0,
            "retweeted": 0,
            "possibly_sensitive": 0,
            "lang": "en",
        },
        {
            "id": 1168529001599533000,
            "user": 12497,
            "created_at": "2019-09-02T14:19:58+00:00",
            "full_text": "Finally got around to running this script. It is BRILLIANT - it produces a concatenated .wav file of the audio from every live photo you've ever taken.\n\nNeeds quite a lot of disk space to run - the /tmp/picblast folder can take multiple GB https://twitter.com/bwhitman/status/861696799362478085",
            "retweeted_status": None,
            "quoted_status": 861696799362478100,
            "truncated": 0,
            "display_text_range": "[0, 239]",
            "source": '<a href="https://mobile.twitter.com" rel="nofollow">Twitter Web App</a>',
            "in_reply_to_status_id": None,
            "in_reply_to_user_id": None,
            "in_reply_to_screen_name": None,
            "geo": None,
            "coordinates": None,
            "place": None,
            "contributors": None,
            "is_quote_status": 1,
            "retweet_count": 4,
            "favorite_count": 31,
            "favorited": 0,
            "retweeted": 0,
            "possibly_sensitive": 0,
            "lang": "en",
        },
        {
            "id": 1169196446043664400,
            "user": 12497,
            "created_at": "2019-09-04T10:32:10+00:00",
            "full_text": "@scientiffic @Wikipedia @unsplash @cagarrity The @inaturalist API is amazingly powerful and fun with no auth and no rate limit. We used it to build http://www.owlsnearme.com - see also @Natbat's great tutorial on using it with @observablehq https://24ways.org/2018/observable-notebooks-and-inaturalist/",
            "retweeted_status": None,
            "quoted_status": None,
            "truncated": 0,
            "display_text_range": "[45, 262]",
            "source": '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
            "in_reply_to_status_id": "1169079390577320000",
            "in_reply_to_user_id": "82016165",
            "in_reply_to_screen_name": "scientiffic",
            "geo": None,
            "coordinates": None,
            "place": None,
            "contributors": None,
            "is_quote_status": 0,
            "retweet_count": 0,
            "favorite_count": 2,
            "favorited": 0,
            "retweeted": 0,
            "possibly_sensitive": 0,
            "lang": "en",
        },
        {
            "id": 1169242008432644000,
            "user": 22737278,
            "created_at": "2019-09-04T13:33:12+00:00",
            "full_text": "My new post: an explainer on “carbon capture &amp; utilization” (CCU). CO2 captured from waste gases or the ambient air can be used to make valuable products. Could CCU help the carbon capture industry scale up? https://www.vox.com/energy-and-environment/2019/9/4/20829431/climate-change-carbon-capture-utilization-sequestration-ccu-ccs?utm_campaign=drvox&utm_content=chorus&utm_medium=social&utm_source=twitter",
            "retweeted_status": None,
            "quoted_status": None,
            "truncated": 0,
            "display_text_range": "[0, 235]",
            "source": '<a href="http://www.voxmedia.com" rel="nofollow">Vox Media</a>',
            "in_reply_to_status_id": None,
            "in_reply_to_user_id": None,
            "in_reply_to_screen_name": None,
            "geo": None,
            "coordinates": None,
            "place": None,
            "contributors": None,
            "is_quote_status": 0,
            "retweet_count": 42,
            "favorite_count": 86,
            "favorited": 1,
            "retweeted": 1,
            "possibly_sensitive": 0,
            "lang": "en",
        },
        {
            "id": 1169246717864136700,
            "user": 12497,
            "created_at": "2019-09-04T13:51:55+00:00",
            "full_text": "RT @drvox: My new post: an explainer on “carbon capture &amp; utilization” (CCU). CO2 captured from waste gases or the ambient air can be used…",
            "retweeted_status": 1169242008432644000,
            "quoted_status": None,
            "truncated": 0,
            "display_text_range": "[0, 143]",
            "source": '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>',
            "in_reply_to_status_id": None,
            "in_reply_to_user_id": None,
            "in_reply_to_screen_name": None,
            "geo": None,
            "coordinates": None,
            "place": None,
            "contributors": None,
            "is_quote_status": 0,
            "retweet_count": 42,
            "favorite_count": 0,
            "favorited": 1,
            "retweeted": 1,
            # The retweet in the fixture carries no "possibly_sensitive" key.
            "possibly_sensitive": None,
            "lang": "en",
        },
    ]
    assert expected_tweets == tweet_rows

    # One row per distinct author, even though user 12497 wrote three of
    # the fixture tweets.
    expected_users = [
        {
            "id": 12497,
            "name": "Simon Willison",
            "screen_name": "simonw",
            "location": "San Francisco, CA",
            "description": "Creator of Datasette, co-creator Django. Fellow at @JSKstanford. Usually hanging out with @natbat and @cleopaws. He/Him",
            "url": "https://simonwillison.net/",
            "protected": 0,
            "followers_count": 17754,
            "friends_count": 3460,
            "listed_count": 1230,
            "created_at": "2006-11-15T13:18:50+00:00",
            "favourites_count": 21506,
            "utc_offset": None,
            "time_zone": None,
            "geo_enabled": 1,
            "verified": 1,
            "statuses_count": 17780,
            "lang": None,
            "contributors_enabled": 0,
            "is_translator": 0,
            "is_translation_enabled": 0,
            "profile_background_color": "000000",
            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png",
            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png",
            "profile_background_tile": 0,
            "profile_image_url": "http://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg",
            "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg",
            "profile_banner_url": "https://pbs.twimg.com/profile_banners/12497/1347977147",
            "profile_link_color": "0000FF",
            "profile_sidebar_border_color": "FFFFFF",
            "profile_sidebar_fill_color": "FFFFFF",
            "profile_text_color": "000000",
            "profile_use_background_image": 1,
            "has_extended_profile": 1,
            "default_profile": 0,
            "default_profile_image": 0,
            "following": 0,
            "follow_request_sent": 0,
            "notifications": 0,
            "translator_type": "regular",
        },
        {
            "id": 14148390,
            "name": "Brian Whitman",
            "screen_name": "bwhitman",
            "location": "Fort Greene NYC",
            "description": "finding the good @ourcanopy with the best people. was CTO/cofounder of Echo Nest, then research @ Spotify. always music",
            "url": "https://notes.variogr.am/about/",
            "protected": 0,
            "followers_count": 4300,
            "friends_count": 639,
            "listed_count": 235,
            "created_at": "2008-03-14T18:19:20+00:00",
            "favourites_count": 8966,
            "utc_offset": None,
            "time_zone": None,
            "geo_enabled": 1,
            "verified": 0,
            "statuses_count": 2192,
            "lang": None,
            "contributors_enabled": 0,
            "is_translator": 0,
            "is_translation_enabled": 0,
            "profile_background_color": "FFFFFF",
            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme13/bg.gif",
            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme13/bg.gif",
            "profile_background_tile": 0,
            "profile_image_url": "http://pbs.twimg.com/profile_images/742302060/avatars-000000620200-z21ozh-crop_normal.jpeg",
            "profile_image_url_https": "https://pbs.twimg.com/profile_images/742302060/avatars-000000620200-z21ozh-crop_normal.jpeg",
            "profile_banner_url": "https://pbs.twimg.com/profile_banners/14148390/1398269147",
            "profile_link_color": "911A1A",
            "profile_sidebar_border_color": "EEEEEE",
            "profile_sidebar_fill_color": "FFFFFF",
            "profile_text_color": "333333",
            "profile_use_background_image": 0,
            "has_extended_profile": 1,
            "default_profile": 0,
            "default_profile_image": 0,
            "following": 0,
            "follow_request_sent": 0,
            "notifications": 0,
            "translator_type": "none",
        },
        {
            "id": 22737278,
            "name": "David Roberts",
            "screen_name": "drvox",
            "location": "Seattle, WA",
            "description": "Seattleite transplanted from Tennessee; now blogging for http://Vox.com about energy politics. Climate hawk, deficit dove. Not a doctor.",
            "url": "http://www.vox.com/authors/david-roberts",
            "protected": 0,
            "followers_count": 132789,
            "friends_count": 2723,
            "listed_count": 4644,
            "created_at": "2009-03-04T05:14:12+00:00",
            "favourites_count": 26,
            "utc_offset": None,
            "time_zone": None,
            "geo_enabled": 0,
            "verified": 1,
            "statuses_count": 13887,
            "lang": None,
            "contributors_enabled": 0,
            "is_translator": 0,
            "is_translation_enabled": 0,
            "profile_background_color": "022330",
            "profile_background_image_url": "http://abs.twimg.com/images/themes/theme15/bg.png",
            "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme15/bg.png",
            "profile_background_tile": 0,
            "profile_image_url": "http://pbs.twimg.com/profile_images/551076081051004929/2i4QEfn-_normal.jpeg",
            "profile_image_url_https": "https://pbs.twimg.com/profile_images/551076081051004929/2i4QEfn-_normal.jpeg",
            "profile_banner_url": "https://pbs.twimg.com/profile_banners/22737278/1433745271",
            "profile_link_color": "0084B4",
            "profile_sidebar_border_color": "A8C7F7",
            "profile_sidebar_fill_color": "C0DFEC",
            "profile_text_color": "333333",
            "profile_use_background_image": 1,
            "has_extended_profile": 0,
            "default_profile": 0,
            "default_profile_image": 0,
            "following": 1,
            "follow_request_sent": 0,
            "notifications": 0,
            "translator_type": "none",
        },
    ]
    assert expected_users == user_rows
+624
tests/tweets.json
··· 1 + [ 2 + { 3 + "created_at": "Wed Sep 04 13:51:55 +0000 2019", 4 + "id": 1169246717864136700, 5 + "id_str": "1169246717864136705", 6 + "full_text": "RT @drvox: My new post: an explainer on “carbon capture &amp; utilization” (CCU). CO2 captured from waste gases or the ambient air can be used…", 7 + "truncated": false, 8 + "display_text_range": [ 9 + 0, 10 + 143 11 + ], 12 + "entities": { 13 + "hashtags": [], 14 + "symbols": [], 15 + "user_mentions": [ 16 + { 17 + "screen_name": "drvox", 18 + "name": "David Roberts", 19 + "id": 22737278, 20 + "id_str": "22737278", 21 + "indices": [ 22 + 3, 23 + 9 24 + ] 25 + } 26 + ], 27 + "urls": [] 28 + }, 29 + "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", 30 + "in_reply_to_status_id": null, 31 + "in_reply_to_status_id_str": null, 32 + "in_reply_to_user_id": null, 33 + "in_reply_to_user_id_str": null, 34 + "in_reply_to_screen_name": null, 35 + "user": { 36 + "id": 12497, 37 + "id_str": "12497", 38 + "name": "Simon Willison", 39 + "screen_name": "simonw", 40 + "location": "San Francisco, CA", 41 + "description": "Creator of Datasette, co-creator Django. Fellow at @JSKstanford. Usually hanging out with @natbat and @cleopaws. 
He/Him", 42 + "url": "https://t.co/wyNggeHZ8W", 43 + "entities": { 44 + "url": { 45 + "urls": [ 46 + { 47 + "url": "https://t.co/wyNggeHZ8W", 48 + "expanded_url": "https://simonwillison.net/", 49 + "display_url": "simonwillison.net", 50 + "indices": [ 51 + 0, 52 + 23 53 + ] 54 + } 55 + ] 56 + }, 57 + "description": { 58 + "urls": [] 59 + } 60 + }, 61 + "protected": false, 62 + "followers_count": 17754, 63 + "friends_count": 3460, 64 + "listed_count": 1230, 65 + "created_at": "Wed Nov 15 13:18:50 +0000 2006", 66 + "favourites_count": 21506, 67 + "utc_offset": null, 68 + "time_zone": null, 69 + "geo_enabled": true, 70 + "verified": true, 71 + "statuses_count": 17780, 72 + "lang": null, 73 + "contributors_enabled": false, 74 + "is_translator": false, 75 + "is_translation_enabled": false, 76 + "profile_background_color": "000000", 77 + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", 78 + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 79 + "profile_background_tile": false, 80 + "profile_image_url": "http://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 81 + "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 82 + "profile_banner_url": "https://pbs.twimg.com/profile_banners/12497/1347977147", 83 + "profile_link_color": "0000FF", 84 + "profile_sidebar_border_color": "FFFFFF", 85 + "profile_sidebar_fill_color": "FFFFFF", 86 + "profile_text_color": "000000", 87 + "profile_use_background_image": true, 88 + "has_extended_profile": true, 89 + "default_profile": false, 90 + "default_profile_image": false, 91 + "following": false, 92 + "follow_request_sent": false, 93 + "notifications": false, 94 + "translator_type": "regular" 95 + }, 96 + "geo": null, 97 + "coordinates": null, 98 + "place": null, 99 + "contributors": null, 100 + "retweeted_status": { 101 + "created_at": "Wed 
Sep 04 13:33:12 +0000 2019", 102 + "id": 1169242008432644000, 103 + "id_str": "1169242008432644097", 104 + "full_text": "My new post: an explainer on “carbon capture &amp; utilization” (CCU). CO2 captured from waste gases or the ambient air can be used to make valuable products. Could CCU help the carbon capture industry scale up? https://t.co/cVDz7Xxi4E", 105 + "truncated": false, 106 + "display_text_range": [ 107 + 0, 108 + 235 109 + ], 110 + "entities": { 111 + "hashtags": [], 112 + "symbols": [], 113 + "user_mentions": [], 114 + "urls": [ 115 + { 116 + "url": "https://t.co/cVDz7Xxi4E", 117 + "expanded_url": "https://www.vox.com/energy-and-environment/2019/9/4/20829431/climate-change-carbon-capture-utilization-sequestration-ccu-ccs?utm_campaign=drvox&utm_content=chorus&utm_medium=social&utm_source=twitter", 118 + "display_url": "vox.com/energy-and-env…", 119 + "indices": [ 120 + 212, 121 + 235 122 + ] 123 + } 124 + ] 125 + }, 126 + "source": "<a href=\"http://www.voxmedia.com\" rel=\"nofollow\">Vox Media</a>", 127 + "in_reply_to_status_id": null, 128 + "in_reply_to_status_id_str": null, 129 + "in_reply_to_user_id": null, 130 + "in_reply_to_user_id_str": null, 131 + "in_reply_to_screen_name": null, 132 + "user": { 133 + "id": 22737278, 134 + "id_str": "22737278", 135 + "name": "David Roberts", 136 + "screen_name": "drvox", 137 + "location": "Seattle, WA", 138 + "description": "Seattleite transplanted from Tennessee; now blogging for https://t.co/5gESirnht7 about energy politics. Climate hawk, deficit dove. 
Not a doctor.", 139 + "url": "http://t.co/AMWwRyre24", 140 + "entities": { 141 + "url": { 142 + "urls": [ 143 + { 144 + "url": "http://t.co/AMWwRyre24", 145 + "expanded_url": "http://www.vox.com/authors/david-roberts", 146 + "display_url": "vox.com/authors/david-…", 147 + "indices": [ 148 + 0, 149 + 22 150 + ] 151 + } 152 + ] 153 + }, 154 + "description": { 155 + "urls": [ 156 + { 157 + "url": "https://t.co/5gESirnht7", 158 + "expanded_url": "http://Vox.com", 159 + "display_url": "Vox.com", 160 + "indices": [ 161 + 57, 162 + 80 163 + ] 164 + } 165 + ] 166 + } 167 + }, 168 + "protected": false, 169 + "followers_count": 132789, 170 + "friends_count": 2723, 171 + "listed_count": 4644, 172 + "created_at": "Wed Mar 04 05:14:12 +0000 2009", 173 + "favourites_count": 26, 174 + "utc_offset": null, 175 + "time_zone": null, 176 + "geo_enabled": false, 177 + "verified": true, 178 + "statuses_count": 13887, 179 + "lang": null, 180 + "contributors_enabled": false, 181 + "is_translator": false, 182 + "is_translation_enabled": false, 183 + "profile_background_color": "022330", 184 + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme15/bg.png", 185 + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme15/bg.png", 186 + "profile_background_tile": false, 187 + "profile_image_url": "http://pbs.twimg.com/profile_images/551076081051004929/2i4QEfn-_normal.jpeg", 188 + "profile_image_url_https": "https://pbs.twimg.com/profile_images/551076081051004929/2i4QEfn-_normal.jpeg", 189 + "profile_banner_url": "https://pbs.twimg.com/profile_banners/22737278/1433745271", 190 + "profile_link_color": "0084B4", 191 + "profile_sidebar_border_color": "A8C7F7", 192 + "profile_sidebar_fill_color": "C0DFEC", 193 + "profile_text_color": "333333", 194 + "profile_use_background_image": true, 195 + "has_extended_profile": false, 196 + "default_profile": false, 197 + "default_profile_image": false, 198 + "following": true, 199 + "follow_request_sent": false, 
200 + "notifications": false, 201 + "translator_type": "none" 202 + }, 203 + "geo": null, 204 + "coordinates": null, 205 + "place": null, 206 + "contributors": null, 207 + "is_quote_status": false, 208 + "retweet_count": 42, 209 + "favorite_count": 86, 210 + "favorited": true, 211 + "retweeted": true, 212 + "possibly_sensitive": false, 213 + "lang": "en" 214 + }, 215 + "is_quote_status": false, 216 + "retweet_count": 42, 217 + "favorite_count": 0, 218 + "favorited": true, 219 + "retweeted": true, 220 + "lang": "en" 221 + }, 222 + { 223 + "created_at": "Wed Sep 04 10:32:10 +0000 2019", 224 + "id": 1169196446043664400, 225 + "id_str": "1169196446043664385", 226 + "full_text": "@scientiffic @Wikipedia @unsplash @cagarrity The @inaturalist API is amazingly powerful and fun with no auth and no rate limit. We used it to build https://t.co/q17EOpkGc3 - see also @Natbat's great tutorial on using it with @observablehq https://t.co/WbYktnYxBc", 227 + "truncated": false, 228 + "display_text_range": [ 229 + 45, 230 + 262 231 + ], 232 + "entities": { 233 + "hashtags": [], 234 + "symbols": [], 235 + "user_mentions": [ 236 + { 237 + "screen_name": "scientiffic", 238 + "name": "Tiffany Tseng 🍡", 239 + "id": 82016165, 240 + "id_str": "82016165", 241 + "indices": [ 242 + 0, 243 + 12 244 + ] 245 + }, 246 + { 247 + "screen_name": "Wikipedia", 248 + "name": "Wikipedia", 249 + "id": 86390214, 250 + "id_str": "86390214", 251 + "indices": [ 252 + 13, 253 + 23 254 + ] 255 + }, 256 + { 257 + "screen_name": "unsplash", 258 + "name": "Unsplash", 259 + "id": 1520228526, 260 + "id_str": "1520228526", 261 + "indices": [ 262 + 24, 263 + 33 264 + ] 265 + }, 266 + { 267 + "screen_name": "cagarrity", 268 + "name": "Chris Garrity", 269 + "id": 92321453, 270 + "id_str": "92321453", 271 + "indices": [ 272 + 34, 273 + 44 274 + ] 275 + }, 276 + { 277 + "screen_name": "inaturalist", 278 + "name": "iNaturalist", 279 + "id": 14239043, 280 + "id_str": "14239043", 281 + "indices": [ 282 + 49, 283 + 61 284 + ] 
285 + }, 286 + { 287 + "screen_name": "Natbat", 288 + "name": "Natbat", 289 + "id": 12161, 290 + "id_str": "12161", 291 + "indices": [ 292 + 183, 293 + 190 294 + ] 295 + }, 296 + { 297 + "screen_name": "observablehq", 298 + "name": "Observable", 299 + "id": 905255756789825500, 300 + "id_str": "905255756789825536", 301 + "indices": [ 302 + 225, 303 + 238 304 + ] 305 + } 306 + ], 307 + "urls": [ 308 + { 309 + "url": "https://t.co/q17EOpkGc3", 310 + "expanded_url": "http://www.owlsnearme.com", 311 + "display_url": "owlsnearme.com", 312 + "indices": [ 313 + 148, 314 + 171 315 + ] 316 + }, 317 + { 318 + "url": "https://t.co/WbYktnYxBc", 319 + "expanded_url": "https://24ways.org/2018/observable-notebooks-and-inaturalist/", 320 + "display_url": "24ways.org/2018/observabl…", 321 + "indices": [ 322 + 239, 323 + 262 324 + ] 325 + } 326 + ] 327 + }, 328 + "source": "<a href=\"http://twitter.com/download/iphone\" rel=\"nofollow\">Twitter for iPhone</a>", 329 + "in_reply_to_status_id": 1169079390577320000, 330 + "in_reply_to_status_id_str": "1169079390577319937", 331 + "in_reply_to_user_id": 82016165, 332 + "in_reply_to_user_id_str": "82016165", 333 + "in_reply_to_screen_name": "scientiffic", 334 + "user": { 335 + "id": 12497, 336 + "id_str": "12497", 337 + "name": "Simon Willison", 338 + "screen_name": "simonw", 339 + "location": "San Francisco, CA", 340 + "description": "Creator of Datasette, co-creator Django. Fellow at @JSKstanford. Usually hanging out with @natbat and @cleopaws. 
He/Him", 341 + "url": "https://t.co/wyNggeHZ8W", 342 + "entities": { 343 + "url": { 344 + "urls": [ 345 + { 346 + "url": "https://t.co/wyNggeHZ8W", 347 + "expanded_url": "https://simonwillison.net/", 348 + "display_url": "simonwillison.net", 349 + "indices": [ 350 + 0, 351 + 23 352 + ] 353 + } 354 + ] 355 + }, 356 + "description": { 357 + "urls": [] 358 + } 359 + }, 360 + "protected": false, 361 + "followers_count": 17754, 362 + "friends_count": 3460, 363 + "listed_count": 1230, 364 + "created_at": "Wed Nov 15 13:18:50 +0000 2006", 365 + "favourites_count": 21506, 366 + "utc_offset": null, 367 + "time_zone": null, 368 + "geo_enabled": true, 369 + "verified": true, 370 + "statuses_count": 17780, 371 + "lang": null, 372 + "contributors_enabled": false, 373 + "is_translator": false, 374 + "is_translation_enabled": false, 375 + "profile_background_color": "000000", 376 + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", 377 + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 378 + "profile_background_tile": false, 379 + "profile_image_url": "http://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 380 + "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 381 + "profile_banner_url": "https://pbs.twimg.com/profile_banners/12497/1347977147", 382 + "profile_link_color": "0000FF", 383 + "profile_sidebar_border_color": "FFFFFF", 384 + "profile_sidebar_fill_color": "FFFFFF", 385 + "profile_text_color": "000000", 386 + "profile_use_background_image": true, 387 + "has_extended_profile": true, 388 + "default_profile": false, 389 + "default_profile_image": false, 390 + "following": false, 391 + "follow_request_sent": false, 392 + "notifications": false, 393 + "translator_type": "regular" 394 + }, 395 + "geo": null, 396 + "coordinates": null, 397 + "place": null, 398 + "contributors": 
null, 399 + "is_quote_status": false, 400 + "retweet_count": 0, 401 + "favorite_count": 2, 402 + "favorited": false, 403 + "retweeted": false, 404 + "possibly_sensitive": false, 405 + "lang": "en" 406 + }, 407 + { 408 + "created_at": "Mon Sep 02 14:19:58 +0000 2019", 409 + "id": 1168529001599533000, 410 + "id_str": "1168529001599533057", 411 + "full_text": "Finally got around to running this script. It is BRILLIANT - it produces a concatenated .wav file of the audio from every live photo you've ever taken.\n\nNeeds quite a lot of disk space to run - the /tmp/picblast folder can take multiple GB https://t.co/AJNTJbhF0g", 412 + "truncated": false, 413 + "display_text_range": [ 414 + 0, 415 + 239 416 + ], 417 + "entities": { 418 + "hashtags": [], 419 + "symbols": [], 420 + "user_mentions": [], 421 + "urls": [ 422 + { 423 + "url": "https://t.co/AJNTJbhF0g", 424 + "expanded_url": "https://twitter.com/bwhitman/status/861696799362478085", 425 + "display_url": "twitter.com/bwhitman/statu…", 426 + "indices": [ 427 + 240, 428 + 263 429 + ] 430 + } 431 + ] 432 + }, 433 + "source": "<a href=\"https://mobile.twitter.com\" rel=\"nofollow\">Twitter Web App</a>", 434 + "in_reply_to_status_id": null, 435 + "in_reply_to_status_id_str": null, 436 + "in_reply_to_user_id": null, 437 + "in_reply_to_user_id_str": null, 438 + "in_reply_to_screen_name": null, 439 + "user": { 440 + "id": 12497, 441 + "id_str": "12497", 442 + "name": "Simon Willison", 443 + "screen_name": "simonw", 444 + "location": "San Francisco, CA", 445 + "description": "Creator of Datasette, co-creator Django. Fellow at @JSKstanford. Usually hanging out with @natbat and @cleopaws. 
He/Him", 446 + "url": "https://t.co/wyNggeHZ8W", 447 + "entities": { 448 + "url": { 449 + "urls": [ 450 + { 451 + "url": "https://t.co/wyNggeHZ8W", 452 + "expanded_url": "https://simonwillison.net/", 453 + "display_url": "simonwillison.net", 454 + "indices": [ 455 + 0, 456 + 23 457 + ] 458 + } 459 + ] 460 + }, 461 + "description": { 462 + "urls": [] 463 + } 464 + }, 465 + "protected": false, 466 + "followers_count": 17754, 467 + "friends_count": 3460, 468 + "listed_count": 1230, 469 + "created_at": "Wed Nov 15 13:18:50 +0000 2006", 470 + "favourites_count": 21506, 471 + "utc_offset": null, 472 + "time_zone": null, 473 + "geo_enabled": true, 474 + "verified": true, 475 + "statuses_count": 17780, 476 + "lang": null, 477 + "contributors_enabled": false, 478 + "is_translator": false, 479 + "is_translation_enabled": false, 480 + "profile_background_color": "000000", 481 + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme1/bg.png", 482 + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme1/bg.png", 483 + "profile_background_tile": false, 484 + "profile_image_url": "http://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 485 + "profile_image_url_https": "https://pbs.twimg.com/profile_images/378800000261649705/be9cc55e64014e6d7663c50d7cb9fc75_normal.jpeg", 486 + "profile_banner_url": "https://pbs.twimg.com/profile_banners/12497/1347977147", 487 + "profile_link_color": "0000FF", 488 + "profile_sidebar_border_color": "FFFFFF", 489 + "profile_sidebar_fill_color": "FFFFFF", 490 + "profile_text_color": "000000", 491 + "profile_use_background_image": true, 492 + "has_extended_profile": true, 493 + "default_profile": false, 494 + "default_profile_image": false, 495 + "following": false, 496 + "follow_request_sent": false, 497 + "notifications": false, 498 + "translator_type": "regular" 499 + }, 500 + "geo": null, 501 + "coordinates": null, 502 + "place": null, 503 + "contributors": 
null, 504 + "is_quote_status": true, 505 + "quoted_status_id": 861696799362478100, 506 + "quoted_status_id_str": "861696799362478085", 507 + "quoted_status_permalink": { 508 + "url": "https://t.co/AJNTJbhF0g", 509 + "expanded": "https://twitter.com/bwhitman/status/861696799362478085", 510 + "display": "twitter.com/bwhitman/statu…" 511 + }, 512 + "quoted_status": { 513 + "created_at": "Mon May 08 21:38:21 +0000 2017", 514 + "id": 861696799362478100, 515 + "id_str": "861696799362478085", 516 + "full_text": "If you use Photos (mac) &amp; Live Photos, run this command to generate a lovely sound collage of where you’ve been https://t.co/cEbhE4P3ZM", 517 + "truncated": false, 518 + "display_text_range": [ 519 + 0, 520 + 139 521 + ], 522 + "entities": { 523 + "hashtags": [], 524 + "symbols": [], 525 + "user_mentions": [], 526 + "urls": [ 527 + { 528 + "url": "https://t.co/cEbhE4P3ZM", 529 + "expanded_url": "https://gist.github.com/bwhitman/5be2f905556a25145dbac74fe4080739", 530 + "display_url": "gist.github.com/bwhitman/5be2f…", 531 + "indices": [ 532 + 116, 533 + 139 534 + ] 535 + } 536 + ] 537 + }, 538 + "source": "<a href=\"http://itunes.apple.com/us/app/twitter/id409789998?mt=12\" rel=\"nofollow\">Twitter for Mac</a>", 539 + "in_reply_to_status_id": null, 540 + "in_reply_to_status_id_str": null, 541 + "in_reply_to_user_id": null, 542 + "in_reply_to_user_id_str": null, 543 + "in_reply_to_screen_name": null, 544 + "user": { 545 + "id": 14148390, 546 + "id_str": "14148390", 547 + "name": "Brian Whitman", 548 + "screen_name": "bwhitman", 549 + "location": "Fort Greene NYC", 550 + "description": "finding the good @ourcanopy with the best people. was CTO/cofounder of Echo Nest, then research @ Spotify. 
always music", 551 + "url": "https://t.co/S9eq2BkZsn", 552 + "entities": { 553 + "url": { 554 + "urls": [ 555 + { 556 + "url": "https://t.co/S9eq2BkZsn", 557 + "expanded_url": "https://notes.variogr.am/about/", 558 + "display_url": "notes.variogr.am/about/", 559 + "indices": [ 560 + 0, 561 + 23 562 + ] 563 + } 564 + ] 565 + }, 566 + "description": { 567 + "urls": [] 568 + } 569 + }, 570 + "protected": false, 571 + "followers_count": 4300, 572 + "friends_count": 639, 573 + "listed_count": 235, 574 + "created_at": "Fri Mar 14 18:19:20 +0000 2008", 575 + "favourites_count": 8966, 576 + "utc_offset": null, 577 + "time_zone": null, 578 + "geo_enabled": true, 579 + "verified": false, 580 + "statuses_count": 2192, 581 + "lang": null, 582 + "contributors_enabled": false, 583 + "is_translator": false, 584 + "is_translation_enabled": false, 585 + "profile_background_color": "FFFFFF", 586 + "profile_background_image_url": "http://abs.twimg.com/images/themes/theme13/bg.gif", 587 + "profile_background_image_url_https": "https://abs.twimg.com/images/themes/theme13/bg.gif", 588 + "profile_background_tile": false, 589 + "profile_image_url": "http://pbs.twimg.com/profile_images/742302060/avatars-000000620200-z21ozh-crop_normal.jpeg", 590 + "profile_image_url_https": "https://pbs.twimg.com/profile_images/742302060/avatars-000000620200-z21ozh-crop_normal.jpeg", 591 + "profile_banner_url": "https://pbs.twimg.com/profile_banners/14148390/1398269147", 592 + "profile_link_color": "911A1A", 593 + "profile_sidebar_border_color": "EEEEEE", 594 + "profile_sidebar_fill_color": "FFFFFF", 595 + "profile_text_color": "333333", 596 + "profile_use_background_image": false, 597 + "has_extended_profile": true, 598 + "default_profile": false, 599 + "default_profile_image": false, 600 + "following": false, 601 + "follow_request_sent": false, 602 + "notifications": false, 603 + "translator_type": "none" 604 + }, 605 + "geo": null, 606 + "coordinates": null, 607 + "place": null, 608 + "contributors": 
null, 609 + "is_quote_status": false, 610 + "retweet_count": 14, 611 + "favorite_count": 57, 612 + "favorited": false, 613 + "retweeted": false, 614 + "possibly_sensitive": false, 615 + "lang": "en" 616 + }, 617 + "retweet_count": 4, 618 + "favorite_count": 31, 619 + "favorited": false, 620 + "retweeted": false, 621 + "possibly_sensitive": false, 622 + "lang": "en" 623 + } 624 + ]
+57 -1
twitter_to_sqlite/cli.py
@cli.command()
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
def favorites(db_path, auth, user_id, screen_name):
    """Save tweets favorited by specified user"""
    # Read the credentials inside a context manager so the file handle is
    # closed straight away instead of being left for the garbage collector.
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    # The progress bar wraps the lazy fetcher, so it advances as
    # save_tweets() consumes tweets from it.
    with click.progressbar(
        utils.fetch_favorites(session, user_id, screen_name),
        label="Importing favorites",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar)


@cli.command(name="user-timeline")
@click.argument(
    "db_path",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=False),
    required=True,
)
@click.option(
    "-a",
    "--auth",
    type=click.Path(file_okay=True, dir_okay=False, allow_dash=True, exists=True),
    default="auth.json",
    help="Path to auth.json token file",
)
@click.option("--user_id", help="Numeric user ID")
@click.option("--screen_name", help="Screen name")
def user_timeline(db_path, auth, user_id, screen_name):
    """Save tweets posted by specified user"""
    # Read the credentials inside a context manager so the file handle is
    # closed straight away instead of being left for the garbage collector.
    with open(auth) as fp:
        auth = json.load(fp)
    session = utils.session_for_auth(auth)
    db = sqlite_utils.Database(db_path)
    # The progress bar wraps the lazy fetcher, so it advances as
    # save_tweets() consumes tweets from it.
    with click.progressbar(
        utils.fetch_user_timeline(session, user_id, screen_name),
        label="Importing tweets",
        show_pos=True,
    ) as bar:
        utils.save_tweets(db, bar)
+128 -11
twitter_to_sqlite/utils.py
··· 1 1 from requests_oauthlib import OAuth1Session 2 + from dateutil import parser 2 3 import time 4 + import json 3 5 import urllib.parse 4 6 5 7 ··· 12 14 ) 13 15 14 16 15 - def fetch_follower_chunks(session, user_id, screen_name): 17 + def fetch_follower_chunks(session, user_id, screen_name, sleep=61): 16 18 cursor = -1 17 19 users = [] 18 20 while cursor: ··· 21 23 cursor = body["next_cursor"] 22 24 if not cursor: 23 25 break 24 - time.sleep(61) # Rate limit = 15 per 15 minutes! 26 + time.sleep(sleep) # Rate limit = 15 per 15 minutes! 25 27 26 28 27 29 def fetch_followers(session, cursor, user_id, screen_name): 28 - args = {"count": 200, "cursor": cursor} 29 - if user_id: 30 - args["user_id"] = user_id 31 - if screen_name: 32 - args["screen_name"] = screen_name 30 + args = user_args(user_id, screen_name) 31 + args.update({"count": 200, "cursor": cursor}) 33 32 r = session.get( 34 33 "https://api.twitter.com/1.1/followers/list.json?" 35 34 + urllib.parse.urlencode(args) ··· 42 41 return session.get( 43 42 "https://api.twitter.com/1.1/account/verify_credentials.json" 44 43 ).json() 44 + args = user_args(user_id, screen_name) 45 + url = "https://api.twitter.com/1.1/users/show.json" 46 + if args: 47 + url += "?" 
def fetch_timeline(session, url, args, sleep=1):
    """Yield every tweet from a paginated timeline endpoint, newest page first.

    ``args`` is copied, then ``count=200`` and ``tweet_mode=extended`` are
    added.  After each page the next request sets ``max_id`` to one below the
    smallest tweet ID seen on that page.  Iteration stops at the first empty
    response.

    :param session: object with a requests-style ``get(url, params=...)``
    :param url: timeline endpoint URL
    :param args: base query arguments (not mutated)
    :param sleep: seconds to pause after each non-empty page
    :raises ValueError: if the decoded response is non-empty but not a list
    """
    # See https://developer.twitter.com/en/docs/tweets/timelines/guides/working-with-timelines
    args = dict(args)
    args["count"] = 200
    args["tweet_mode"] = "extended"
    min_seen_id = None
    while True:
        if min_seen_id is not None:
            args["max_id"] = min_seen_id - 1
        tweets = session.get(url, params=args).json()
        if not tweets:
            break
        if not isinstance(tweets, list):
            # Presumably an API error payload (e.g. rate limiting) - confirm
            # against the Twitter API docs.  Iterating a dict would yield its
            # keys and fail later with an opaque TypeError, so fail clearly.
            raise ValueError(
                "Unexpected response from {}: {!r}".format(url, tweets)
            )
        for tweet in tweets:
            yield tweet
        min_seen_id = min(t["id"] for t in tweets)
        time.sleep(sleep)


def fetch_user_timeline(session, user_id, screen_name):
    """Yield tweets from the statuses/user_timeline endpoint for a user."""
    args = user_args(user_id, screen_name)
    yield from fetch_timeline(
        session,
        "https://api.twitter.com/1.1/statuses/user_timeline.json",
        args,
        sleep=1,
    )


def fetch_favorites(session, user_id, screen_name):
    """Yield tweets from the favorites/list endpoint for a user."""
    args = user_args(user_id, screen_name)
    # Rate limit 75/15 mins = 5/minute = every 12 seconds
    sleep = 12
    yield from fetch_timeline(
        session, "https://api.twitter.com/1.1/favorites/list.json", args, sleep=sleep
    )


def user_args(user_id, screen_name):
    """Return query arguments holding whichever identifiers are truthy."""
    args = {}
    if user_id:
        args["user_id"] = user_id
    if screen_name:
        args["screen_name"] = screen_name
    return args


def save_tweet(db, tweet):
    """Placeholder: currently does nothing."""
    pass


def expand_entities(s, entities):
    """Return ``s`` with each entity's short ``url`` replaced by its expansion.

    ``entities`` maps entity type to a list of entity dicts.  Entities without
    a ``url`` key are ignored.  When ``expanded_url`` is missing or falsy the
    original ``url`` is kept, leaving the text unchanged for that entity.
    """
    for ents in entities.values():
        for ent in ents:
            if "url" in ent:
                replacement = ent.get("expanded_url") or ent["url"]
                s = s.replace(ent["url"], replacement)
    return s


def transform_user(user):
    """Normalise a user dict in place before it is stored.

    Parses ``created_at``, expands shortened URLs in ``description`` and
    ``url`` using the user's entities, then removes ``entities``, ``status``
    and every key ending in ``_str``.
    """
    user["created_at"] = parser.parse(user["created_at"])
    # Tolerate user objects that carry no "entities" key at all.
    entities = user.get("entities") or {}
    if user["description"] and "description" in entities:
        user["description"] = expand_entities(
            user["description"], entities["description"]
        )
    if user["url"] and "url" in entities:
        user["url"] = expand_entities(user["url"], entities["url"])
    user.pop("entities", None)
    user.pop("status", None)
    # Snapshot the keys first: the dict cannot change size while iterated.
    for key in [k for k in user if k.endswith("_str")]:
        del user[key]


def transform_tweet(tweet):
    """Normalise a tweet dict in place before it is stored.

    Expands shortened URLs in ``full_text`` (consuming ``entities``), removes
    every ``*_str`` key plus ``quoted_status_id`` and
    ``quoted_status_permalink``, and rewrites ``created_at`` as an ISO 8601
    string.
    """
    tweet["full_text"] = expand_entities(tweet["full_text"], tweet.pop("entities"))
    to_remove = [k for k in tweet if k.endswith("_str")] + [
        "quoted_status_id",
        "quoted_status_permalink",
    ]
    for key in to_remove:
        tweet.pop(key, None)
    tweet["created_at"] = parser.parse(tweet["created_at"]).isoformat()


def save_tweets(db, tweets):
    """Store tweets and their authors in ``db``, creating tables on first use.

    ``tweets`` may be any iterable, including a one-shot generator or a
    progress bar wrapping one.  Each tweet dict is transformed in place; its
    ``user`` is upserted into ``users`` and replaced by the user's ID.  Nested
    ``quoted_status`` / ``retweeted_status`` tweets are saved recursively and
    replaced by their IDs.
    """
    existing_tables = db.table_names()
    if "users" not in existing_tables:
        db["users"].create({"id": int}, pk="id")
    if "tweets" not in existing_tables:
        db["tweets"].create(
            {
                "id": int,
                "user": int,
                "created_at": str,
                "full_text": str,
                "retweeted_status": int,
                "quoted_status": int,
            },
            pk="id",
            foreign_keys=(("user", "users", "id"),),
        )
        # Self-referencing keys are added once the table exists.
        db["tweets"].add_foreign_key("retweeted_status", "tweets")
        db["tweets"].add_foreign_key("quoted_status", "tweets")

    # Collect the transformed tweets as we go: `tweets` may be a generator,
    # which would already be exhausted if it were handed to upsert_all()
    # after this loop.
    processed = []
    for tweet in tweets:
        transform_tweet(tweet)
        user = tweet.pop("user")
        transform_user(user)
        tweet["user"] = user["id"]
        # Deal with nested retweeted_status / quoted_status
        nested = []
        for tweet_key in ("quoted_status", "retweeted_status"):
            if tweet.get(tweet_key):
                nested.append(tweet[tweet_key])
                tweet[tweet_key] = tweet[tweet_key]["id"]
        if nested:
            save_tweets(db, nested)
        db["users"].upsert(user, pk="id", alter=True)
        processed.append(tweet)
    if processed:
        db["tweets"].upsert_all(processed, pk="id", alter=True)