twitwi 0.19.2__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/bluesky/__init__.py +0 -0
- test/bluesky/formatters_test.py +101 -0
- test/bluesky/normalizers_test.py +130 -0
- twitwi/__init__.py +19 -3
- twitwi/anonymizers.py +3 -9
- twitwi/bluesky/__init__.py +16 -0
- twitwi/bluesky/constants.py +19 -0
- twitwi/bluesky/formatters.py +29 -0
- twitwi/bluesky/normalizers.py +641 -0
- twitwi/bluesky/types.py +135 -0
- twitwi/bluesky/utils.py +103 -0
- twitwi/constants.py +324 -355
- twitwi/exceptions.py +8 -5
- twitwi/formatters.py +35 -37
- twitwi/normalizers.py +403 -339
- twitwi/utils.py +44 -17
- twitwi-0.21.0.dist-info/METADATA +435 -0
- twitwi-0.21.0.dist-info/RECORD +22 -0
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/WHEEL +1 -1
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/top_level.txt +1 -0
- twitwi/client_wrapper.py +0 -166
- twitwi-0.19.2.dist-info/METADATA +0 -146
- twitwi-0.19.2.dist-info/RECORD +0 -14
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info/licenses}/LICENSE.txt +0 -0
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/zip-safe +0 -0
twitwi/constants.py
CHANGED
|
@@ -4,432 +4,401 @@
|
|
|
4
4
|
#
|
|
5
5
|
# Useful constants used throughout the library.
|
|
6
6
|
#
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
7
|
+
SOURCE_DATETIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"
|
|
8
|
+
SOURCE_DATETIME_FORMAT_V2 = "%Y-%m-%dT%H:%M:%S.%fZ"
|
|
9
|
+
SOURCE_DATETIME_FORMAT_V3 = "%Y-%m-%dT%H:%M:%SZ"
|
|
10
|
+
FORMATTED_TWEET_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
|
|
11
|
+
|
|
12
|
+
FORMATTED_FULL_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
|
|
10
13
|
|
|
11
14
|
# More details on Twitter's tweets metadata can be read here: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object
|
|
12
15
|
TWEET_FIELDS = [
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
# 'filter_level',
|
|
19
|
-
|
|
20
|
-
# 'withheld_copyright',
|
|
21
|
-
# 'withheld_scope',
|
|
22
|
-
# 'withheld_countries',
|
|
23
|
-
# 'truncated',
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# 'source',
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
# 'user_utcoffset',
|
|
45
|
-
# 'user_timezone',
|
|
46
|
-
# 'user_lang',
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
16
|
+
"id", # digital ID
|
|
17
|
+
"timestamp_utc", # UNIX timestamp of creation - UTC time
|
|
18
|
+
"local_time", # ISO datetime of creation - local time
|
|
19
|
+
"user_screen_name", # author's user text ID (@user) (at collection time)
|
|
20
|
+
"text", # message's text content
|
|
21
|
+
# 'filter_level', # maximum value of the filter_level parameter which may be used and still stream this Tweet
|
|
22
|
+
"possibly_sensitive", # whether a link present in the message might contain sensitive content according to Twitter
|
|
23
|
+
# 'withheld_copyright', # whether the tweet might be censored by Twitter following copyright requests, ignorable
|
|
24
|
+
# 'withheld_scope', # whether the content withheld is the 'status' or a 'user', ignorable
|
|
25
|
+
# 'withheld_countries', # list of ISO country codes in which the message is withheld, separated by |, ignorable
|
|
26
|
+
# 'truncated', # whether the tweet is bigger than 140 characters, obsolete
|
|
27
|
+
"retweet_count", # number of retweets of the message (at collection time)
|
|
28
|
+
"like_count", # number of likes of the message (at collection time)
|
|
29
|
+
"reply_count", # number of answers to the message, dropped by Twitter (since Oct 17, now charged), unreliable and ignorable
|
|
30
|
+
"impression_count", # number of impressions generated by the message (at collection time)
|
|
31
|
+
"lang", # language of the message automatically identified by Twitter's algorithms (equals 'und' when no language could be detected)
|
|
32
|
+
"to_username", # text ID of the user the message is answering to
|
|
33
|
+
"to_userid", # digital ID of the user the message is answering to
|
|
34
|
+
"to_tweetid", # digital ID of the tweet the message is answering to
|
|
35
|
+
# 'source', # medium used by the user to post the message, now exported in source_name and source_url fields
|
|
36
|
+
"source_name", # name of the medium used to post the message
|
|
37
|
+
"source_url", # link to the medium used to post the message
|
|
38
|
+
"user_location", # location declared in the user's profile (at collection time)
|
|
39
|
+
"lat", # latitude of messages geolocalized
|
|
40
|
+
"lng", # longitude of messages geolocalized
|
|
41
|
+
"user_id", # author's user digital ID
|
|
42
|
+
"user_name", # author's detailed textual name (at collection time)
|
|
43
|
+
"user_verified", # whether the author's account is certified
|
|
44
|
+
"user_description", # description given in the author's profile (at collection time)
|
|
45
|
+
"user_url", # link to a website given in the author's profile (at collection time)
|
|
46
|
+
"user_image", # link to the image avatar of the author's profile (at collection time)
|
|
47
|
+
# 'user_utcoffset', # time offset due to the user's timezone, dropped by Twitter (since May 2018), ignorable
|
|
48
|
+
# 'user_timezone', # timezone declared in the user's profile, dropped by Twitter (since May 2018), ignorable
|
|
49
|
+
# 'user_lang', # language declared in the user's profile (at collection time), dropped by Twitter (since May 2019), ignorable
|
|
50
|
+
"user_tweets", # number of tweets sent by the user (at collection time)
|
|
51
|
+
"user_followers", # number of users following the author (at collection time)
|
|
52
|
+
"user_friends", # number of users the author is following (at collection time)
|
|
53
|
+
"user_likes", # number of likes the author has expressed (at collection time)
|
|
54
|
+
"user_lists", # number of users lists the author has been included in (at collection time)
|
|
55
|
+
"user_created_at", # ISO datetime of creation of the author's account
|
|
56
|
+
"user_timestamp_utc", # UNIX timestamp of creation of the author's account - UTC time
|
|
57
|
+
"collected_via", # How we received the message: 'stream', 'search', 'retweet' (the original tweet was
|
|
58
|
+
# contained in the retweet metadata), 'quote' (the original tweet was contained in
|
|
59
|
+
# the quote metadata), 'thread' (the tweet is part of the same conversation as a
|
|
60
|
+
# tweet collected via search or stream). If the message was collected via multiple
|
|
61
|
+
# ways, they are separated by |
|
|
62
|
+
"match_query", # whether the tweet was retrieved because it matches the query, or whether it was
|
|
63
|
+
# collected via 'quote' or 'thread'
|
|
64
|
+
"retweeted_id", # digital ID of the retweeted message
|
|
65
|
+
"retweeted_user", # text ID of the user who authored the retweeted message
|
|
66
|
+
"retweeted_user_id", # digital ID of the user who authoring the retweeted message
|
|
67
|
+
"retweeted_timestamp_utc", # UNIX timestamp of creation of the retweeted message - UTC time
|
|
68
|
+
"quoted_id", # digital ID of the retweeted message
|
|
69
|
+
"quoted_user", # text ID of the user who authored the quoted message
|
|
70
|
+
"quoted_user_id", # digital ID of the user who authoring the quoted message
|
|
71
|
+
"quoted_timestamp_utc", # UNIX timestamp of creation of the quoted message - UTC time
|
|
72
|
+
"collection_time", # ISO datetime of message collection - local time
|
|
73
|
+
"url", # url of the tweet (to get a view of the message directly on Twitter)
|
|
74
|
+
"place_country_code", # if the tweet has an associated 'place', country code of that place
|
|
75
|
+
"place_name", # if the tweet has an associated 'place', name of that place
|
|
76
|
+
"place_type", # if the tweet has an associated 'place', type of that place ('city', 'admin', etc.)
|
|
77
|
+
"place_coordinates", # if the tweet has an associated 'place', coordinates of that place, separated by |
|
|
78
|
+
"links", # list of links included in the text content, with redirections resolved, separated by |
|
|
79
|
+
"domains", # list of domain names in the links fields, separated by |
|
|
80
|
+
"media_urls", # list of links to images/videos embedded, separated by |
|
|
81
|
+
"media_files", # list of filenames of images/videos embedded and downloaded, separated by |, ignorable when medias collections isn't enabled
|
|
82
|
+
"media_types", # list of media types (photo, video, animated gif), separated by |
|
|
83
|
+
"media_alt_texts", # list of alternative texts (image descriptions), separated by |
|
|
84
|
+
"mentioned_names", # list of text IDs of users mentionned, separated by |
|
|
85
|
+
"mentioned_ids", # list of digital IDs of users mentionned, separated by |
|
|
86
|
+
"hashtags", # list of hashtags used, lowercased, separated by |
|
|
84
87
|
]
|
|
85
88
|
|
|
86
89
|
TWEET_FIELDS_TCAT = [
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
90
|
+
"id",
|
|
91
|
+
"time",
|
|
92
|
+
"created_at",
|
|
93
|
+
"from_user_name",
|
|
94
|
+
"text",
|
|
95
|
+
"filter_level",
|
|
96
|
+
"possibly_sensitive",
|
|
97
|
+
"withheld_copyright",
|
|
98
|
+
"withheld_scope",
|
|
99
|
+
"truncated",
|
|
100
|
+
"retweet_count",
|
|
101
|
+
"favorite_count",
|
|
102
|
+
"lang",
|
|
103
|
+
"to_user_name",
|
|
104
|
+
"in_reply_to_status_id",
|
|
105
|
+
"quoted_status_id",
|
|
106
|
+
"source",
|
|
107
|
+
"location",
|
|
108
|
+
"lat",
|
|
109
|
+
"lng",
|
|
110
|
+
"from_user_id",
|
|
111
|
+
"from_user_realname",
|
|
112
|
+
"from_user_verified",
|
|
113
|
+
"from_user_description",
|
|
114
|
+
"from_user_url",
|
|
115
|
+
"from_user_profile_image_url",
|
|
116
|
+
"from_user_utcoffset",
|
|
117
|
+
"from_user_timezone",
|
|
118
|
+
"from_user_lang",
|
|
119
|
+
"from_user_tweetcount",
|
|
120
|
+
"from_user_followercount",
|
|
121
|
+
"from_user_friendcount",
|
|
122
|
+
"from_user_favourites_count",
|
|
123
|
+
"from_user_listed",
|
|
124
|
+
"from_user_withheld_scope",
|
|
125
|
+
"from_user_created_at",
|
|
126
|
+
"urls",
|
|
127
|
+
"urls_expanded",
|
|
128
|
+
"urls_followed",
|
|
129
|
+
"domains",
|
|
130
|
+
"HTTP status code",
|
|
131
|
+
"media_id",
|
|
132
|
+
"media_urls",
|
|
133
|
+
"media_type",
|
|
134
|
+
"media_indice_start",
|
|
135
|
+
"media_indice_end",
|
|
136
|
+
"photo_sizes_width",
|
|
137
|
+
"photo_sizes_height",
|
|
138
|
+
"photo_resize",
|
|
139
|
+
"mentions",
|
|
140
|
+
"hashtags",
|
|
138
141
|
]
|
|
139
142
|
|
|
140
143
|
GAZOU_TO_TCAT = {
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
'media_types': 'media_type'
|
|
144
|
+
"identical_fields": {
|
|
145
|
+
"id": "id",
|
|
146
|
+
"timestamp_utc": "time",
|
|
147
|
+
"local_time": "created_at",
|
|
148
|
+
"user_screen_name": "from_user_name",
|
|
149
|
+
"text": "text",
|
|
150
|
+
"possibly_sensitive": "possibly_sensitive",
|
|
151
|
+
"retweet_count": "retweet_count",
|
|
152
|
+
"like_count": "favorite_count",
|
|
153
|
+
"lang": "lang",
|
|
154
|
+
"to_username": "to_user_name",
|
|
155
|
+
"to_userid": "to_user_id",
|
|
156
|
+
"to_tweetid": "in_reply_to_status_id",
|
|
157
|
+
"quoted_id": "quoted_status_id",
|
|
158
|
+
"user_location": "location",
|
|
159
|
+
"lat": "lat",
|
|
160
|
+
"lng": "lng",
|
|
161
|
+
"user_id": "from_user_id",
|
|
162
|
+
"user_name": "from_user_realname",
|
|
163
|
+
"user_verified": "from_user_verified",
|
|
164
|
+
"user_description": "from_user_description",
|
|
165
|
+
"user_url": "from_user_url",
|
|
166
|
+
"user_image": "from_user_profile_image_url",
|
|
167
|
+
"user_tweets": "from_user_tweetcount",
|
|
168
|
+
"user_followers": "from_user_followercount",
|
|
169
|
+
"user_friends": "from_user_friendcount",
|
|
170
|
+
"user_likes": "from_user_favourites_count",
|
|
171
|
+
"user_lists": "from_user_listed",
|
|
172
|
+
"user_created_at": "from_user_created_at",
|
|
173
|
+
"links": "urls_expanded",
|
|
174
|
+
"domains": "domains",
|
|
175
|
+
"mentioned_ids": "mentions",
|
|
176
|
+
"hashtags": "hashtags",
|
|
177
|
+
"media_urls": "media_urls",
|
|
178
|
+
"media_types": "media_type",
|
|
177
179
|
},
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
180
|
+
"modified_fields": ["source"],
|
|
181
|
+
"removed_fields": [
|
|
182
|
+
"filter_level",
|
|
183
|
+
"witheld_copyright",
|
|
184
|
+
"withheld_scope",
|
|
185
|
+
"truncated",
|
|
186
|
+
"from_user_utcoffset",
|
|
187
|
+
"from_user_timezone",
|
|
188
|
+
"from_user_lang",
|
|
189
|
+
"from_user_withheld_scope",
|
|
190
|
+
"urls",
|
|
191
|
+
"media_id",
|
|
192
|
+
"media_indice_start",
|
|
193
|
+
"media_indice_end",
|
|
194
|
+
"photo_sizes_width",
|
|
195
|
+
"photo_sizes_height",
|
|
196
|
+
"photo_resize",
|
|
181
197
|
],
|
|
182
|
-
|
|
183
|
-
'removed_fields': [
|
|
184
|
-
'filter_level',
|
|
185
|
-
'witheld_copyright',
|
|
186
|
-
'withheld_scope',
|
|
187
|
-
'truncated',
|
|
188
|
-
'from_user_utcoffset',
|
|
189
|
-
'from_user_timezone',
|
|
190
|
-
'from_user_lang',
|
|
191
|
-
'from_user_withheld_scope',
|
|
192
|
-
'urls',
|
|
193
|
-
'media_id',
|
|
194
|
-
'media_indice_start',
|
|
195
|
-
'media_indice_end',
|
|
196
|
-
'photo_sizes_width',
|
|
197
|
-
'photo_sizes_height',
|
|
198
|
-
'photo_resize',
|
|
199
|
-
]
|
|
200
198
|
}
|
|
201
199
|
|
|
202
200
|
TWEET_PLURAL_FIELDS = {
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
201
|
+
"links",
|
|
202
|
+
"urls_expanded",
|
|
203
|
+
"domains",
|
|
204
|
+
"hashtags",
|
|
205
|
+
"collected_via",
|
|
206
|
+
"media_urls",
|
|
207
|
+
"media_files",
|
|
208
|
+
"media_types",
|
|
209
|
+
"media_alt_texts",
|
|
210
|
+
"mentioned_names",
|
|
211
|
+
"mentioned_ids",
|
|
212
|
+
"mentions",
|
|
215
213
|
}
|
|
216
214
|
|
|
217
|
-
TWEET_BOOLEAN_FIELDS = {
|
|
218
|
-
'possibly_sensitive',
|
|
219
|
-
'user_verified',
|
|
220
|
-
'match_query'
|
|
221
|
-
}
|
|
215
|
+
TWEET_BOOLEAN_FIELDS = {"possibly_sensitive", "user_verified", "match_query"}
|
|
222
216
|
|
|
223
217
|
# More details on Twitter's users metadata can be read here: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/user-object
|
|
224
218
|
USER_FIELDS = [
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# 'lang', # dropped from tweet objects only by Twitter (since May
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
# 'utc_offset', # dropped by Twitter (since May
|
|
234
|
-
# 'time_zone', # dropped by Twitter (since May
|
|
235
|
-
|
|
236
|
-
# 'geo_enabled', # dropped by Twitter (since May
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
# 'is_translator', # dropped by Twitter (since May
|
|
245
|
-
# 'translator_type', # dropped by Twitter (since May
|
|
246
|
-
# 'is_translation_enabled', # dropped by Twitter (since May
|
|
219
|
+
"id",
|
|
220
|
+
"screen_name",
|
|
221
|
+
"name",
|
|
222
|
+
"description",
|
|
223
|
+
"url",
|
|
224
|
+
# 'lang', # dropped from tweet objects only by Twitter (since May 2019)
|
|
225
|
+
"timestamp_utc",
|
|
226
|
+
"local_time",
|
|
227
|
+
# 'utc_offset', # dropped by Twitter (since May 2018), ignorable
|
|
228
|
+
# 'time_zone', # dropped by Twitter (since May 2018), ignorable
|
|
229
|
+
"location",
|
|
230
|
+
# 'geo_enabled', # dropped by Twitter (since May 2019), ignorable
|
|
231
|
+
"verified",
|
|
232
|
+
"protected",
|
|
233
|
+
"tweets",
|
|
234
|
+
"followers",
|
|
235
|
+
"friends",
|
|
236
|
+
"likes",
|
|
237
|
+
"lists",
|
|
238
|
+
# 'is_translator', # dropped by Twitter (since May 2019), ignorable
|
|
239
|
+
# 'translator_type', # dropped by Twitter (since May 2019), ignorable
|
|
240
|
+
# 'is_translation_enabled', # dropped by Twitter (since May 2019), ignorable
|
|
247
241
|
# 'default_profile',
|
|
248
242
|
# 'default_profile_image',
|
|
249
|
-
# 'has_extended_profile', # dropped by Twitter (since May
|
|
250
|
-
# 'profile_image_url', # dropped by Twitter (since May
|
|
251
|
-
|
|
243
|
+
# 'has_extended_profile', # dropped by Twitter (since May 2019), ignorable
|
|
244
|
+
# 'profile_image_url', # dropped by Twitter (since May 2019), ignorable
|
|
245
|
+
"image",
|
|
252
246
|
# 'profile_banner_url',
|
|
253
|
-
# 'profile_use_background_image', # dropped by Twitter (since May
|
|
254
|
-
# 'profile_background_image_url', # dropped by Twitter (since May
|
|
255
|
-
# 'profile_background_image_url_https', # dropped by Twitter (since May
|
|
256
|
-
# 'profile_background_tile', # dropped by Twitter (since May
|
|
257
|
-
# 'profile_background_color', # dropped by Twitter (since May
|
|
258
|
-
# 'profile_link_color', # dropped by Twitter (since May
|
|
259
|
-
# 'profile_text_color', # dropped by Twitter (since May
|
|
260
|
-
# 'profile_sidebar_fill_color', # dropped by Twitter (since May
|
|
261
|
-
# 'profile_sidebar_border_color' # dropped by Twitter (since May
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
247
|
+
# 'profile_use_background_image', # dropped by Twitter (since May 2019), ignorable
|
|
248
|
+
# 'profile_background_image_url', # dropped by Twitter (since May 2019), ignorable
|
|
249
|
+
# 'profile_background_image_url_https', # dropped by Twitter (since May 2019), ignorable
|
|
250
|
+
# 'profile_background_tile', # dropped by Twitter (since May 2019), ignorable
|
|
251
|
+
# 'profile_background_color', # dropped by Twitter (since May 2019), ignorable
|
|
252
|
+
# 'profile_link_color', # dropped by Twitter (since May 2019), ignorable
|
|
253
|
+
# 'profile_text_color', # dropped by Twitter (since May 2019), ignorable
|
|
254
|
+
# 'profile_sidebar_fill_color', # dropped by Twitter (since May 2019), ignorable
|
|
255
|
+
# 'profile_sidebar_border_color' # dropped by Twitter (since May 2019), ignorable
|
|
256
|
+
"default_profile",
|
|
257
|
+
"default_profile_image",
|
|
258
|
+
"witheld_in_countries",
|
|
259
|
+
"witheld_scope",
|
|
266
260
|
]
|
|
267
261
|
|
|
268
|
-
USER_PLURAL_FIELDS = {
|
|
269
|
-
'witheld_in_countries'
|
|
270
|
-
}
|
|
262
|
+
USER_PLURAL_FIELDS = {"witheld_in_countries"}
|
|
271
263
|
|
|
272
264
|
USER_BOOLEAN_FIELDS = {
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
265
|
+
"verified",
|
|
266
|
+
"protected",
|
|
267
|
+
"default_profile",
|
|
268
|
+
"default_profile_image",
|
|
277
269
|
}
|
|
278
270
|
|
|
279
271
|
CANONICAL_URL_KWARGS = {
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
272
|
+
"strip_authentication": False,
|
|
273
|
+
"strip_trailing_slash": False,
|
|
274
|
+
"strip_protocol": False,
|
|
275
|
+
"strip_irrelevant_subdomains": False,
|
|
276
|
+
"strip_fragment": False,
|
|
277
|
+
"normalize_amp": False,
|
|
278
|
+
"fix_common_mistakes": False,
|
|
279
|
+
"infer_redirection": False,
|
|
280
|
+
"quoted": True,
|
|
289
281
|
}
|
|
290
282
|
|
|
291
|
-
CANONICAL_HOSTNAME_KWARGS = {
|
|
292
|
-
'normalize_amp': False,
|
|
293
|
-
'infer_redirection': False
|
|
294
|
-
}
|
|
283
|
+
CANONICAL_HOSTNAME_KWARGS = {"normalize_amp": False, "infer_redirection": False}
|
|
295
284
|
|
|
296
285
|
# API v2 constants
|
|
297
286
|
TWEET_FIELDS_V2 = {
|
|
298
|
-
|
|
299
|
-
|
|
287
|
+
"attachments",
|
|
288
|
+
"author_id",
|
|
300
289
|
# NOTE: (2023-04-26) dropping this because we don't use it and it prevents us
|
|
301
290
|
# from being able to get 500 tweets per call using academic v2 API.
|
|
302
291
|
# 'context_annotations',
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
292
|
+
"conversation_id",
|
|
293
|
+
"created_at",
|
|
294
|
+
"entities",
|
|
295
|
+
"geo",
|
|
296
|
+
"id",
|
|
297
|
+
"in_reply_to_user_id",
|
|
298
|
+
"lang",
|
|
299
|
+
"possibly_sensitive",
|
|
300
|
+
"public_metrics",
|
|
301
|
+
"referenced_tweets",
|
|
302
|
+
"reply_settings",
|
|
303
|
+
"source",
|
|
304
|
+
"text",
|
|
305
|
+
"withheld",
|
|
317
306
|
}
|
|
318
307
|
|
|
319
308
|
MEDIA_FIELDS = {
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
309
|
+
"media_key",
|
|
310
|
+
"type",
|
|
311
|
+
"duration_ms",
|
|
312
|
+
"height",
|
|
313
|
+
"preview_image_url",
|
|
314
|
+
"public_metrics",
|
|
315
|
+
"width",
|
|
316
|
+
"alt_text",
|
|
317
|
+
"url",
|
|
318
|
+
"variants",
|
|
330
319
|
}
|
|
331
320
|
|
|
332
|
-
POLL_FIELDS = {
|
|
333
|
-
'id',
|
|
334
|
-
'options',
|
|
335
|
-
'duration_minutes',
|
|
336
|
-
'end_datetime',
|
|
337
|
-
'voting_status'
|
|
338
|
-
}
|
|
321
|
+
POLL_FIELDS = {"id", "options", "duration_minutes", "end_datetime", "voting_status"}
|
|
339
322
|
|
|
340
323
|
PLACE_FIELDS = {
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
324
|
+
"full_name",
|
|
325
|
+
"id",
|
|
326
|
+
"contained_within",
|
|
327
|
+
"country",
|
|
328
|
+
"country_code",
|
|
329
|
+
"geo",
|
|
330
|
+
"name",
|
|
331
|
+
"place_type",
|
|
349
332
|
}
|
|
350
333
|
|
|
351
334
|
USER_FIELDS_V2 = {
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
335
|
+
"id",
|
|
336
|
+
"name",
|
|
337
|
+
"username",
|
|
338
|
+
"created_at",
|
|
339
|
+
"description",
|
|
340
|
+
"entities",
|
|
341
|
+
"location",
|
|
342
|
+
"pinned_tweet_id",
|
|
343
|
+
"profile_image_url",
|
|
344
|
+
"protected",
|
|
345
|
+
"public_metrics",
|
|
346
|
+
"url",
|
|
347
|
+
"verified",
|
|
348
|
+
"withheld",
|
|
366
349
|
}
|
|
367
350
|
|
|
368
351
|
TWEET_EXPANSIONS = {
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
352
|
+
"author_id",
|
|
353
|
+
"referenced_tweets.id",
|
|
354
|
+
"in_reply_to_user_id",
|
|
355
|
+
"attachments.media_keys",
|
|
356
|
+
"attachments.poll_ids",
|
|
357
|
+
"geo.place_id",
|
|
358
|
+
"entities.mentions.username",
|
|
359
|
+
"referenced_tweets.id.author_id",
|
|
377
360
|
}
|
|
378
361
|
|
|
379
362
|
TWEET_PARAMS = {
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
363
|
+
"tweet.fields": ",".join(TWEET_FIELDS_V2),
|
|
364
|
+
"media.fields": ",".join(MEDIA_FIELDS),
|
|
365
|
+
"poll.fields": ",".join(POLL_FIELDS),
|
|
366
|
+
"place.fields": ",".join(PLACE_FIELDS),
|
|
367
|
+
"user.fields": ",".join(USER_FIELDS_V2),
|
|
385
368
|
}
|
|
386
369
|
|
|
387
|
-
USER_EXPANSIONS = {
|
|
388
|
-
'pinned_tweet_id'
|
|
389
|
-
}
|
|
370
|
+
USER_EXPANSIONS = {"pinned_tweet_id"}
|
|
390
371
|
|
|
391
372
|
USER_PARAMS = {
|
|
392
|
-
|
|
393
|
-
|
|
373
|
+
"user.fields": ",".join(USER_FIELDS_V2),
|
|
374
|
+
"tweet.fields": ",".join(TWEET_FIELDS_V2),
|
|
394
375
|
}
|
|
395
376
|
|
|
396
377
|
# Lists
|
|
397
378
|
|
|
398
379
|
LIST_FIELDS = {
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
380
|
+
"created_at",
|
|
381
|
+
"follower_count",
|
|
382
|
+
"member_count",
|
|
383
|
+
"private",
|
|
384
|
+
"description",
|
|
385
|
+
"owner_id",
|
|
405
386
|
}
|
|
406
387
|
|
|
407
|
-
LIST_EXPANSIONS = {
|
|
408
|
-
'owner_id'
|
|
409
|
-
}
|
|
388
|
+
LIST_EXPANSIONS = {"owner_id"}
|
|
410
389
|
|
|
411
390
|
LIST_PARAMS = {
|
|
412
|
-
|
|
413
|
-
|
|
391
|
+
"list.fields": ",".join(LIST_FIELDS),
|
|
392
|
+
"user.fields": ",".join(USER_FIELDS_V2),
|
|
414
393
|
}
|
|
415
394
|
|
|
416
|
-
LIST_TWEETS_EXPANSIONS = {
|
|
417
|
-
'author_id'
|
|
418
|
-
}
|
|
395
|
+
LIST_TWEETS_EXPANSIONS = {"author_id"}
|
|
419
396
|
|
|
420
|
-
LIST_MEMBERS_EXPANSIONS = {
|
|
421
|
-
'pinned_tweet_id'
|
|
422
|
-
}
|
|
397
|
+
LIST_MEMBERS_EXPANSIONS = {"pinned_tweet_id"}
|
|
423
398
|
|
|
424
399
|
LIST_TWEETS_OR_MEMBERS_PARAMS = {
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
}
|
|
428
|
-
|
|
429
|
-
APP_ONLY_ROUTES = {
|
|
430
|
-
'tweets/counts/recent',
|
|
431
|
-
'tweets/counts/all',
|
|
432
|
-
'tweets/search/all'
|
|
400
|
+
"tweet.fields": ",".join(TWEET_FIELDS_V2),
|
|
401
|
+
"user.fields": ",".join(USER_FIELDS_V2),
|
|
433
402
|
}
|
|
434
403
|
|
|
435
404
|
PRE_SNOWFLAKE_LAST_TWEET_ID = 29700859247
|