twitwi 0.20.0__py3-none-any.whl → 0.21.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
twitwi/constants.py CHANGED
@@ -4,426 +4,400 @@
4
4
  #
5
5
  # Useful constants used throughout the library.
6
6
  #
7
- TWEET_DATETIME_FORMAT = '%a %b %d %H:%M:%S +0000 %Y'
8
- TWEET_DATETIME_FORMAT_V2 = '%Y-%m-%dT%H:%M:%S.%fZ'
9
- FORMATTED_TWEET_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
7
+ SOURCE_DATETIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"
8
+ SOURCE_DATETIME_FORMAT_V2 = "%Y-%m-%dT%H:%M:%S.%fZ"
9
+ FORMATTED_TWEET_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
10
+
11
+ FORMATTED_FULL_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
10
12
 
11
13
  # More details on Twitter's tweets metadata can be read here: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object
12
14
  TWEET_FIELDS = [
13
- 'id', # digital ID
14
- 'timestamp_utc', # UNIX timestamp of creation - UTC time
15
- 'local_time', # ISO datetime of creation - local time
16
- 'user_screen_name', # author's user text ID (@user) (at collection time)
17
- 'text', # message's text content
18
- # 'filter_level', # maximum value of the filter_level parameter which may be used and still stream this Tweet
19
- 'possibly_sensitive', # whether a link present in the message might contain sensitive content according to Twitter
20
- # 'withheld_copyright', # whether the tweet might be censored by Twitter following copyright requests, ignorable
21
- # 'withheld_scope', # whether the content withheld is the 'status' or a 'user', ignorable
22
- # 'withheld_countries', # list of ISO country codes in which the message is withheld, separated by |, ignorable
23
- # 'truncated', # whether the tweet is bigger than 140 characters, obsolete
24
- 'retweet_count', # number of retweets of the message (at collection time)
25
- 'like_count', # number of likes of the message (at collection time)
26
- 'reply_count', # number of answers to the message, dropped by Twitter (since Oct 17, now charged), unreliable and ignorable
27
- 'impression_count', # number of impressions generated by the message (at collection time)
28
- 'lang', # language of the message automatically identified by Twitter's algorithms (equals 'und' when no language could be detected)
29
- 'to_username', # text ID of the user the message is answering to
30
- 'to_userid', # digital ID of the user the message is answering to
31
- 'to_tweetid', # digital ID of the tweet the message is answering to
32
- # 'source', # medium used by the user to post the message, now exported in source_name and source_url fields
33
- 'source_name', # name of the medium used to post the message
34
- 'source_url', # link to the medium used to post the message
35
- 'user_location', # location declared in the user's profile (at collection time)
36
- 'lat', # latitude of messages geolocalized
37
- 'lng', # longitude of messages geolocalized
38
- 'user_id', # author's user digital ID
39
- 'user_name', # author's detailed textual name (at collection time)
40
- 'user_verified', # whether the author's account is certified
41
- 'user_description', # description given in the author's profile (at collection time)
42
- 'user_url', # link to a website given in the author's profile (at collection time)
43
- 'user_image', # link to the image avatar of the author's profile (at collection time)
44
- # 'user_utcoffset', # time offset due to the user's timezone, dropped by Twitter (since May 18), ignorable
45
- # 'user_timezone', # timezone declared in the user's profile, dropped by Twitter (since May 18), ignorable
46
- # 'user_lang', # language declared in the user's profile (at collection time), dropped by Twitter (since May 19), ignorable
47
- 'user_tweets', # number of tweets sent by the user (at collection time)
48
- 'user_followers', # number of users following the author (at collection time)
49
- 'user_friends', # number of users the author is following (at collection time)
50
- 'user_likes', # number of likes the author has expressed (at collection time)
51
- 'user_lists', # number of users lists the author has been included in (at collection time)
52
- 'user_created_at', # ISO datetime of creation of the author's account
53
- 'user_timestamp_utc', # UNIX timestamp of creation of the author's account - UTC time
54
- 'collected_via', # How we received the message: 'stream', 'search', 'retweet' (the original tweet was
55
- # contained in the retweet metadata), 'quote' (the original tweet was contained in
56
- # the quote metadata), 'thread' (the tweet is part of the same conversation as a
57
- # tweet collected via search or stream). If the message was collected via multiple
58
- # ways, they are separated by |
59
- 'match_query', # whether the tweet was retrieved because it matches the query, or whether it was
60
- # collected via 'quote' or 'thread'
61
- 'retweeted_id', # digital ID of the retweeted message
62
- 'retweeted_user', # text ID of the user who authored the retweeted message
63
- 'retweeted_user_id', # digital ID of the user who authoring the retweeted message
64
- 'retweeted_timestamp_utc', # UNIX timestamp of creation of the retweeted message - UTC time
65
- 'quoted_id', # digital ID of the retweeted message
66
- 'quoted_user', # text ID of the user who authored the quoted message
67
- 'quoted_user_id', # digital ID of the user who authoring the quoted message
68
- 'quoted_timestamp_utc', # UNIX timestamp of creation of the quoted message - UTC time
69
- 'collection_time', # ISO datetime of message collection - local time
70
- 'url', # url of the tweet (to get a view of the message directly on Twitter)
71
- 'place_country_code', # if the tweet has an associated 'place', country code of that place
72
- 'place_name', # if the tweet has an associated 'place', name of that place
73
- 'place_type', # if the tweet has an associated 'place', type of that place ('city', 'admin', etc.)
74
- 'place_coordinates', # if the tweet has an associated 'place', coordinates of that place, separated by |
75
- 'links', # list of links included in the text content, with redirections resolved, separated by |
76
- 'domains', # list of domain names in the links fields, separated by |
77
- 'media_urls', # list of links to images/videos embedded, separated by |
78
- 'media_files', # list of filenames of images/videos embedded and downloaded, separated by |, ignorable when medias collections isn't enabled
79
- 'media_types', # list of media types (photo, video, animated gif), separated by |
80
- 'media_alt_texts', # list of alternative texts (image descriptions), separated by |
81
- 'mentioned_names', # list of text IDs of users mentionned, separated by |
82
- 'mentioned_ids', # list of digital IDs of users mentionned, separated by |
83
- 'hashtags' # list of hashtags used, lowercased, separated by |
15
+ "id", # digital ID
16
+ "timestamp_utc", # UNIX timestamp of creation - UTC time
17
+ "local_time", # ISO datetime of creation - local time
18
+ "user_screen_name", # author's user text ID (@user) (at collection time)
19
+ "text", # message's text content
20
+ # 'filter_level', # maximum value of the filter_level parameter which may be used and still stream this Tweet
21
+ "possibly_sensitive", # whether a link present in the message might contain sensitive content according to Twitter
22
+ # 'withheld_copyright', # whether the tweet might be censored by Twitter following copyright requests, ignorable
23
+ # 'withheld_scope', # whether the content withheld is the 'status' or a 'user', ignorable
24
+ # 'withheld_countries', # list of ISO country codes in which the message is withheld, separated by |, ignorable
25
+ # 'truncated', # whether the tweet is bigger than 140 characters, obsolete
26
+ "retweet_count", # number of retweets of the message (at collection time)
27
+ "like_count", # number of likes of the message (at collection time)
28
+ "reply_count", # number of answers to the message, dropped by Twitter (since Oct 17, now charged), unreliable and ignorable
29
+ "impression_count", # number of impressions generated by the message (at collection time)
30
+ "lang", # language of the message automatically identified by Twitter's algorithms (equals 'und' when no language could be detected)
31
+ "to_username", # text ID of the user the message is answering to
32
+ "to_userid", # digital ID of the user the message is answering to
33
+ "to_tweetid", # digital ID of the tweet the message is answering to
34
+ # 'source', # medium used by the user to post the message, now exported in source_name and source_url fields
35
+ "source_name", # name of the medium used to post the message
36
+ "source_url", # link to the medium used to post the message
37
+ "user_location", # location declared in the user's profile (at collection time)
38
+ "lat", # latitude of geolocated messages
39
+ "lng", # longitude of geolocated messages
40
+ "user_id", # author's user digital ID
41
+ "user_name", # author's detailed textual name (at collection time)
42
+ "user_verified", # whether the author's account is certified
43
+ "user_description", # description given in the author's profile (at collection time)
44
+ "user_url", # link to a website given in the author's profile (at collection time)
45
+ "user_image", # link to the image avatar of the author's profile (at collection time)
46
+ # 'user_utcoffset', # time offset due to the user's timezone, dropped by Twitter (since May 2018), ignorable
47
+ # 'user_timezone', # timezone declared in the user's profile, dropped by Twitter (since May 2018), ignorable
48
+ # 'user_lang', # language declared in the user's profile (at collection time), dropped by Twitter (since May 2019), ignorable
49
+ "user_tweets", # number of tweets sent by the user (at collection time)
50
+ "user_followers", # number of users following the author (at collection time)
51
+ "user_friends", # number of users the author is following (at collection time)
52
+ "user_likes", # number of likes the author has expressed (at collection time)
53
+ "user_lists", # number of users lists the author has been included in (at collection time)
54
+ "user_created_at", # ISO datetime of creation of the author's account
55
+ "user_timestamp_utc", # UNIX timestamp of creation of the author's account - UTC time
56
+ "collected_via", # How we received the message: 'stream', 'search', 'retweet' (the original tweet was
57
+ # contained in the retweet metadata), 'quote' (the original tweet was contained in
58
+ # the quote metadata), 'thread' (the tweet is part of the same conversation as a
59
+ # tweet collected via search or stream). If the message was collected via multiple
60
+ # ways, they are separated by |
61
+ "match_query", # whether the tweet was retrieved because it matches the query, or whether it was
62
+ # collected via 'quote' or 'thread'
63
+ "retweeted_id", # digital ID of the retweeted message
64
+ "retweeted_user", # text ID of the user who authored the retweeted message
65
+ "retweeted_user_id", # digital ID of the user who authored the retweeted message
66
+ "retweeted_timestamp_utc", # UNIX timestamp of creation of the retweeted message - UTC time
67
+ "quoted_id", # digital ID of the quoted message
68
+ "quoted_user", # text ID of the user who authored the quoted message
69
+ "quoted_user_id", # digital ID of the user who authored the quoted message
70
+ "quoted_timestamp_utc", # UNIX timestamp of creation of the quoted message - UTC time
71
+ "collection_time", # ISO datetime of message collection - local time
72
+ "url", # url of the tweet (to get a view of the message directly on Twitter)
73
+ "place_country_code", # if the tweet has an associated 'place', country code of that place
74
+ "place_name", # if the tweet has an associated 'place', name of that place
75
+ "place_type", # if the tweet has an associated 'place', type of that place ('city', 'admin', etc.)
76
+ "place_coordinates", # if the tweet has an associated 'place', coordinates of that place, separated by |
77
+ "links", # list of links included in the text content, with redirections resolved, separated by |
78
+ "domains", # list of domain names in the links fields, separated by |
79
+ "media_urls", # list of links to images/videos embedded, separated by |
80
+ "media_files", # list of filenames of images/videos embedded and downloaded, separated by |, ignorable when media collection isn't enabled
81
+ "media_types", # list of media types (photo, video, animated gif), separated by |
82
+ "media_alt_texts", # list of alternative texts (image descriptions), separated by |
83
+ "mentioned_names", # list of text IDs of users mentioned, separated by |
84
+ "mentioned_ids", # list of digital IDs of users mentioned, separated by |
85
+ "hashtags", # list of hashtags used, lowercased, separated by |
84
86
  ]
85
87
 
86
88
  TWEET_FIELDS_TCAT = [
87
- 'id',
88
- 'time',
89
- 'created_at',
90
- 'from_user_name',
91
- 'text',
92
- 'filter_level',
93
- 'possibly_sensitive',
94
- 'withheld_copyright',
95
- 'withheld_scope',
96
- 'truncated',
97
- 'retweet_count',
98
- 'favorite_count',
99
- 'lang',
100
- 'to_user_name',
101
- 'in_reply_to_status_id',
102
- 'quoted_status_id',
103
- 'source',
104
- 'location',
105
- 'lat',
106
- 'lng',
107
- 'from_user_id',
108
- 'from_user_realname',
109
- 'from_user_verified',
110
- 'from_user_description',
111
- 'from_user_url',
112
- 'from_user_profile_image_url',
113
- 'from_user_utcoffset',
114
- 'from_user_timezone',
115
- 'from_user_lang',
116
- 'from_user_tweetcount',
117
- 'from_user_followercount',
118
- 'from_user_friendcount',
119
- 'from_user_favourites_count',
120
- 'from_user_listed',
121
- 'from_user_withheld_scope',
122
- 'from_user_created_at',
123
- 'urls',
124
- 'urls_expanded',
125
- 'urls_followed',
126
- 'domains',
127
- 'HTTP status code',
128
- 'media_id',
129
- 'media_urls',
130
- 'media_type',
131
- 'media_indice_start',
132
- 'media_indice_end',
133
- 'photo_sizes_width',
134
- 'photo_sizes_height',
135
- 'photo_resize',
136
- 'mentions',
137
- 'hashtags'
89
+ "id",
90
+ "time",
91
+ "created_at",
92
+ "from_user_name",
93
+ "text",
94
+ "filter_level",
95
+ "possibly_sensitive",
96
+ "withheld_copyright",
97
+ "withheld_scope",
98
+ "truncated",
99
+ "retweet_count",
100
+ "favorite_count",
101
+ "lang",
102
+ "to_user_name",
103
+ "in_reply_to_status_id",
104
+ "quoted_status_id",
105
+ "source",
106
+ "location",
107
+ "lat",
108
+ "lng",
109
+ "from_user_id",
110
+ "from_user_realname",
111
+ "from_user_verified",
112
+ "from_user_description",
113
+ "from_user_url",
114
+ "from_user_profile_image_url",
115
+ "from_user_utcoffset",
116
+ "from_user_timezone",
117
+ "from_user_lang",
118
+ "from_user_tweetcount",
119
+ "from_user_followercount",
120
+ "from_user_friendcount",
121
+ "from_user_favourites_count",
122
+ "from_user_listed",
123
+ "from_user_withheld_scope",
124
+ "from_user_created_at",
125
+ "urls",
126
+ "urls_expanded",
127
+ "urls_followed",
128
+ "domains",
129
+ "HTTP status code",
130
+ "media_id",
131
+ "media_urls",
132
+ "media_type",
133
+ "media_indice_start",
134
+ "media_indice_end",
135
+ "photo_sizes_width",
136
+ "photo_sizes_height",
137
+ "photo_resize",
138
+ "mentions",
139
+ "hashtags",
138
140
  ]
139
141
 
140
142
  GAZOU_TO_TCAT = {
141
-
142
- 'identical_fields': {
143
- 'id': 'id',
144
- 'timestamp_utc': 'time',
145
- 'local_time': 'created_at',
146
- 'user_screen_name': 'from_user_name',
147
- 'text': 'text',
148
- 'possibly_sensitive': 'possibly_sensitive',
149
- 'retweet_count': 'retweet_count',
150
- 'like_count': 'favorite_count',
151
- 'lang': 'lang',
152
- 'to_username': 'to_user_name',
153
- 'to_userid': 'to_user_id',
154
- 'to_tweetid': 'in_reply_to_status_id',
155
- 'quoted_id': 'quoted_status_id',
156
- 'user_location': 'location',
157
- 'lat': 'lat',
158
- 'lng': 'lng',
159
- 'user_id': 'from_user_id',
160
- 'user_name': 'from_user_realname',
161
- 'user_verified': 'from_user_verified',
162
- 'user_description': 'from_user_description',
163
- 'user_url': 'from_user_url',
164
- 'user_image': 'from_user_profile_image_url',
165
- 'user_tweets': 'from_user_tweetcount',
166
- 'user_followers': 'from_user_followercount',
167
- 'user_friends': 'from_user_friendcount',
168
- 'user_likes': 'from_user_favourites_count',
169
- 'user_lists': 'from_user_listed',
170
- 'user_created_at': 'from_user_created_at',
171
- 'links': 'urls_expanded',
172
- 'domains': 'domains',
173
- 'mentioned_ids': 'mentions',
174
- 'hashtags': 'hashtags',
175
- 'media_urls': 'media_urls',
176
- 'media_types': 'media_type'
143
+ "identical_fields": {
144
+ "id": "id",
145
+ "timestamp_utc": "time",
146
+ "local_time": "created_at",
147
+ "user_screen_name": "from_user_name",
148
+ "text": "text",
149
+ "possibly_sensitive": "possibly_sensitive",
150
+ "retweet_count": "retweet_count",
151
+ "like_count": "favorite_count",
152
+ "lang": "lang",
153
+ "to_username": "to_user_name",
154
+ "to_userid": "to_user_id",
155
+ "to_tweetid": "in_reply_to_status_id",
156
+ "quoted_id": "quoted_status_id",
157
+ "user_location": "location",
158
+ "lat": "lat",
159
+ "lng": "lng",
160
+ "user_id": "from_user_id",
161
+ "user_name": "from_user_realname",
162
+ "user_verified": "from_user_verified",
163
+ "user_description": "from_user_description",
164
+ "user_url": "from_user_url",
165
+ "user_image": "from_user_profile_image_url",
166
+ "user_tweets": "from_user_tweetcount",
167
+ "user_followers": "from_user_followercount",
168
+ "user_friends": "from_user_friendcount",
169
+ "user_likes": "from_user_favourites_count",
170
+ "user_lists": "from_user_listed",
171
+ "user_created_at": "from_user_created_at",
172
+ "links": "urls_expanded",
173
+ "domains": "domains",
174
+ "mentioned_ids": "mentions",
175
+ "hashtags": "hashtags",
176
+ "media_urls": "media_urls",
177
+ "media_types": "media_type",
177
178
  },
178
-
179
- 'modified_fields': [
180
- 'source'
179
+ "modified_fields": ["source"],
180
+ "removed_fields": [
181
+ "filter_level",
182
+ "witheld_copyright",
183
+ "withheld_scope",
184
+ "truncated",
185
+ "from_user_utcoffset",
186
+ "from_user_timezone",
187
+ "from_user_lang",
188
+ "from_user_withheld_scope",
189
+ "urls",
190
+ "media_id",
191
+ "media_indice_start",
192
+ "media_indice_end",
193
+ "photo_sizes_width",
194
+ "photo_sizes_height",
195
+ "photo_resize",
181
196
  ],
182
-
183
- 'removed_fields': [
184
- 'filter_level',
185
- 'witheld_copyright',
186
- 'withheld_scope',
187
- 'truncated',
188
- 'from_user_utcoffset',
189
- 'from_user_timezone',
190
- 'from_user_lang',
191
- 'from_user_withheld_scope',
192
- 'urls',
193
- 'media_id',
194
- 'media_indice_start',
195
- 'media_indice_end',
196
- 'photo_sizes_width',
197
- 'photo_sizes_height',
198
- 'photo_resize',
199
- ]
200
197
  }
201
198
 
202
199
  TWEET_PLURAL_FIELDS = {
203
- 'links',
204
- 'urls_expanded',
205
- 'domains',
206
- 'hashtags',
207
- 'collected_via',
208
- 'media_urls',
209
- 'media_files',
210
- 'media_types',
211
- 'media_alt_texts',
212
- 'mentioned_names',
213
- 'mentioned_ids',
214
- 'mentions'
200
+ "links",
201
+ "urls_expanded",
202
+ "domains",
203
+ "hashtags",
204
+ "collected_via",
205
+ "media_urls",
206
+ "media_files",
207
+ "media_types",
208
+ "media_alt_texts",
209
+ "mentioned_names",
210
+ "mentioned_ids",
211
+ "mentions",
215
212
  }
216
213
 
217
- TWEET_BOOLEAN_FIELDS = {
218
- 'possibly_sensitive',
219
- 'user_verified',
220
- 'match_query'
221
- }
214
+ TWEET_BOOLEAN_FIELDS = {"possibly_sensitive", "user_verified", "match_query"}
222
215
 
223
216
  # More details on Twitter's users metadata can be read here: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/user-object
224
217
  USER_FIELDS = [
225
- 'id',
226
- 'screen_name',
227
- 'name',
228
- 'description',
229
- 'url',
230
- # 'lang', # dropped from tweet objects only by Twitter (since May 19)
231
- 'timestamp_utc',
232
- 'local_time',
233
- # 'utc_offset', # dropped by Twitter (since May 18), ignorable
234
- # 'time_zone', # dropped by Twitter (since May 18), ignorable
235
- 'location',
236
- # 'geo_enabled', # dropped by Twitter (since May 19), ignorable
237
- 'verified',
238
- 'protected',
239
- 'tweets',
240
- 'followers',
241
- 'friends',
242
- 'likes',
243
- 'lists',
244
- # 'is_translator', # dropped by Twitter (since May 19), ignorable
245
- # 'translator_type', # dropped by Twitter (since May 19), ignorable
246
- # 'is_translation_enabled', # dropped by Twitter (since May 19), ignorable
218
+ "id",
219
+ "screen_name",
220
+ "name",
221
+ "description",
222
+ "url",
223
+ # 'lang', # dropped from tweet objects only by Twitter (since May 2019)
224
+ "timestamp_utc",
225
+ "local_time",
226
+ # 'utc_offset', # dropped by Twitter (since May 2018), ignorable
227
+ # 'time_zone', # dropped by Twitter (since May 2018), ignorable
228
+ "location",
229
+ # 'geo_enabled', # dropped by Twitter (since May 2019), ignorable
230
+ "verified",
231
+ "protected",
232
+ "tweets",
233
+ "followers",
234
+ "friends",
235
+ "likes",
236
+ "lists",
237
+ # 'is_translator', # dropped by Twitter (since May 2019), ignorable
238
+ # 'translator_type', # dropped by Twitter (since May 2019), ignorable
239
+ # 'is_translation_enabled', # dropped by Twitter (since May 2019), ignorable
247
240
  # 'default_profile',
248
241
  # 'default_profile_image',
249
- # 'has_extended_profile', # dropped by Twitter (since May 19), ignorable
250
- # 'profile_image_url', # dropped by Twitter (since May 19), ignorable
251
- 'image',
242
+ # 'has_extended_profile', # dropped by Twitter (since May 2019), ignorable
243
+ # 'profile_image_url', # dropped by Twitter (since May 2019), ignorable
244
+ "image",
252
245
  # 'profile_banner_url',
253
- # 'profile_use_background_image', # dropped by Twitter (since May 19), ignorable
254
- # 'profile_background_image_url', # dropped by Twitter (since May 19), ignorable
255
- # 'profile_background_image_url_https', # dropped by Twitter (since May 19), ignorable
256
- # 'profile_background_tile', # dropped by Twitter (since May 19), ignorable
257
- # 'profile_background_color', # dropped by Twitter (since May 19), ignorable
258
- # 'profile_link_color', # dropped by Twitter (since May 19), ignorable
259
- # 'profile_text_color', # dropped by Twitter (since May 19), ignorable
260
- # 'profile_sidebar_fill_color', # dropped by Twitter (since May 19), ignorable
261
- # 'profile_sidebar_border_color' # dropped by Twitter (since May 19), ignorable
262
- 'default_profile',
263
- 'default_profile_image',
264
- 'witheld_in_countries',
265
- 'witheld_scope'
246
+ # 'profile_use_background_image', # dropped by Twitter (since May 2019), ignorable
247
+ # 'profile_background_image_url', # dropped by Twitter (since May 2019), ignorable
248
+ # 'profile_background_image_url_https', # dropped by Twitter (since May 2019), ignorable
249
+ # 'profile_background_tile', # dropped by Twitter (since May 2019), ignorable
250
+ # 'profile_background_color', # dropped by Twitter (since May 2019), ignorable
251
+ # 'profile_link_color', # dropped by Twitter (since May 2019), ignorable
252
+ # 'profile_text_color', # dropped by Twitter (since May 2019), ignorable
253
+ # 'profile_sidebar_fill_color', # dropped by Twitter (since May 2019), ignorable
254
+ # 'profile_sidebar_border_color' # dropped by Twitter (since May 2019), ignorable
255
+ "default_profile",
256
+ "default_profile_image",
257
+ "witheld_in_countries",
258
+ "witheld_scope",
266
259
  ]
267
260
 
268
- USER_PLURAL_FIELDS = {
269
- 'witheld_in_countries'
270
- }
261
+ USER_PLURAL_FIELDS = {"witheld_in_countries"}
271
262
 
272
263
  USER_BOOLEAN_FIELDS = {
273
- 'verified',
274
- 'protected',
275
- 'default_profile',
276
- 'default_profile_image'
264
+ "verified",
265
+ "protected",
266
+ "default_profile",
267
+ "default_profile_image",
277
268
  }
278
269
 
279
270
  CANONICAL_URL_KWARGS = {
280
- 'strip_authentication': False,
281
- 'strip_trailing_slash': False,
282
- 'strip_protocol': False,
283
- 'strip_irrelevant_subdomains': False,
284
- 'strip_fragment': False,
285
- 'normalize_amp': False,
286
- 'fix_common_mistakes': False,
287
- 'infer_redirection': False,
288
- 'quoted': True
271
+ "strip_authentication": False,
272
+ "strip_trailing_slash": False,
273
+ "strip_protocol": False,
274
+ "strip_irrelevant_subdomains": False,
275
+ "strip_fragment": False,
276
+ "normalize_amp": False,
277
+ "fix_common_mistakes": False,
278
+ "infer_redirection": False,
279
+ "quoted": True,
289
280
  }
290
281
 
291
- CANONICAL_HOSTNAME_KWARGS = {
292
- 'normalize_amp': False,
293
- 'infer_redirection': False
294
- }
282
+ CANONICAL_HOSTNAME_KWARGS = {"normalize_amp": False, "infer_redirection": False}
295
283
 
296
284
  # API v2 constants
297
285
  TWEET_FIELDS_V2 = {
298
- 'attachments',
299
- 'author_id',
286
+ "attachments",
287
+ "author_id",
300
288
  # NOTE: (2023-04-26) dropping this because we don't use it and it prevents us
301
289
  # from being able to get 500 tweets per call using academic v2 API.
302
290
  # 'context_annotations',
303
- 'conversation_id',
304
- 'created_at',
305
- 'entities',
306
- 'geo',
307
- 'id',
308
- 'in_reply_to_user_id',
309
- 'lang',
310
- 'possibly_sensitive',
311
- 'public_metrics',
312
- 'referenced_tweets',
313
- 'reply_settings',
314
- 'source',
315
- 'text',
316
- 'withheld'
291
+ "conversation_id",
292
+ "created_at",
293
+ "entities",
294
+ "geo",
295
+ "id",
296
+ "in_reply_to_user_id",
297
+ "lang",
298
+ "possibly_sensitive",
299
+ "public_metrics",
300
+ "referenced_tweets",
301
+ "reply_settings",
302
+ "source",
303
+ "text",
304
+ "withheld",
317
305
  }
318
306
 
319
307
  MEDIA_FIELDS = {
320
- 'media_key',
321
- 'type',
322
- 'duration_ms',
323
- 'height',
324
- 'preview_image_url',
325
- 'public_metrics',
326
- 'width',
327
- 'alt_text',
328
- 'url',
329
- 'variants'
308
+ "media_key",
309
+ "type",
310
+ "duration_ms",
311
+ "height",
312
+ "preview_image_url",
313
+ "public_metrics",
314
+ "width",
315
+ "alt_text",
316
+ "url",
317
+ "variants",
330
318
  }
331
319
 
332
- POLL_FIELDS = {
333
- 'id',
334
- 'options',
335
- 'duration_minutes',
336
- 'end_datetime',
337
- 'voting_status'
338
- }
320
+ POLL_FIELDS = {"id", "options", "duration_minutes", "end_datetime", "voting_status"}
339
321
 
340
322
  PLACE_FIELDS = {
341
- 'full_name',
342
- 'id',
343
- 'contained_within',
344
- 'country',
345
- 'country_code',
346
- 'geo',
347
- 'name',
348
- 'place_type'
323
+ "full_name",
324
+ "id",
325
+ "contained_within",
326
+ "country",
327
+ "country_code",
328
+ "geo",
329
+ "name",
330
+ "place_type",
349
331
  }
350
332
 
351
333
  USER_FIELDS_V2 = {
352
- 'id',
353
- 'name',
354
- 'username',
355
- 'created_at',
356
- 'description',
357
- 'entities',
358
- 'location',
359
- 'pinned_tweet_id',
360
- 'profile_image_url',
361
- 'protected',
362
- 'public_metrics',
363
- 'url',
364
- 'verified',
365
- 'withheld'
334
+ "id",
335
+ "name",
336
+ "username",
337
+ "created_at",
338
+ "description",
339
+ "entities",
340
+ "location",
341
+ "pinned_tweet_id",
342
+ "profile_image_url",
343
+ "protected",
344
+ "public_metrics",
345
+ "url",
346
+ "verified",
347
+ "withheld",
366
348
  }
367
349
 
368
350
  TWEET_EXPANSIONS = {
369
- 'author_id',
370
- 'referenced_tweets.id',
371
- 'in_reply_to_user_id',
372
- 'attachments.media_keys',
373
- 'attachments.poll_ids',
374
- 'geo.place_id',
375
- 'entities.mentions.username',
376
- 'referenced_tweets.id.author_id'
351
+ "author_id",
352
+ "referenced_tweets.id",
353
+ "in_reply_to_user_id",
354
+ "attachments.media_keys",
355
+ "attachments.poll_ids",
356
+ "geo.place_id",
357
+ "entities.mentions.username",
358
+ "referenced_tweets.id.author_id",
377
359
  }
378
360
 
379
361
  TWEET_PARAMS = {
380
- 'tweet.fields': ','.join(TWEET_FIELDS_V2),
381
- 'media.fields': ','.join(MEDIA_FIELDS),
382
- 'poll.fields': ','.join(POLL_FIELDS),
383
- 'place.fields': ','.join(PLACE_FIELDS),
384
- 'user.fields': ','.join(USER_FIELDS_V2)
362
+ "tweet.fields": ",".join(TWEET_FIELDS_V2),
363
+ "media.fields": ",".join(MEDIA_FIELDS),
364
+ "poll.fields": ",".join(POLL_FIELDS),
365
+ "place.fields": ",".join(PLACE_FIELDS),
366
+ "user.fields": ",".join(USER_FIELDS_V2),
385
367
  }
386
368
 
387
- USER_EXPANSIONS = {
388
- 'pinned_tweet_id'
389
- }
369
+ USER_EXPANSIONS = {"pinned_tweet_id"}
390
370
 
391
371
  USER_PARAMS = {
392
- 'user.fields': ','.join(USER_FIELDS_V2),
393
- 'tweet.fields': ','.join(TWEET_FIELDS_V2)
372
+ "user.fields": ",".join(USER_FIELDS_V2),
373
+ "tweet.fields": ",".join(TWEET_FIELDS_V2),
394
374
  }
395
375
 
396
376
  # Lists
397
377
 
398
378
  LIST_FIELDS = {
399
- 'created_at',
400
- 'follower_count',
401
- 'member_count',
402
- 'private',
403
- 'description',
404
- 'owner_id'
379
+ "created_at",
380
+ "follower_count",
381
+ "member_count",
382
+ "private",
383
+ "description",
384
+ "owner_id",
405
385
  }
406
386
 
407
- LIST_EXPANSIONS = {
408
- 'owner_id'
409
- }
387
+ LIST_EXPANSIONS = {"owner_id"}
410
388
 
411
389
  LIST_PARAMS = {
412
- 'list.fields': ','.join(LIST_FIELDS),
413
- 'user.fields': ','.join(USER_FIELDS_V2)
390
+ "list.fields": ",".join(LIST_FIELDS),
391
+ "user.fields": ",".join(USER_FIELDS_V2),
414
392
  }
415
393
 
416
- LIST_TWEETS_EXPANSIONS = {
417
- 'author_id'
418
- }
394
+ LIST_TWEETS_EXPANSIONS = {"author_id"}
419
395
 
420
- LIST_MEMBERS_EXPANSIONS = {
421
- 'pinned_tweet_id'
422
- }
396
+ LIST_MEMBERS_EXPANSIONS = {"pinned_tweet_id"}
423
397
 
424
398
  LIST_TWEETS_OR_MEMBERS_PARAMS = {
425
- 'tweet.fields': ','.join(TWEET_FIELDS_V2),
426
- 'user.fields': ','.join(USER_FIELDS_V2)
399
+ "tweet.fields": ",".join(TWEET_FIELDS_V2),
400
+ "user.fields": ",".join(USER_FIELDS_V2),
427
401
  }
428
402
 
429
403
  PRE_SNOWFLAKE_LAST_TWEET_ID = 29700859247