twitwi 0.19.0__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {twitwi-0.19.0/twitwi.egg-info → twitwi-0.20.0}/PKG-INFO +13 -3
- {twitwi-0.19.0 → twitwi-0.20.0}/setup.py +1 -2
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/__init__.py +0 -1
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/constants.py +0 -6
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/exceptions.py +0 -4
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/normalizers.py +5 -1
- {twitwi-0.19.0 → twitwi-0.20.0/twitwi.egg-info}/PKG-INFO +13 -3
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi.egg-info/SOURCES.txt +0 -1
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi.egg-info/requires.txt +0 -1
- twitwi-0.19.0/twitwi/client_wrapper.py +0 -166
- {twitwi-0.19.0 → twitwi-0.20.0}/LICENSE.txt +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/README.md +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/setup.cfg +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/anonymizers.py +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/formatters.py +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi/utils.py +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi.egg-info/dependency_links.txt +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi.egg-info/top_level.txt +0 -0
- {twitwi-0.19.0 → twitwi-0.20.0}/twitwi.egg-info/zip-safe +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: twitwi
|
|
3
|
-
Version: 0.19.0
|
|
3
|
+
Version: 0.20.0
|
|
4
4
|
Summary: A collection of Twitter-related helper functions for python.
|
|
5
5
|
Home-page: http://github.com/medialab/twitwi
|
|
6
6
|
Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
|
|
@@ -11,8 +11,18 @@ Requires-Python: >=3.4
|
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE.txt
|
|
13
13
|
Requires-Dist: pytz>=2019.3
|
|
14
|
-
Requires-Dist: twitter==2.0a2
|
|
15
14
|
Requires-Dist: ural>=0.31.1
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: author-email
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: keywords
|
|
21
|
+
Dynamic: license
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
16
26
|
|
|
17
27
|
[](https://github.com/medialab/twitwi/actions)
|
|
18
28
|
|
|
@@ -4,7 +4,7 @@ with open('./README.md', 'r') as f:
|
|
|
4
4
|
long_description = f.read()
|
|
5
5
|
|
|
6
6
|
setup(name='twitwi',
|
|
7
|
-
version='0.19.0',
|
|
7
|
+
version='0.20.0',
|
|
8
8
|
description='A collection of Twitter-related helper functions for python.',
|
|
9
9
|
long_description=long_description,
|
|
10
10
|
long_description_content_type='text/markdown',
|
|
@@ -18,7 +18,6 @@ setup(name='twitwi',
|
|
|
18
18
|
package_data={'docs': ['README.md']},
|
|
19
19
|
install_requires=[
|
|
20
20
|
'pytz>=2019.3',
|
|
21
|
-
'twitter==2.0a2',
|
|
22
21
|
'ural>=0.31.1'
|
|
23
22
|
],
|
|
24
23
|
zip_safe=True)
|
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
# =============================================================================
|
|
4
4
|
#
|
|
5
5
|
from twitwi.anonymizers import anonymize_normalized_tweet
|
|
6
|
-
from twitwi.client_wrapper import TwitterWrapper
|
|
7
6
|
from twitwi.formatters import (
|
|
8
7
|
transform_tweet_into_csv_dict,
|
|
9
8
|
format_tweet_as_csv_row,
|
|
@@ -426,11 +426,5 @@ LIST_TWEETS_OR_MEMBERS_PARAMS = {
|
|
|
426
426
|
'user.fields': ','.join(USER_FIELDS_V2)
|
|
427
427
|
}
|
|
428
428
|
|
|
429
|
-
APP_ONLY_ROUTES = {
|
|
430
|
-
'tweets/counts/recent',
|
|
431
|
-
'tweets/counts/all',
|
|
432
|
-
'tweets/search/all'
|
|
433
|
-
}
|
|
434
|
-
|
|
435
429
|
PRE_SNOWFLAKE_LAST_TWEET_ID = 29700859247
|
|
436
430
|
OFFSET_TIMESTAMP = 1288834974657
|
|
@@ -330,7 +330,11 @@ def normalize_tweet(tweet, locale=None, extract_referenced_tweets=False,
|
|
|
330
330
|
media_urls.append(med_url.split('?tag=')[0])
|
|
331
331
|
media_files.append('%s_%s' % (source_id, med_name))
|
|
332
332
|
media_alt_texts.append(entity.get("ext_alt_text") or '')
|
|
333
|
-
|
|
333
|
+
|
|
334
|
+
# NOTE: fun fact, Twitter is starting to break down and we cannot guarantee
|
|
335
|
+
# expanded_url exists anymore. It even crashes the website itself lol:
|
|
336
|
+
# https://x.com/lmerzeau/status/426318495450943488
|
|
337
|
+
elif "expanded_url" in entity:
|
|
334
338
|
normalized = custom_normalize_url(entity['expanded_url'])
|
|
335
339
|
links.add(normalized)
|
|
336
340
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: twitwi
|
|
3
|
-
Version: 0.19.0
|
|
3
|
+
Version: 0.20.0
|
|
4
4
|
Summary: A collection of Twitter-related helper functions for python.
|
|
5
5
|
Home-page: http://github.com/medialab/twitwi
|
|
6
6
|
Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
|
|
@@ -11,8 +11,18 @@ Requires-Python: >=3.4
|
|
|
11
11
|
Description-Content-Type: text/markdown
|
|
12
12
|
License-File: LICENSE.txt
|
|
13
13
|
Requires-Dist: pytz>=2019.3
|
|
14
|
-
Requires-Dist: twitter==2.0a2
|
|
15
14
|
Requires-Dist: ural>=0.31.1
|
|
15
|
+
Dynamic: author
|
|
16
|
+
Dynamic: author-email
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: keywords
|
|
21
|
+
Dynamic: license
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
16
26
|
|
|
17
27
|
[](https://github.com/medialab/twitwi/actions)
|
|
18
28
|
|
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
# =============================================================================
|
|
2
|
-
# Twitwi Client Wrapper
|
|
3
|
-
# =============================================================================
|
|
4
|
-
#
|
|
5
|
-
# Wrapper for the `twitter` library API client able to rotate the two possible
|
|
6
|
-
# endpoints to maximize throughput.
|
|
7
|
-
#
|
|
8
|
-
import json
|
|
9
|
-
from time import sleep, time
|
|
10
|
-
from operator import itemgetter
|
|
11
|
-
from twitter import Twitter, OAuth, OAuth2, TwitterHTTPError, Twitter2
|
|
12
|
-
|
|
13
|
-
from twitwi.exceptions import TwitterWrapperMaxAttemptsExceeded
|
|
14
|
-
from twitwi.constants import APP_ONLY_ROUTES
|
|
15
|
-
|
|
16
|
-
DEFAULT_MAX_ATTEMPTS = 5
|
|
17
|
-
|
|
18
|
-
# Established from: https://developer.twitter.com/en/support/twitter-api/error-troubleshooting
|
|
19
|
-
NO_RETRY_STATUSES = set([
|
|
20
|
-
400,
|
|
21
|
-
401,
|
|
22
|
-
403,
|
|
23
|
-
404,
|
|
24
|
-
406,
|
|
25
|
-
410,
|
|
26
|
-
422
|
|
27
|
-
])
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class TwitterWrapper(object):
|
|
31
|
-
|
|
32
|
-
def __init__(self, token, token_secret, consumer_key, consumer_secret,
|
|
33
|
-
listener=None, api_version='1.1'):
|
|
34
|
-
|
|
35
|
-
api_version = str(api_version)
|
|
36
|
-
|
|
37
|
-
if api_version not in ['1.1', '2']:
|
|
38
|
-
raise TypeError('API version can only be \'1.1\' or \'2\'.')
|
|
39
|
-
|
|
40
|
-
self.oauth = OAuth(
|
|
41
|
-
token,
|
|
42
|
-
token_secret,
|
|
43
|
-
consumer_key,
|
|
44
|
-
consumer_secret
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
bearer_token_client = Twitter(
|
|
48
|
-
api_version=None,
|
|
49
|
-
format='',
|
|
50
|
-
secure=True,
|
|
51
|
-
auth=OAuth2(consumer_key, consumer_secret)
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
bearer_token = json.loads(
|
|
55
|
-
bearer_token_client.oauth2.token(grant_type='client_credentials')
|
|
56
|
-
)['access_token']
|
|
57
|
-
|
|
58
|
-
self.oauth2 = OAuth2(bearer_token=bearer_token)
|
|
59
|
-
|
|
60
|
-
self.auth = {}
|
|
61
|
-
self.waits = {}
|
|
62
|
-
|
|
63
|
-
TwitterClass = Twitter
|
|
64
|
-
|
|
65
|
-
if api_version == '2':
|
|
66
|
-
TwitterClass = Twitter2
|
|
67
|
-
|
|
68
|
-
for route in APP_ONLY_ROUTES:
|
|
69
|
-
self.auth[route] = 'app'
|
|
70
|
-
self.waits[route] = {'app': 0}
|
|
71
|
-
|
|
72
|
-
self.endpoints = {
|
|
73
|
-
'user': TwitterClass(auth=self.oauth),
|
|
74
|
-
'app': TwitterClass(auth=self.oauth2)
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
self.listener = listener
|
|
78
|
-
|
|
79
|
-
def call(self, route, max_attempts=DEFAULT_MAX_ATTEMPTS, **kwargs):
|
|
80
|
-
attempts = 0
|
|
81
|
-
|
|
82
|
-
if not isinstance(route, list):
|
|
83
|
-
raise TypeError('twitwi.TwitterWrapper.call: expecting route as a list, such as ["friends", "ids"].')
|
|
84
|
-
|
|
85
|
-
route = '/'.join(route)
|
|
86
|
-
|
|
87
|
-
while attempts < max_attempts:
|
|
88
|
-
|
|
89
|
-
if route not in self.auth:
|
|
90
|
-
self.auth[route] = 'user'
|
|
91
|
-
|
|
92
|
-
auth = self.auth[route]
|
|
93
|
-
|
|
94
|
-
try:
|
|
95
|
-
return self.endpoints[auth].__getattr__(route)(**kwargs)
|
|
96
|
-
|
|
97
|
-
except TwitterHTTPError as e:
|
|
98
|
-
|
|
99
|
-
# Rate limited
|
|
100
|
-
if e.e.code == 429:
|
|
101
|
-
now = int(time())
|
|
102
|
-
|
|
103
|
-
# If there are still API calls available, we obviously
|
|
104
|
-
# queried Twitter too fast and should just let it breathe
|
|
105
|
-
remaining = int(e.e.headers['X-Rate-Limit-Remaining'])
|
|
106
|
-
if remaining > 0:
|
|
107
|
-
attempts += 1
|
|
108
|
-
|
|
109
|
-
if callable(self.listener):
|
|
110
|
-
self.listener('excessive-rate', {
|
|
111
|
-
'error': e,
|
|
112
|
-
'route': route,
|
|
113
|
-
'attempts': attempts,
|
|
114
|
-
'auth': auth
|
|
115
|
-
})
|
|
116
|
-
|
|
117
|
-
sleep(1)
|
|
118
|
-
continue
|
|
119
|
-
|
|
120
|
-
reset = int(e.e.headers['X-Rate-Limit-Reset'])
|
|
121
|
-
|
|
122
|
-
if route not in self.waits:
|
|
123
|
-
self.waits[route] = {'user': now, 'app': now}
|
|
124
|
-
|
|
125
|
-
self.waits[route][auth] = reset
|
|
126
|
-
|
|
127
|
-
if callable(self.listener):
|
|
128
|
-
self.listener('rate-limited', {
|
|
129
|
-
'route': route,
|
|
130
|
-
'kwargs': kwargs,
|
|
131
|
-
'reset': reset,
|
|
132
|
-
'auth': auth
|
|
133
|
-
})
|
|
134
|
-
|
|
135
|
-
min_wait = min(self.waits[route].items(), key=itemgetter(1))
|
|
136
|
-
|
|
137
|
-
if min_wait[1] > now:
|
|
138
|
-
sleeptime = 5 + max(0, int(min_wait[1] - now))
|
|
139
|
-
|
|
140
|
-
if callable(self.listener):
|
|
141
|
-
self.listener('waiting', {
|
|
142
|
-
'auth': min_wait[0],
|
|
143
|
-
'reset': min_wait[1],
|
|
144
|
-
'sleep': sleeptime
|
|
145
|
-
})
|
|
146
|
-
|
|
147
|
-
sleep(sleeptime)
|
|
148
|
-
|
|
149
|
-
self.auth[route] = min_wait[0]
|
|
150
|
-
|
|
151
|
-
continue
|
|
152
|
-
|
|
153
|
-
# Errors that should terminate immediately
|
|
154
|
-
elif e.e.code in NO_RETRY_STATUSES:
|
|
155
|
-
raise e
|
|
156
|
-
|
|
157
|
-
# Different error
|
|
158
|
-
else:
|
|
159
|
-
attempts += 1
|
|
160
|
-
|
|
161
|
-
if callable(self.listener):
|
|
162
|
-
self.listener('error', {
|
|
163
|
-
'error': e
|
|
164
|
-
})
|
|
165
|
-
|
|
166
|
-
raise TwitterWrapperMaxAttemptsExceeded
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|