twitwi 0.19.2__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/bluesky/__init__.py +0 -0
- test/bluesky/formatters_test.py +101 -0
- test/bluesky/normalizers_test.py +130 -0
- twitwi/__init__.py +19 -3
- twitwi/anonymizers.py +3 -9
- twitwi/bluesky/__init__.py +16 -0
- twitwi/bluesky/constants.py +19 -0
- twitwi/bluesky/formatters.py +29 -0
- twitwi/bluesky/normalizers.py +641 -0
- twitwi/bluesky/types.py +135 -0
- twitwi/bluesky/utils.py +103 -0
- twitwi/constants.py +324 -355
- twitwi/exceptions.py +8 -5
- twitwi/formatters.py +35 -37
- twitwi/normalizers.py +403 -339
- twitwi/utils.py +44 -17
- twitwi-0.21.0.dist-info/METADATA +435 -0
- twitwi-0.21.0.dist-info/RECORD +22 -0
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/WHEEL +1 -1
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/top_level.txt +1 -0
- twitwi/client_wrapper.py +0 -166
- twitwi-0.19.2.dist-info/METADATA +0 -146
- twitwi-0.19.2.dist-info/RECORD +0 -14
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info/licenses}/LICENSE.txt +0 -0
- {twitwi-0.19.2.dist-info → twitwi-0.21.0.dist-info}/zip-safe +0 -0
twitwi/client_wrapper.py
DELETED
|
@@ -1,166 +0,0 @@
|
|
|
1
|
-
# =============================================================================
|
|
2
|
-
# Twitwi Client Wrapper
|
|
3
|
-
# =============================================================================
|
|
4
|
-
#
|
|
5
|
-
# Wrapper for the `twitter` library API client able to rotate the two possible
|
|
6
|
-
# endpoints to maximize throughput.
|
|
7
|
-
#
|
|
8
|
-
import json
|
|
9
|
-
from time import sleep, time
|
|
10
|
-
from operator import itemgetter
|
|
11
|
-
from twitter import Twitter, OAuth, OAuth2, TwitterHTTPError, Twitter2
|
|
12
|
-
|
|
13
|
-
from twitwi.exceptions import TwitterWrapperMaxAttemptsExceeded
|
|
14
|
-
from twitwi.constants import APP_ONLY_ROUTES
|
|
15
|
-
|
|
16
|
-
DEFAULT_MAX_ATTEMPTS = 5
|
|
17
|
-
|
|
18
|
-
# Established from: https://developer.twitter.com/en/support/twitter-api/error-troubleshooting
|
|
19
|
-
NO_RETRY_STATUSES = set([
|
|
20
|
-
400,
|
|
21
|
-
401,
|
|
22
|
-
403,
|
|
23
|
-
404,
|
|
24
|
-
406,
|
|
25
|
-
410,
|
|
26
|
-
422
|
|
27
|
-
])
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class TwitterWrapper(object):
|
|
31
|
-
|
|
32
|
-
def __init__(self, token, token_secret, consumer_key, consumer_secret,
|
|
33
|
-
listener=None, api_version='1.1'):
|
|
34
|
-
|
|
35
|
-
api_version = str(api_version)
|
|
36
|
-
|
|
37
|
-
if api_version not in ['1.1', '2']:
|
|
38
|
-
raise TypeError('API version can only be \'1.1\' or \'2\'.')
|
|
39
|
-
|
|
40
|
-
self.oauth = OAuth(
|
|
41
|
-
token,
|
|
42
|
-
token_secret,
|
|
43
|
-
consumer_key,
|
|
44
|
-
consumer_secret
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
bearer_token_client = Twitter(
|
|
48
|
-
api_version=None,
|
|
49
|
-
format='',
|
|
50
|
-
secure=True,
|
|
51
|
-
auth=OAuth2(consumer_key, consumer_secret)
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
bearer_token = json.loads(
|
|
55
|
-
bearer_token_client.oauth2.token(grant_type='client_credentials')
|
|
56
|
-
)['access_token']
|
|
57
|
-
|
|
58
|
-
self.oauth2 = OAuth2(bearer_token=bearer_token)
|
|
59
|
-
|
|
60
|
-
self.auth = {}
|
|
61
|
-
self.waits = {}
|
|
62
|
-
|
|
63
|
-
TwitterClass = Twitter
|
|
64
|
-
|
|
65
|
-
if api_version == '2':
|
|
66
|
-
TwitterClass = Twitter2
|
|
67
|
-
|
|
68
|
-
for route in APP_ONLY_ROUTES:
|
|
69
|
-
self.auth[route] = 'app'
|
|
70
|
-
self.waits[route] = {'app': 0}
|
|
71
|
-
|
|
72
|
-
self.endpoints = {
|
|
73
|
-
'user': TwitterClass(auth=self.oauth),
|
|
74
|
-
'app': TwitterClass(auth=self.oauth2)
|
|
75
|
-
}
|
|
76
|
-
|
|
77
|
-
self.listener = listener
|
|
78
|
-
|
|
79
|
-
def call(self, route, max_attempts=DEFAULT_MAX_ATTEMPTS, **kwargs):
|
|
80
|
-
attempts = 0
|
|
81
|
-
|
|
82
|
-
if not isinstance(route, list):
|
|
83
|
-
raise TypeError('twitwi.TwitterWrapper.call: expecting route as a list, such as ["friends", "ids"].')
|
|
84
|
-
|
|
85
|
-
route = '/'.join(route)
|
|
86
|
-
|
|
87
|
-
while attempts < max_attempts:
|
|
88
|
-
|
|
89
|
-
if route not in self.auth:
|
|
90
|
-
self.auth[route] = 'user'
|
|
91
|
-
|
|
92
|
-
auth = self.auth[route]
|
|
93
|
-
|
|
94
|
-
try:
|
|
95
|
-
return self.endpoints[auth].__getattr__(route)(**kwargs)
|
|
96
|
-
|
|
97
|
-
except TwitterHTTPError as e:
|
|
98
|
-
|
|
99
|
-
# Rate limited
|
|
100
|
-
if e.e.code == 429:
|
|
101
|
-
now = int(time())
|
|
102
|
-
|
|
103
|
-
# If there are still API calls available, we obviously
|
|
104
|
-
# queried Twitter too fast and should just let it breathe
|
|
105
|
-
remaining = int(e.e.headers['X-Rate-Limit-Remaining'])
|
|
106
|
-
if remaining > 0:
|
|
107
|
-
attempts += 1
|
|
108
|
-
|
|
109
|
-
if callable(self.listener):
|
|
110
|
-
self.listener('excessive-rate', {
|
|
111
|
-
'error': e,
|
|
112
|
-
'route': route,
|
|
113
|
-
'attempts': attempts,
|
|
114
|
-
'auth': auth
|
|
115
|
-
})
|
|
116
|
-
|
|
117
|
-
sleep(1)
|
|
118
|
-
continue
|
|
119
|
-
|
|
120
|
-
reset = int(e.e.headers['X-Rate-Limit-Reset'])
|
|
121
|
-
|
|
122
|
-
if route not in self.waits:
|
|
123
|
-
self.waits[route] = {'user': now, 'app': now}
|
|
124
|
-
|
|
125
|
-
self.waits[route][auth] = reset
|
|
126
|
-
|
|
127
|
-
if callable(self.listener):
|
|
128
|
-
self.listener('rate-limited', {
|
|
129
|
-
'route': route,
|
|
130
|
-
'kwargs': kwargs,
|
|
131
|
-
'reset': reset,
|
|
132
|
-
'auth': auth
|
|
133
|
-
})
|
|
134
|
-
|
|
135
|
-
min_wait = min(self.waits[route].items(), key=itemgetter(1))
|
|
136
|
-
|
|
137
|
-
if min_wait[1] > now:
|
|
138
|
-
sleeptime = 5 + max(0, int(min_wait[1] - now))
|
|
139
|
-
|
|
140
|
-
if callable(self.listener):
|
|
141
|
-
self.listener('waiting', {
|
|
142
|
-
'auth': min_wait[0],
|
|
143
|
-
'reset': min_wait[1],
|
|
144
|
-
'sleep': sleeptime
|
|
145
|
-
})
|
|
146
|
-
|
|
147
|
-
sleep(sleeptime)
|
|
148
|
-
|
|
149
|
-
self.auth[route] = min_wait[0]
|
|
150
|
-
|
|
151
|
-
continue
|
|
152
|
-
|
|
153
|
-
# Errors that should terminate immediately
|
|
154
|
-
elif e.e.code in NO_RETRY_STATUSES:
|
|
155
|
-
raise e
|
|
156
|
-
|
|
157
|
-
# Different error
|
|
158
|
-
else:
|
|
159
|
-
attempts += 1
|
|
160
|
-
|
|
161
|
-
if callable(self.listener):
|
|
162
|
-
self.listener('error', {
|
|
163
|
-
'error': e
|
|
164
|
-
})
|
|
165
|
-
|
|
166
|
-
raise TwitterWrapperMaxAttemptsExceeded
|
twitwi-0.19.2.dist-info/METADATA
DELETED
|
@@ -1,146 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.1
|
|
2
|
-
Name: twitwi
|
|
3
|
-
Version: 0.19.2
|
|
4
|
-
Summary: A collection of Twitter-related helper functions for python.
|
|
5
|
-
Home-page: http://github.com/medialab/twitwi
|
|
6
|
-
Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
|
|
7
|
-
Author-email: guillaume.plique@sciencespo.fr
|
|
8
|
-
License: MIT
|
|
9
|
-
Keywords: twitter
|
|
10
|
-
Requires-Python: >=3.4
|
|
11
|
-
Description-Content-Type: text/markdown
|
|
12
|
-
License-File: LICENSE.txt
|
|
13
|
-
Requires-Dist: pytz >=2019.3
|
|
14
|
-
Requires-Dist: twitter ==2.0a2
|
|
15
|
-
Requires-Dist: ural >=0.31.1
|
|
16
|
-
|
|
17
|
-
[Build status](https://github.com/medialab/twitwi/actions)
|
|
18
|
-
|
|
19
|
-
# Twitwi
|
|
20
|
-
|
|
21
|
-
A collection of Twitter-related helper functions for Python.
|
|
22
|
-
|
|
23
|
-
## Installation
|
|
24
|
-
|
|
25
|
-
You can install `twitwi` with pip using the following command:
|
|
26
|
-
|
|
27
|
-
```
|
|
28
|
-
pip install twitwi
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
## Usage
|
|
32
|
-
|
|
33
|
-
*Normalization functions*
|
|
34
|
-
|
|
35
|
-
* [normalize_tweets_payload_v2](#normalize_tweets_payload_v2)
|
|
36
|
-
|
|
37
|
-
*Formatting functions*
|
|
38
|
-
|
|
39
|
-
* [transform_tweet_into_csv_dict](#transform_tweet_into_csv_dict)
|
|
40
|
-
* [transform_user_into_csv_dict](#transform_user_into_csv_dict)
|
|
41
|
-
* [format_tweet_as_csv_row](#format_tweet_as_csv_row)
|
|
42
|
-
|
|
43
|
-
*Useful constants (under twitwi.constants)*
|
|
44
|
-
|
|
45
|
-
* [TWEET_FIELDS](#tweet_fields)
|
|
46
|
-
* [USER_FIELDS](#user_fields)
|
|
47
|
-
|
|
48
|
-
### normalize_tweets_payload_v2
|
|
49
|
-
|
|
50
|
-
Function taking an entire tweets payload from the v2 API and returning a list of the contained tweets normalized and structured in a way that makes further analysis of the data convenient.
|
|
51
|
-
|
|
52
|
-
```python
|
|
53
|
-
from twitwi import normalize_tweets_payload_v2
|
|
54
|
-
|
|
55
|
-
# Normalizing an entire tweets payload to extract a list of tweets
|
|
56
|
-
normalize_tweets_payload_v2(payload)
|
|
57
|
-
|
|
58
|
-
# Normalizing an entire tweets payload to extract a list of tweets
|
|
59
|
-
# as well as the referenced tweets (quoted, retweeted, etc.)
|
|
60
|
-
normalize_tweets_payload_v2(payload, extract_referenced_tweets=True)
|
|
61
|
-
|
|
62
|
-
# Converting found dates to a chosen timezone
|
|
63
|
-
from pytz import timezone
|
|
64
|
-
paris_tz = timezone('Europe/Paris')
|
|
65
|
-
|
|
66
|
-
normalize_tweets_payload_v2(payload, locale=paris_tz)
|
|
67
|
-
```
|
|
68
|
-
|
|
69
|
-
*Arguments*
|
|
70
|
-
|
|
71
|
-
* **payload** *(dict)*: tweets payload coming from Twitter API v2.
|
|
72
|
-
* **locale** *(pytz.timezone, optional)*: timezone used to convert dates. If not given, will default to UTC.
|
|
73
|
-
* **extract_referenced_tweets** *(bool, optional)*: whether to keep referenced tweets (retweeted, quoted etc.) in the output. Defaults to `False`.
|
|
74
|
-
* **collection_source** *(string, optional)*: optional information added to the tweets to indicate where you collected them from.
|
|
75
|
-
|
|
76
|
-
### transform_tweet_into_csv_dict
|
|
77
|
-
|
|
78
|
-
Function transforming (i.e. mutating, so beware) a given normalized tweet into a suitable dict able to be written by a `csv.DictWriter` as a row.
|
|
79
|
-
|
|
80
|
-
```python
|
|
81
|
-
from twitwi import transform_tweet_into_csv_dict
|
|
82
|
-
|
|
83
|
-
# The function returns nothing, `normalized_tweet` has been mutated
|
|
84
|
-
transform_tweet_into_csv_dict(normalized_tweet)
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
### transform_user_into_csv_dict
|
|
88
|
-
|
|
89
|
-
Function transforming (i.e. mutating, so beware) a given normalized Twitter user into a suitable dict able to be written by a `csv.DictWriter` as a row.
|
|
90
|
-
|
|
91
|
-
```python
|
|
92
|
-
from twitwi import transform_user_into_csv_dict
|
|
93
|
-
|
|
94
|
-
# The function returns nothing, `normalized_user` has been mutated
|
|
95
|
-
transform_user_into_csv_dict(normalized_user)
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
### format_tweet_as_csv_row
|
|
99
|
-
|
|
100
|
-
Function formatting the given normalized tweet as a list able to be written by a `csv.writer` as a row.
|
|
101
|
-
|
|
102
|
-
```python
|
|
103
|
-
from twitwi import format_tweet_as_csv_row
|
|
104
|
-
|
|
105
|
-
row = format_tweet_as_csv_row(normalized_tweet)
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
### format_user_as_csv_row
|
|
109
|
-
|
|
110
|
-
Function formatting the given normalized Twitter user as a list able to be written by a `csv.writer` as a row.
|
|
111
|
-
|
|
112
|
-
```python
|
|
113
|
-
from twitwi import format_user_as_csv_row
|
|
114
|
-
|
|
115
|
-
row = format_user_as_csv_row(normalized_user)
|
|
116
|
-
```
|
|
117
|
-
|
|
118
|
-
### TWEET_FIELDS
|
|
119
|
-
|
|
120
|
-
List of tweet field names. Useful to declare headers with csv writers:
|
|
121
|
-
|
|
122
|
-
```python
|
|
123
|
-
from twitwi.constants import TWEET_FIELDS
|
|
124
|
-
|
|
125
|
-
# Using csv.writer
|
|
126
|
-
w = csv.writer(f)
|
|
127
|
-
w.writerow(TWEET_FIELDS)
|
|
128
|
-
|
|
129
|
-
# Using csv.DictWriter
|
|
130
|
-
w = csv.DictWriter(f, fieldnames=TWEET_FIELDS)
|
|
131
|
-
w.writeheader()
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
### USER_FIELDS
|
|
135
|
-
|
|
136
|
-
```python
|
|
137
|
-
from twitwi.constants import USER_FIELDS
|
|
138
|
-
|
|
139
|
-
# Using csv.writer
|
|
140
|
-
w = csv.writer(f)
|
|
141
|
-
w.writerow(USER_FIELDS)
|
|
142
|
-
|
|
143
|
-
# Using csv.DictWriter
|
|
144
|
-
w = csv.DictWriter(f, fieldnames=USER_FIELDS)
|
|
145
|
-
w.writeheader()
|
|
146
|
-
```
|
twitwi-0.19.2.dist-info/RECORD
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
twitwi/__init__.py,sha256=_QyXJBafpPuU_Q6iu6u7m-c7ud7JLFRnJE2jDUe1isM,754
|
|
2
|
-
twitwi/anonymizers.py,sha256=Dt72fdAJ4RzAxj-ZRmCVUNNWstlgvlJUNmUk520COOI,1594
|
|
3
|
-
twitwi/client_wrapper.py,sha256=vX3viQ97xTQ9JJfztnNRCGrfJm-XYpdswo6NDItXufQ,5056
|
|
4
|
-
twitwi/constants.py,sha256=KG0yIyWaLlwpqxxud75fsEY3n3CPio6uFWqZL9I401Y,16509
|
|
5
|
-
twitwi/exceptions.py,sha256=3AlspgS4u6Y2skbCKluVJnCQcnY-y7AI9UbMNMTn7Ak,532
|
|
6
|
-
twitwi/formatters.py,sha256=rK4IBBH-0PMRxQdDZ5szHKBk-J-AzefN8Wdj79L7YiY,3111
|
|
7
|
-
twitwi/normalizers.py,sha256=L4YcQZudizP7gdHQmhwafT6o39uGNk3cxtOcA9U7OKE,27694
|
|
8
|
-
twitwi/utils.py,sha256=iBxlCKjwPOz3RKIa8I_vs7d1miRi-eMXMpobKC5apZI,2349
|
|
9
|
-
twitwi-0.19.2.dist-info/LICENSE.txt,sha256=Ddg_PcGnl0qd2167o2dheCjE_rCZJOoBxjJnJhhOpX4,1099
|
|
10
|
-
twitwi-0.19.2.dist-info/METADATA,sha256=wigdEBMaf3jq-z89cWt2F3lhuQkWL9-lb6qeu0WbWuI,4159
|
|
11
|
-
twitwi-0.19.2.dist-info/WHEEL,sha256=y4mX-SOX4fYIkonsAGA5N0Oy-8_gI4FXw5HNI1xqvWg,91
|
|
12
|
-
twitwi-0.19.2.dist-info/top_level.txt,sha256=kellKVCFa_xo69jlXUtZXY2RYDd9ywdBe_-_iYK4W74,7
|
|
13
|
-
twitwi-0.19.2.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
14
|
-
twitwi-0.19.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|