twitwi 0.20.0__py3-none-any.whl → 0.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/bluesky/__init__.py +0 -0
- test/bluesky/formatters_test.py +101 -0
- test/bluesky/normalizers_test.py +130 -0
- twitwi/__init__.py +19 -2
- twitwi/anonymizers.py +3 -9
- twitwi/bluesky/__init__.py +16 -0
- twitwi/bluesky/constants.py +19 -0
- twitwi/bluesky/formatters.py +29 -0
- twitwi/bluesky/normalizers.py +641 -0
- twitwi/bluesky/types.py +135 -0
- twitwi/bluesky/utils.py +103 -0
- twitwi/constants.py +324 -349
- twitwi/exceptions.py +8 -1
- twitwi/formatters.py +35 -37
- twitwi/normalizers.py +403 -339
- twitwi/utils.py +44 -17
- twitwi-0.21.0.dist-info/METADATA +435 -0
- twitwi-0.21.0.dist-info/RECORD +22 -0
- {twitwi-0.20.0.dist-info → twitwi-0.21.0.dist-info}/WHEEL +1 -1
- {twitwi-0.20.0.dist-info → twitwi-0.21.0.dist-info}/top_level.txt +1 -0
- twitwi-0.20.0.dist-info/METADATA +0 -156
- twitwi-0.20.0.dist-info/RECORD +0 -13
- {twitwi-0.20.0.dist-info → twitwi-0.21.0.dist-info}/licenses/LICENSE.txt +0 -0
- {twitwi-0.20.0.dist-info → twitwi-0.21.0.dist-info}/zip-safe +0 -0
twitwi/exceptions.py
CHANGED
|
@@ -14,4 +14,11 @@ class TwitterPayloadV2IncompleteIncludesError(TwitwiError):
|
|
|
14
14
|
def __init__(self, kind, key):
|
|
15
15
|
self.kind = kind
|
|
16
16
|
self.key = key
|
|
17
|
-
super().__init__(
|
|
17
|
+
super().__init__("{!r} ({})".format(key, kind))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BlueskyPayloadError(TwitwiError):
|
|
21
|
+
def __init__(self, source, message):
|
|
22
|
+
self.source = source
|
|
23
|
+
self.message = message
|
|
24
|
+
super().__init__(f"Error while processing Bluesky post {source}:\n{message}")
|
twitwi/formatters.py
CHANGED
|
@@ -12,64 +12,68 @@ from twitwi.constants import (
|
|
|
12
12
|
TWEET_PLURAL_FIELDS,
|
|
13
13
|
USER_FIELDS,
|
|
14
14
|
USER_BOOLEAN_FIELDS,
|
|
15
|
-
USER_PLURAL_FIELDS
|
|
15
|
+
USER_PLURAL_FIELDS,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
def apply_tcat_format(item):
|
|
20
|
-
result = {
|
|
21
|
-
|
|
20
|
+
result = {
|
|
21
|
+
v: item[k] for k, v in GAZOU_TO_TCAT["identical_fields"].items() if k in item
|
|
22
|
+
}
|
|
23
|
+
result["source"] = "<a href={} rel=nofollow>{}</a>".format(
|
|
24
|
+
item["source_url"], item["source_name"]
|
|
25
|
+
)
|
|
22
26
|
return result
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
def make_transform_into_csv_dict(plural_fields, boolean_fields):
|
|
26
|
-
|
|
27
|
-
|
|
30
|
+
def transform_into_csv_dict(
|
|
31
|
+
item, item_id=None, plural_separator="|", allow_erroneous_plurals=False
|
|
32
|
+
):
|
|
28
33
|
if item_id is not None:
|
|
29
|
-
item[
|
|
34
|
+
item["id"] = item_id
|
|
30
35
|
|
|
31
|
-
item[
|
|
36
|
+
item["links"] = item.get("proper_links", item.get("links", []))
|
|
32
37
|
|
|
33
38
|
for plural_field in plural_fields:
|
|
34
39
|
plurals = item.get(plural_field, [])
|
|
35
40
|
if allow_erroneous_plurals:
|
|
36
|
-
plurals = [
|
|
41
|
+
plurals = [
|
|
42
|
+
element if element is not None else "" for element in plurals
|
|
43
|
+
]
|
|
37
44
|
item[plural_field] = plural_separator.join(plurals)
|
|
38
45
|
|
|
39
46
|
for boolean_field in boolean_fields:
|
|
40
|
-
item[boolean_field] =
|
|
47
|
+
item[boolean_field] = (
|
|
48
|
+
int(item[boolean_field]) if boolean_field in item else ""
|
|
49
|
+
)
|
|
41
50
|
|
|
42
51
|
return transform_into_csv_dict
|
|
43
52
|
|
|
44
53
|
|
|
45
54
|
def make_format_as_csv_row(fields, plural_fields, boolean_fields):
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
if field == 'id' and item_id is not None:
|
|
55
|
+
def format_field_for_csv(field, item, item_id=None, plural_separator="|"):
|
|
56
|
+
if field == "id" and item_id is not None:
|
|
50
57
|
return item_id
|
|
51
58
|
|
|
52
59
|
# NOTE: this is hardly the most performant way to proceed but I am lazy...
|
|
53
60
|
if field in plural_fields:
|
|
54
61
|
v = item.get(field, [])
|
|
55
62
|
|
|
56
|
-
if field ==
|
|
57
|
-
v = item.get(
|
|
63
|
+
if field == "links":
|
|
64
|
+
v = item.get("proper_links", v)
|
|
58
65
|
|
|
59
66
|
return plural_separator.join(v)
|
|
60
67
|
|
|
61
68
|
if field in boolean_fields:
|
|
62
|
-
return int(item[field]) if field in item else
|
|
69
|
+
return int(item[field]) if field in item else ""
|
|
63
70
|
|
|
64
|
-
return item.get(field,
|
|
71
|
+
return item.get(field, "")
|
|
65
72
|
|
|
66
|
-
def format_item_as_csv_row(item, item_id=None, plural_separator=
|
|
73
|
+
def format_item_as_csv_row(item, item_id=None, plural_separator="|"):
|
|
67
74
|
return [
|
|
68
75
|
format_field_for_csv(
|
|
69
|
-
field,
|
|
70
|
-
item,
|
|
71
|
-
item_id=item_id,
|
|
72
|
-
plural_separator=plural_separator
|
|
76
|
+
field, item, item_id=item_id, plural_separator=plural_separator
|
|
73
77
|
)
|
|
74
78
|
for field in fields
|
|
75
79
|
]
|
|
@@ -78,31 +82,25 @@ def make_format_as_csv_row(fields, plural_fields, boolean_fields):
|
|
|
78
82
|
|
|
79
83
|
|
|
80
84
|
transform_tweet_into_csv_dict = make_transform_into_csv_dict(
|
|
81
|
-
TWEET_PLURAL_FIELDS,
|
|
82
|
-
TWEET_BOOLEAN_FIELDS
|
|
85
|
+
TWEET_PLURAL_FIELDS, TWEET_BOOLEAN_FIELDS
|
|
83
86
|
)
|
|
84
87
|
|
|
85
88
|
format_tweet_as_csv_row = make_format_as_csv_row(
|
|
86
|
-
TWEET_FIELDS,
|
|
87
|
-
TWEET_PLURAL_FIELDS,
|
|
88
|
-
TWEET_BOOLEAN_FIELDS
|
|
89
|
+
TWEET_FIELDS, TWEET_PLURAL_FIELDS, TWEET_BOOLEAN_FIELDS
|
|
89
90
|
)
|
|
90
91
|
|
|
91
92
|
transform_user_into_csv_dict = make_transform_into_csv_dict(
|
|
92
|
-
USER_PLURAL_FIELDS,
|
|
93
|
-
USER_BOOLEAN_FIELDS
|
|
93
|
+
USER_PLURAL_FIELDS, USER_BOOLEAN_FIELDS
|
|
94
94
|
)
|
|
95
95
|
|
|
96
96
|
format_user_as_csv_row = make_format_as_csv_row(
|
|
97
|
-
USER_FIELDS,
|
|
98
|
-
USER_PLURAL_FIELDS,
|
|
99
|
-
USER_BOOLEAN_FIELDS
|
|
97
|
+
USER_FIELDS, USER_PLURAL_FIELDS, USER_BOOLEAN_FIELDS
|
|
100
98
|
)
|
|
101
99
|
|
|
102
100
|
__all__ = [
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
101
|
+
"transform_tweet_into_csv_dict",
|
|
102
|
+
"format_tweet_as_csv_row",
|
|
103
|
+
"transform_user_into_csv_dict",
|
|
104
|
+
"format_user_as_csv_row",
|
|
105
|
+
"apply_tcat_format",
|
|
108
106
|
]
|