twitwi 0.21.0__py3-none-any.whl → 0.21.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- twitwi/bluesky/normalizers.py +61 -16
- twitwi/bluesky/types.py +3 -3
- twitwi/bluesky/utils.py +8 -1
- twitwi/constants.py +0 -1
- twitwi/utils.py +6 -5
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/METADATA +2 -1
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/RECORD +11 -11
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/WHEEL +0 -0
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/licenses/LICENSE.txt +0 -0
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/top_level.txt +0 -0
- {twitwi-0.21.0.dist-info → twitwi-0.21.1.dist-info}/zip-safe +0 -0
twitwi/bluesky/normalizers.py
CHANGED
|
@@ -15,6 +15,7 @@ from twitwi.bluesky.utils import (
|
|
|
15
15
|
format_post_url,
|
|
16
16
|
parse_post_url,
|
|
17
17
|
parse_post_uri,
|
|
18
|
+
format_starterpack_url,
|
|
18
19
|
format_media_url,
|
|
19
20
|
)
|
|
20
21
|
from twitwi.bluesky.types import BlueskyProfile, BlueskyPost
|
|
@@ -37,11 +38,11 @@ def normalize_profile(data: Dict, locale: Optional[str] = None) -> BlueskyProfil
|
|
|
37
38
|
"did": data["did"],
|
|
38
39
|
"url": format_profile_url(data["handle"]),
|
|
39
40
|
"handle": data["handle"],
|
|
40
|
-
"display_name": data["displayName"],
|
|
41
|
+
"display_name": data.get("displayName", ""),
|
|
41
42
|
"created_at": created_at,
|
|
42
43
|
"timestamp_utc": timestamp_utc,
|
|
43
44
|
"description": data["description"],
|
|
44
|
-
"avatar": data["avatar"],
|
|
45
|
+
"avatar": data.get("avatar", ""),
|
|
45
46
|
"posts": data["postsCount"],
|
|
46
47
|
"followers": data["followersCount"],
|
|
47
48
|
"follows": data["followsCount"],
|
|
@@ -55,8 +56,13 @@ def normalize_profile(data: Dict, locale: Optional[str] = None) -> BlueskyProfil
|
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
def prepare_native_gif_as_media(gif_data, user_did, source):
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
if "thumb" in gif_data:
|
|
60
|
+
media_cid = gif_data["thumb"]["ref"]["$link"]
|
|
61
|
+
_, thumbnail = format_media_url(user_did, media_cid, "image/jpeg", source)
|
|
62
|
+
else:
|
|
63
|
+
media_cid = ""
|
|
64
|
+
thumbnail = ""
|
|
65
|
+
|
|
60
66
|
return {
|
|
61
67
|
"id": media_cid,
|
|
62
68
|
"type": "video/gif",
|
|
@@ -81,6 +87,18 @@ def prepare_video_as_media(video_data):
|
|
|
81
87
|
}
|
|
82
88
|
|
|
83
89
|
|
|
90
|
+
def process_starterpack_card(embed_data, post):
|
|
91
|
+
# Warning: mutates post
|
|
92
|
+
|
|
93
|
+
card = embed_data.get("record", {})
|
|
94
|
+
creator_did, pack_did = parse_post_uri(embed_data["uri"])
|
|
95
|
+
post["card_link"] = format_starterpack_url(embed_data.get("creator", {}).get("handle") or creator_did, pack_did)
|
|
96
|
+
post["card_title"] = card.get("name", "")
|
|
97
|
+
post["card_description"] = card.get("description", "")
|
|
98
|
+
post["card_thumbnail"] = card.get("thumb", "")
|
|
99
|
+
return post
|
|
100
|
+
|
|
101
|
+
|
|
84
102
|
def process_card_data(embed_data, post):
|
|
85
103
|
# Warning: mutates post
|
|
86
104
|
|
|
@@ -122,10 +140,12 @@ def prepare_quote_data(embed_quote, card_data, post, links):
|
|
|
122
140
|
break
|
|
123
141
|
|
|
124
142
|
# Remove quoted link from post links
|
|
125
|
-
|
|
143
|
+
if post["quoted_url"] in links:
|
|
144
|
+
links.remove(post["quoted_url"])
|
|
126
145
|
|
|
127
146
|
# Extract user handle from url
|
|
128
|
-
|
|
147
|
+
if "did:plc:" not in post["quoted_url"]:
|
|
148
|
+
post["quoted_user_handle"], _ = parse_post_url(post["quoted_url"], post["url"])
|
|
129
149
|
|
|
130
150
|
return (post, quoted_data, links)
|
|
131
151
|
|
|
@@ -255,8 +275,8 @@ def normalize_post(
|
|
|
255
275
|
)
|
|
256
276
|
|
|
257
277
|
# Handle user metadata
|
|
258
|
-
post["user_diplay_name"] = data["author"]["displayName"]
|
|
259
|
-
post["user_avatar"] = data["author"]["avatar"]
|
|
278
|
+
post["user_diplay_name"] = data["author"].get("displayName", "")
|
|
279
|
+
post["user_avatar"] = data["author"].get("avatar", "")
|
|
260
280
|
post["user_timestamp_utc"], post["user_created_at"] = get_dates(
|
|
261
281
|
data["author"]["createdAt"], locale=locale, source="bluesky"
|
|
262
282
|
)
|
|
@@ -312,15 +332,32 @@ def normalize_post(
|
|
|
312
332
|
|
|
313
333
|
# Links
|
|
314
334
|
elif feat["$type"].endswith("#link"):
|
|
335
|
+
# Handle native polls
|
|
336
|
+
if "https://poll.blue/" in feat["uri"]:
|
|
337
|
+
if feat["uri"].endswith("/0"):
|
|
338
|
+
links.add(custom_normalize_url(feat["uri"]))
|
|
339
|
+
text += b" %s" % feat["uri"].encode("utf-8")
|
|
340
|
+
continue
|
|
341
|
+
|
|
315
342
|
links.add(custom_normalize_url(feat["uri"]))
|
|
343
|
+
# Check & fix occasional errored link positioning
|
|
344
|
+
# example: https://bsky.app/profile/ecrime.ch/post/3lqotmopayr23
|
|
345
|
+
byteStart = facet["index"]["byteStart"]
|
|
346
|
+
if b" " in text[byteStart : facet["index"]["byteEnd"]]:
|
|
347
|
+
byteStart = text.find(b"http", byteStart)
|
|
348
|
+
|
|
316
349
|
links_to_replace.append(
|
|
317
350
|
{
|
|
318
351
|
"uri": feat["uri"].encode("utf-8"),
|
|
319
|
-
"start": facet["index"]["byteStart"],
|
|
320
|
-
"end": facet["index"]["byteEnd"],
|
|
352
|
+
"start": byteStart,
|
|
353
|
+
"end": byteStart - facet["index"]["byteStart"] + facet["index"]["byteEnd"],
|
|
321
354
|
}
|
|
322
355
|
)
|
|
323
356
|
|
|
357
|
+
elif feat["$type"].endswith("#bold"):
|
|
358
|
+
pass
|
|
359
|
+
elif feat["$type"].endswith("#option"):
|
|
360
|
+
pass
|
|
324
361
|
else:
|
|
325
362
|
raise BlueskyPayloadError(
|
|
326
363
|
post["url"], "unusual record facet feature $type: %s" % feat
|
|
@@ -329,7 +366,10 @@ def normalize_post(
|
|
|
329
366
|
|
|
330
367
|
# Rewrite full links within post's text
|
|
331
368
|
for link in sorted(links_to_replace, key=lambda x: x["start"], reverse=True):
|
|
332
|
-
|
|
369
|
+
if link["start"] < 0:
|
|
370
|
+
text = text + b" " + link["uri"]
|
|
371
|
+
else:
|
|
372
|
+
text = text[: link["start"]] + link["uri"] + text[link["end"] :]
|
|
333
373
|
|
|
334
374
|
# Handle thread info when applicable
|
|
335
375
|
# Unfortunately posts' payload only provide at uris for these so we do not have the handles
|
|
@@ -399,11 +439,16 @@ def normalize_post(
|
|
|
399
439
|
if embed["$type"].endswith(".video"):
|
|
400
440
|
media_data.append(prepare_video_as_media(embed["video"]))
|
|
401
441
|
|
|
402
|
-
# Quote
|
|
442
|
+
# Quote & Starter-packs
|
|
403
443
|
if embed["$type"].endswith(".record"):
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
444
|
+
if "app.bsky.graph.starterpack" in embed["record"]["uri"]:
|
|
445
|
+
post = process_starterpack_card(data.get("embed", {}).get("record"), post)
|
|
446
|
+
if post["card_link"]:
|
|
447
|
+
extra_links.append(post["card_link"])
|
|
448
|
+
else:
|
|
449
|
+
post, quoted_data, links = prepare_quote_data(
|
|
450
|
+
embed["record"], data.get("embed", {}).get("record"), post, links
|
|
451
|
+
)
|
|
407
452
|
|
|
408
453
|
# Quote with medias
|
|
409
454
|
if embed["$type"].endswith(".recordWithMedia"):
|
|
@@ -482,7 +527,7 @@ def normalize_post(
|
|
|
482
527
|
).encode("utf-8")
|
|
483
528
|
|
|
484
529
|
# Process quotes
|
|
485
|
-
if quoted_data:
|
|
530
|
+
if quoted_data and "value" in quoted_data:
|
|
486
531
|
if quoted_data["cid"] != post["quoted_cid"]:
|
|
487
532
|
raise BlueskyPayloadError(
|
|
488
533
|
post["url"],
|
twitwi/bluesky/types.py
CHANGED
|
@@ -9,7 +9,7 @@ class BlueskyProfile(TypedDict):
|
|
|
9
9
|
did: str # persistent long-term identifier of the account
|
|
10
10
|
url: str # URL of the profile accessible on the web
|
|
11
11
|
handle: str # updatable human-readable username of the account (usually like username.bsky.social or username.com)
|
|
12
|
-
display_name: str
|
|
12
|
+
display_name: Optional[str] # updatable human-readable name of the account
|
|
13
13
|
description: str # profile short description written by the user
|
|
14
14
|
posts: int # total number of posts submitted by the user (at collection time)
|
|
15
15
|
followers: int # total number of followers of the user (at collection time)
|
|
@@ -17,7 +17,7 @@ class BlueskyProfile(TypedDict):
|
|
|
17
17
|
lists: int # total number of lists created by the user (at collection time)
|
|
18
18
|
feedgens: int # total number of custom feeds created by the user (at collection time)
|
|
19
19
|
starter_packs: int # total number of starter packs created by the user (at collection time)
|
|
20
|
-
avatar: str
|
|
20
|
+
avatar: Optional[str] # URL to the image serving as avatar to the user
|
|
21
21
|
banner: str # URL to the image serving as profile banner to the user
|
|
22
22
|
pinned_post_uri: Optional[str] # ATProto's internal URI to the post potentially pinned by the user to appear at the top of his posts on his profile
|
|
23
23
|
created_at: str # datetime (potentially timezoned) of when the user created the account
|
|
@@ -63,7 +63,7 @@ class BlueskyPost(TypedDict):
|
|
|
63
63
|
# user_follows: int # not available from posts payloads
|
|
64
64
|
# user_lists: int # not available from posts payloads
|
|
65
65
|
user_langs: List[str] # languages in which the author of the posts usually writes posts (declarative)
|
|
66
|
-
user_avatar: str
|
|
66
|
+
user_avatar: Optional[str] # URL to the image serving as avatar to the user who authored the post
|
|
67
67
|
user_created_at: str # datetime (potentially timezoned) ofwhen the user who authored the post created the account
|
|
68
68
|
user_timestamp_utc: int # Unix UTC timestamp of when the user who authored the post created the account
|
|
69
69
|
|
twitwi/bluesky/utils.py
CHANGED
|
@@ -17,7 +17,7 @@ valid_post_keys = [
|
|
|
17
17
|
valid_record_keys = ["$type", "createdAt", "text"]
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
valid_author_keys = ["did", "handle", "
|
|
20
|
+
valid_author_keys = ["did", "handle", "createdAt"]
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def validate_post_payload(data):
|
|
@@ -81,6 +81,9 @@ def parse_post_url(url, source):
|
|
|
81
81
|
def parse_post_uri(uri, source=None):
|
|
82
82
|
"""Returns a tuple of (author_did, post_did) from an at:// post URI"""
|
|
83
83
|
|
|
84
|
+
if uri.startswith("at://") and "/app.bsky.graph.starterpack/" in uri:
|
|
85
|
+
return uri[5:].split("/app.bsky.graph.starterpack/")
|
|
86
|
+
|
|
84
87
|
if not uri.startswith("at://") and "/app.bsky.feed.post/" not in uri:
|
|
85
88
|
raise BlueskyPayloadError(
|
|
86
89
|
source or uri, f"{uri} is not a usual Bluesky post uri"
|
|
@@ -88,6 +91,10 @@ def parse_post_uri(uri, source=None):
|
|
|
88
91
|
return uri[5:].split("/app.bsky.feed.post/")
|
|
89
92
|
|
|
90
93
|
|
|
94
|
+
def format_starterpack_url(user_handle_or_did, record_did):
|
|
95
|
+
return f"https://bsky.app/starter-pack/{user_handle_or_did}/{record_did}"
|
|
96
|
+
|
|
97
|
+
|
|
91
98
|
def format_media_url(user_did, media_cid, mime_type, source):
|
|
92
99
|
media_type = mime_type.split("/")[1]
|
|
93
100
|
if mime_type.startswith("image"):
|
twitwi/constants.py
CHANGED
|
@@ -6,7 +6,6 @@
|
|
|
6
6
|
#
|
|
7
7
|
SOURCE_DATETIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"
|
|
8
8
|
SOURCE_DATETIME_FORMAT_V2 = "%Y-%m-%dT%H:%M:%S.%fZ"
|
|
9
|
-
SOURCE_DATETIME_FORMAT_V3 = "%Y-%m-%dT%H:%M:%SZ"
|
|
10
9
|
FORMATTED_TWEET_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
|
|
11
10
|
|
|
12
11
|
FORMATTED_FULL_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
|
twitwi/utils.py
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
# Miscellaneous utility functions.
|
|
6
6
|
#
|
|
7
7
|
from pytz import timezone
|
|
8
|
+
from dateutil.parser import parse as parse_date
|
|
8
9
|
from ural import normalize_url, get_normalized_hostname
|
|
9
10
|
from functools import partial
|
|
10
11
|
from datetime import datetime
|
|
@@ -12,7 +13,6 @@ from datetime import datetime
|
|
|
12
13
|
from twitwi.constants import (
|
|
13
14
|
SOURCE_DATETIME_FORMAT,
|
|
14
15
|
SOURCE_DATETIME_FORMAT_V2,
|
|
15
|
-
SOURCE_DATETIME_FORMAT_V3,
|
|
16
16
|
FORMATTED_TWEET_DATETIME_FORMAT,
|
|
17
17
|
FORMATTED_FULL_DATETIME_FORMAT,
|
|
18
18
|
CANONICAL_URL_KWARGS,
|
|
@@ -47,12 +47,13 @@ def get_dates(date_str, locale=None, source="v1"):
|
|
|
47
47
|
SOURCE_DATETIME_FORMAT if source == "v1" else SOURCE_DATETIME_FORMAT_V2,
|
|
48
48
|
)
|
|
49
49
|
except ValueError as e:
|
|
50
|
-
if source == "bluesky":
|
|
51
|
-
parsed_datetime = datetime.strptime(date_str, SOURCE_DATETIME_FORMAT_V3)
|
|
52
|
-
else:
|
|
50
|
+
if source != "bluesky":
|
|
53
51
|
raise e
|
|
52
|
+
parsed_datetime = parse_date(date_str)
|
|
54
53
|
|
|
55
|
-
utc_datetime = UTC_TIMEZONE.localize(parsed_datetime)
|
|
54
|
+
utc_datetime = parsed_datetime
|
|
55
|
+
if not parsed_datetime.tzinfo:
|
|
56
|
+
utc_datetime = UTC_TIMEZONE.localize(parsed_datetime)
|
|
56
57
|
locale_datetime = utc_datetime.astimezone(locale)
|
|
57
58
|
|
|
58
59
|
return (
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: twitwi
|
|
3
|
-
Version: 0.21.0
|
|
3
|
+
Version: 0.21.1
|
|
4
4
|
Summary: A collection of Twitter-related helper functions for python.
|
|
5
5
|
Home-page: http://github.com/medialab/twitwi
|
|
6
6
|
Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
|
|
@@ -12,6 +12,7 @@ Description-Content-Type: text/markdown
|
|
|
12
12
|
License-File: LICENSE.txt
|
|
13
13
|
Requires-Dist: pytz>=2019.3
|
|
14
14
|
Requires-Dist: ural>=0.31.1
|
|
15
|
+
Requires-Dist: python-dateutil>=2.9.0
|
|
15
16
|
Dynamic: author
|
|
16
17
|
Dynamic: author-email
|
|
17
18
|
Dynamic: description
|
|
@@ -3,20 +3,20 @@ test/bluesky/formatters_test.py,sha256=PUEFGdp6fUpNhiWaoEC1-CXfX9L3jPDYcAjoOZ_Di
|
|
|
3
3
|
test/bluesky/normalizers_test.py,sha256=hYIAM53RYHxxw1YsJ9kJMn8TljnttpczqRpYHyZezAo,4281
|
|
4
4
|
twitwi/__init__.py,sha256=y0bAx9gE3THtlWE1YpXDIhGwqJ5_I8DCStWyyiiXJkw,1095
|
|
5
5
|
twitwi/anonymizers.py,sha256=nkl6HL1BWLz00wJ060XSbqjN5JF8pvcpEPnRXt70TUY,1588
|
|
6
|
-
twitwi/constants.py,sha256=
|
|
6
|
+
twitwi/constants.py,sha256=fvqCngJIGyz5CpdVWbcAfjmE3_kvcx9giN0rEljL7OU,16001
|
|
7
7
|
twitwi/exceptions.py,sha256=OCIDagu2ErDyOGWunRBCK3O62TnzFpIMQ9gS8l9EALQ,696
|
|
8
8
|
twitwi/formatters.py,sha256=yn14AsrGAUw8rShOnYJvoMbzdWpfTeSs0P0ZPNTwhLU,3142
|
|
9
9
|
twitwi/normalizers.py,sha256=CWUK-XwhcEjLDjWH_qb6E03WZKsbIcwiRAVUjwXKQho,28438
|
|
10
|
-
twitwi/utils.py,sha256=
|
|
10
|
+
twitwi/utils.py,sha256=8BiUrkzeydTh4a-rcFW0IHhSNdxlvCXaEpHZGVROl3A,3090
|
|
11
11
|
twitwi/bluesky/__init__.py,sha256=7ITVm1bWE9p0HCWcUs2iOxwKQXK8NEzrvp9hXaicrp0,445
|
|
12
12
|
twitwi/bluesky/constants.py,sha256=CSxNZGYlZvlZ0IMYpP8tVTO6AnzoTmGDFh185hs8n88,473
|
|
13
13
|
twitwi/bluesky/formatters.py,sha256=r8MKOpU8z024z9DQ8-tlKjzaB92T1Su2IjcFV5hjfXg,731
|
|
14
|
-
twitwi/bluesky/normalizers.py,sha256=
|
|
15
|
-
twitwi/bluesky/types.py,sha256=
|
|
16
|
-
twitwi/bluesky/utils.py,sha256=
|
|
17
|
-
twitwi-0.21.
|
|
18
|
-
twitwi-0.21.
|
|
19
|
-
twitwi-0.21.
|
|
20
|
-
twitwi-0.21.
|
|
21
|
-
twitwi-0.21.
|
|
22
|
-
twitwi-0.21.
|
|
14
|
+
twitwi/bluesky/normalizers.py,sha256=LpxWgwrIG1TsjL-6nEG8U_FsMKh_BQHuXRrAJdu9lEA,26318
|
|
15
|
+
twitwi/bluesky/types.py,sha256=TaOnAbLPKJqWumLybKmDDh46FLb9WXzqOPyDQU8RHrI,12288
|
|
16
|
+
twitwi/bluesky/utils.py,sha256=KQd0YVWZHcCRcqnX5A1aeKdavwocN6Bsi8pCnHqRgXc,3486
|
|
17
|
+
twitwi-0.21.1.dist-info/licenses/LICENSE.txt,sha256=Ddg_PcGnl0qd2167o2dheCjE_rCZJOoBxjJnJhhOpX4,1099
|
|
18
|
+
twitwi-0.21.1.dist-info/METADATA,sha256=ruJ7MYnoR-xAoMFKf8RWYSri-jqvf8ulW-2fquLjcys,18017
|
|
19
|
+
twitwi-0.21.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
twitwi-0.21.1.dist-info/top_level.txt,sha256=TaKyGU7j_EVbP5KI0UD6qjbaKv2Qn0OrkfUQ29a04kg,12
|
|
21
|
+
twitwi-0.21.1.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
22
|
+
twitwi-0.21.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|