twitwi 0.21.0__tar.gz → 0.21.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. {twitwi-0.21.0/twitwi.egg-info → twitwi-0.21.1}/PKG-INFO +2 -1
  2. {twitwi-0.21.0 → twitwi-0.21.1}/setup.py +2 -2
  3. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/normalizers.py +61 -16
  4. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/types.py +3 -3
  5. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/utils.py +8 -1
  6. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/constants.py +0 -1
  7. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/utils.py +6 -5
  8. {twitwi-0.21.0 → twitwi-0.21.1/twitwi.egg-info}/PKG-INFO +2 -1
  9. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi.egg-info/requires.txt +1 -0
  10. {twitwi-0.21.0 → twitwi-0.21.1}/LICENSE.txt +0 -0
  11. {twitwi-0.21.0 → twitwi-0.21.1}/README.md +0 -0
  12. {twitwi-0.21.0 → twitwi-0.21.1}/setup.cfg +0 -0
  13. {twitwi-0.21.0 → twitwi-0.21.1}/test/bluesky/__init__.py +0 -0
  14. {twitwi-0.21.0 → twitwi-0.21.1}/test/bluesky/formatters_test.py +0 -0
  15. {twitwi-0.21.0 → twitwi-0.21.1}/test/bluesky/normalizers_test.py +0 -0
  16. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/__init__.py +0 -0
  17. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/anonymizers.py +0 -0
  18. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/__init__.py +0 -0
  19. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/constants.py +0 -0
  20. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/bluesky/formatters.py +0 -0
  21. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/exceptions.py +0 -0
  22. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/formatters.py +0 -0
  23. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi/normalizers.py +0 -0
  24. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi.egg-info/SOURCES.txt +0 -0
  25. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi.egg-info/dependency_links.txt +0 -0
  26. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi.egg-info/top_level.txt +0 -0
  27. {twitwi-0.21.0 → twitwi-0.21.1}/twitwi.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: twitwi
3
- Version: 0.21.0
3
+ Version: 0.21.1
4
4
  Summary: A collection of Twitter-related helper functions for python.
5
5
  Home-page: http://github.com/medialab/twitwi
6
6
  Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
@@ -12,6 +12,7 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE.txt
13
13
  Requires-Dist: pytz>=2019.3
14
14
  Requires-Dist: ural>=0.31.1
15
+ Requires-Dist: python-dateutil>=2.9.0
15
16
  Dynamic: author
16
17
  Dynamic: author-email
17
18
  Dynamic: description
@@ -5,7 +5,7 @@ with open("./README.md", "r") as f:
5
5
 
6
6
  setup(
7
7
  name="twitwi",
8
- version="0.21.0",
8
+ version="0.21.1",
9
9
  description="A collection of Twitter-related helper functions for python.",
10
10
  long_description=long_description,
11
11
  long_description_content_type="text/markdown",
@@ -17,6 +17,6 @@ setup(
17
17
  python_requires=">=3.8",
18
18
  packages=find_packages(exclude=["scripts", "test"]),
19
19
  package_data={"docs": ["README.md"]},
20
- install_requires=["pytz>=2019.3", "ural>=0.31.1"],
20
+ install_requires=["pytz>=2019.3", "ural>=0.31.1", "python-dateutil>=2.9.0"],
21
21
  zip_safe=True,
22
22
  )
@@ -15,6 +15,7 @@ from twitwi.bluesky.utils import (
15
15
  format_post_url,
16
16
  parse_post_url,
17
17
  parse_post_uri,
18
+ format_starterpack_url,
18
19
  format_media_url,
19
20
  )
20
21
  from twitwi.bluesky.types import BlueskyProfile, BlueskyPost
@@ -37,11 +38,11 @@ def normalize_profile(data: Dict, locale: Optional[str] = None) -> BlueskyProfil
37
38
  "did": data["did"],
38
39
  "url": format_profile_url(data["handle"]),
39
40
  "handle": data["handle"],
40
- "display_name": data["displayName"],
41
+ "display_name": data.get("displayName", ""),
41
42
  "created_at": created_at,
42
43
  "timestamp_utc": timestamp_utc,
43
44
  "description": data["description"],
44
- "avatar": data["avatar"],
45
+ "avatar": data.get("avatar", ""),
45
46
  "posts": data["postsCount"],
46
47
  "followers": data["followersCount"],
47
48
  "follows": data["followsCount"],
@@ -55,8 +56,13 @@ def normalize_profile(data: Dict, locale: Optional[str] = None) -> BlueskyProfil
55
56
 
56
57
 
57
58
  def prepare_native_gif_as_media(gif_data, user_did, source):
58
- media_cid = gif_data["thumb"]["ref"]["$link"]
59
- _, thumbnail = format_media_url(user_did, media_cid, "image/jpeg", source)
59
+ if "thumb" in gif_data:
60
+ media_cid = gif_data["thumb"]["ref"]["$link"]
61
+ _, thumbnail = format_media_url(user_did, media_cid, "image/jpeg", source)
62
+ else:
63
+ media_cid = ""
64
+ thumbnail = ""
65
+
60
66
  return {
61
67
  "id": media_cid,
62
68
  "type": "video/gif",
@@ -81,6 +87,18 @@ def prepare_video_as_media(video_data):
81
87
  }
82
88
 
83
89
 
90
+ def process_starterpack_card(embed_data, post):
91
+ # Warning: mutates post
92
+
93
+ card = embed_data.get("record", {})
94
+ creator_did, pack_did = parse_post_uri(embed_data["uri"])
95
+ post["card_link"] = format_starterpack_url(embed_data.get("creator", {}).get("handle") or creator_did, pack_did)
96
+ post["card_title"] = card.get("name", "")
97
+ post["card_description"] = card.get("description", "")
98
+ post["card_thumbnail"] = card.get("thumb", "")
99
+ return post
100
+
101
+
84
102
  def process_card_data(embed_data, post):
85
103
  # Warning: mutates post
86
104
 
@@ -122,10 +140,12 @@ def prepare_quote_data(embed_quote, card_data, post, links):
122
140
  break
123
141
 
124
142
  # Remove quoted link from post links
125
- links.remove(post["quoted_url"])
143
+ if post["quoted_url"] in links:
144
+ links.remove(post["quoted_url"])
126
145
 
127
146
  # Extract user handle from url
128
- post["quoted_user_handle"], _ = parse_post_url(post["quoted_url"], post["url"])
147
+ if "did:plc:" not in post["quoted_url"]:
148
+ post["quoted_user_handle"], _ = parse_post_url(post["quoted_url"], post["url"])
129
149
 
130
150
  return (post, quoted_data, links)
131
151
 
@@ -255,8 +275,8 @@ def normalize_post(
255
275
  )
256
276
 
257
277
  # Handle user metadata
258
- post["user_diplay_name"] = data["author"]["displayName"]
259
- post["user_avatar"] = data["author"]["avatar"]
278
+ post["user_diplay_name"] = data["author"].get("displayName", "")
279
+ post["user_avatar"] = data["author"].get("avatar", "")
260
280
  post["user_timestamp_utc"], post["user_created_at"] = get_dates(
261
281
  data["author"]["createdAt"], locale=locale, source="bluesky"
262
282
  )
@@ -312,15 +332,32 @@ def normalize_post(
312
332
 
313
333
  # Links
314
334
  elif feat["$type"].endswith("#link"):
335
+ # Handle native polls
336
+ if "https://poll.blue/" in feat["uri"]:
337
+ if feat["uri"].endswith("/0"):
338
+ links.add(custom_normalize_url(feat["uri"]))
339
+ text += b" %s" % feat["uri"].encode("utf-8")
340
+ continue
341
+
315
342
  links.add(custom_normalize_url(feat["uri"]))
343
+ # Check & fix occasional errored link positioning
344
+ # example: https://bsky.app/profile/ecrime.ch/post/3lqotmopayr23
345
+ byteStart = facet["index"]["byteStart"]
346
+ if b" " in text[byteStart : facet["index"]["byteEnd"]]:
347
+ byteStart = text.find(b"http", byteStart)
348
+
316
349
  links_to_replace.append(
317
350
  {
318
351
  "uri": feat["uri"].encode("utf-8"),
319
- "start": facet["index"]["byteStart"],
320
- "end": facet["index"]["byteEnd"],
352
+ "start": byteStart,
353
+ "end": byteStart - facet["index"]["byteStart"] + facet["index"]["byteEnd"],
321
354
  }
322
355
  )
323
356
 
357
+ elif feat["$type"].endswith("#bold"):
358
+ pass
359
+ elif feat["$type"].endswith("#option"):
360
+ pass
324
361
  else:
325
362
  raise BlueskyPayloadError(
326
363
  post["url"], "unusual record facet feature $type: %s" % feat
@@ -329,7 +366,10 @@ def normalize_post(
329
366
 
330
367
  # Rewrite full links within post's text
331
368
  for link in sorted(links_to_replace, key=lambda x: x["start"], reverse=True):
332
- text = text[: link["start"]] + link["uri"] + text[link["end"] :]
369
+ if link["start"] < 0:
370
+ text = text + b" " + link["uri"]
371
+ else:
372
+ text = text[: link["start"]] + link["uri"] + text[link["end"] :]
333
373
 
334
374
  # Handle thread info when applicable
335
375
  # Unfortunately, post payloads only provide at:// URIs for these, so we do not have the handles
@@ -399,11 +439,16 @@ def normalize_post(
399
439
  if embed["$type"].endswith(".video"):
400
440
  media_data.append(prepare_video_as_media(embed["video"]))
401
441
 
402
- # Quote
442
+ # Quote & Starter-packs
403
443
  if embed["$type"].endswith(".record"):
404
- post, quoted_data, links = prepare_quote_data(
405
- embed["record"], data.get("embed", {}).get("record"), post, links
406
- )
444
+ if "app.bsky.graph.starterpack" in embed["record"]["uri"]:
445
+ post = process_starterpack_card(data.get("embed", {}).get("record"), post)
446
+ if post["card_link"]:
447
+ extra_links.append(post["card_link"])
448
+ else:
449
+ post, quoted_data, links = prepare_quote_data(
450
+ embed["record"], data.get("embed", {}).get("record"), post, links
451
+ )
407
452
 
408
453
  # Quote with medias
409
454
  if embed["$type"].endswith(".recordWithMedia"):
@@ -482,7 +527,7 @@ def normalize_post(
482
527
  ).encode("utf-8")
483
528
 
484
529
  # Process quotes
485
- if quoted_data:
530
+ if quoted_data and "value" in quoted_data:
486
531
  if quoted_data["cid"] != post["quoted_cid"]:
487
532
  raise BlueskyPayloadError(
488
533
  post["url"],
@@ -9,7 +9,7 @@ class BlueskyProfile(TypedDict):
9
9
  did: str # persistent long-term identifier of the account
10
10
  url: str # URL of the profile accessible on the web
11
11
  handle: str # updatable human-readable username of the account (usually like username.bsky.social or username.com)
12
- display_name: str # updatable human-readable name of the account
12
+ display_name: Optional[str] # updatable human-readable name of the account
13
13
  description: str # profile short description written by the user
14
14
  posts: int # total number of posts submitted by the user (at collection time)
15
15
  followers: int # total number of followers of the user (at collection time)
@@ -17,7 +17,7 @@ class BlueskyProfile(TypedDict):
17
17
  lists: int # total number of lists created by the user (at collection time)
18
18
  feedgens: int # total number of custom feeds created by the user (at collection time)
19
19
  starter_packs: int # total number of starter packs created by the user (at collection time)
20
- avatar: str # URL to the image serving as avatar to the user
20
+ avatar: Optional[str] # URL to the image serving as avatar to the user
21
21
  banner: str # URL to the image serving as profile banner to the user
22
22
  pinned_post_uri: Optional[str] # ATProto's internal URI to the post potentially pinned by the user to appear at the top of his posts on his profile
23
23
  created_at: str # datetime (potentially timezoned) of when the user created the account
@@ -63,7 +63,7 @@ class BlueskyPost(TypedDict):
63
63
  # user_follows: int # not available from posts payloads
64
64
  # user_lists: int # not available from posts payloads
65
65
  user_langs: List[str] # languages in which the author of the posts usually writes posts (declarative)
66
- user_avatar: str # URL to the image serving as avatar to the user who authored the post
66
+ user_avatar: Optional[str] # URL to the image serving as avatar to the user who authored the post
67
67
  user_created_at: str # datetime (potentially timezoned) of when the user who authored the post created the account
68
68
  user_timestamp_utc: int # Unix UTC timestamp of when the user who authored the post created the account
69
69
 
@@ -17,7 +17,7 @@ valid_post_keys = [
17
17
  valid_record_keys = ["$type", "createdAt", "text"]
18
18
 
19
19
 
20
- valid_author_keys = ["did", "handle", "displayName", "avatar", "createdAt"]
20
+ valid_author_keys = ["did", "handle", "createdAt"]
21
21
 
22
22
 
23
23
  def validate_post_payload(data):
@@ -81,6 +81,9 @@ def parse_post_url(url, source):
81
81
  def parse_post_uri(uri, source=None):
82
82
  """Returns a tuple of (author_did, post_did) from an at:// post URI"""
83
83
 
84
+ if uri.startswith("at://") and "/app.bsky.graph.starterpack/" in uri:
85
+ return uri[5:].split("/app.bsky.graph.starterpack/")
86
+
84
87
  if not uri.startswith("at://") and "/app.bsky.feed.post/" not in uri:
85
88
  raise BlueskyPayloadError(
86
89
  source or uri, f"{uri} is not a usual Bluesky post uri"
@@ -88,6 +91,10 @@ def parse_post_uri(uri, source=None):
88
91
  return uri[5:].split("/app.bsky.feed.post/")
89
92
 
90
93
 
94
+ def format_starterpack_url(user_handle_or_did, record_did):
95
+ return f"https://bsky.app/starter-pack/{user_handle_or_did}/{record_did}"
96
+
97
+
91
98
  def format_media_url(user_did, media_cid, mime_type, source):
92
99
  media_type = mime_type.split("/")[1]
93
100
  if mime_type.startswith("image"):
@@ -6,7 +6,6 @@
6
6
  #
7
7
  SOURCE_DATETIME_FORMAT = "%a %b %d %H:%M:%S +0000 %Y"
8
8
  SOURCE_DATETIME_FORMAT_V2 = "%Y-%m-%dT%H:%M:%S.%fZ"
9
- SOURCE_DATETIME_FORMAT_V3 = "%Y-%m-%dT%H:%M:%SZ"
10
9
  FORMATTED_TWEET_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
11
10
 
12
11
  FORMATTED_FULL_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%f"
@@ -5,6 +5,7 @@
5
5
  # Miscellaneous utility functions.
6
6
  #
7
7
  from pytz import timezone
8
+ from dateutil.parser import parse as parse_date
8
9
  from ural import normalize_url, get_normalized_hostname
9
10
  from functools import partial
10
11
  from datetime import datetime
@@ -12,7 +13,6 @@ from datetime import datetime
12
13
  from twitwi.constants import (
13
14
  SOURCE_DATETIME_FORMAT,
14
15
  SOURCE_DATETIME_FORMAT_V2,
15
- SOURCE_DATETIME_FORMAT_V3,
16
16
  FORMATTED_TWEET_DATETIME_FORMAT,
17
17
  FORMATTED_FULL_DATETIME_FORMAT,
18
18
  CANONICAL_URL_KWARGS,
@@ -47,12 +47,13 @@ def get_dates(date_str, locale=None, source="v1"):
47
47
  SOURCE_DATETIME_FORMAT if source == "v1" else SOURCE_DATETIME_FORMAT_V2,
48
48
  )
49
49
  except ValueError as e:
50
- if source == "bluesky":
51
- parsed_datetime = datetime.strptime(date_str, SOURCE_DATETIME_FORMAT_V3)
52
- else:
50
+ if source != "bluesky":
53
51
  raise e
52
+ parsed_datetime = parse_date(date_str)
54
53
 
55
- utc_datetime = UTC_TIMEZONE.localize(parsed_datetime)
54
+ utc_datetime = parsed_datetime
55
+ if not parsed_datetime.tzinfo:
56
+ utc_datetime = UTC_TIMEZONE.localize(parsed_datetime)
56
57
  locale_datetime = utc_datetime.astimezone(locale)
57
58
 
58
59
  return (
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: twitwi
3
- Version: 0.21.0
3
+ Version: 0.21.1
4
4
  Summary: A collection of Twitter-related helper functions for python.
5
5
  Home-page: http://github.com/medialab/twitwi
6
6
  Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
@@ -12,6 +12,7 @@ Description-Content-Type: text/markdown
12
12
  License-File: LICENSE.txt
13
13
  Requires-Dist: pytz>=2019.3
14
14
  Requires-Dist: ural>=0.31.1
15
+ Requires-Dist: python-dateutil>=2.9.0
15
16
  Dynamic: author
16
17
  Dynamic: author-email
17
18
  Dynamic: description
@@ -1,2 +1,3 @@
1
1
  pytz>=2019.3
2
2
  ural>=0.31.1
3
+ python-dateutil>=2.9.0
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes