PyPI - twitwi - Versions diffs - 0.21.1__tar.gz → 0.21.2__tar.gz - Mend

twitwi 0.21.1.tar.gz → 0.21.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{twitwi-0.21.1/twitwi.egg-info → twitwi-0.21.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: twitwi
-Version: 0.21.1
+Version: 0.21.2
 Summary: A collection of Twitter-related helper functions for python.
 Home-page: http://github.com/medialab/twitwi
 Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
@@ -95,7 +95,7 @@ for post_data in posts_payload_from_API:
 # Then, saving normalized profiles into a CSV using DictWriter:
-from csv import DictWriter
+import csv
 from twitwi.bluesky.constants import POST_FIELDS
 from twitwi.bluesky import transform_post_into_csv_dict
@@ -108,7 +108,6 @@ with open("normalized_bluesky_posts.csv", "w") as f:
 # Or using the basic CSV writer:
-from csv import writer
 from twitwi.bluesky import format_post_as_csv_row
 with open("normalized_bluesky_posts.csv", "w") as f:

{twitwi-0.21.1 → twitwi-0.21.2}/README.md RENAMED Viewed

@@ -68,7 +68,7 @@ for post_data in posts_payload_from_API:
 # Then, saving normalized profiles into a CSV using DictWriter:
-from csv import DictWriter
+import csv
 from twitwi.bluesky.constants import POST_FIELDS
 from twitwi.bluesky import transform_post_into_csv_dict
@@ -81,7 +81,6 @@ with open("normalized_bluesky_posts.csv", "w") as f:
 # Or using the basic CSV writer:
-from csv import writer
 from twitwi.bluesky import format_post_as_csv_row
 with open("normalized_bluesky_posts.csv", "w") as f:

{twitwi-0.21.1 → twitwi-0.21.2}/setup.py RENAMED Viewed

@@ -5,7 +5,7 @@ with open("./README.md", "r") as f:
 setup(
     name="twitwi",
-    version="0.21.1",
+    version="0.21.2",
     description="A collection of Twitter-related helper functions for python.",
     long_description=long_description,
     long_description_content_type="text/markdown",

{twitwi-0.21.1 → twitwi-0.21.2}/test/bluesky/normalizers_test.py RENAMED Viewed

@@ -15,6 +15,8 @@ OVERWRITE_TESTS = False
 FAKE_COLLECTION_TIME = "2025-01-01T00:00:00.000000"
 def set_fake_collection_time(dico):
     if "collection_time" in dico:
         dico["collection_time"] = FAKE_COLLECTION_TIME
@@ -47,7 +49,9 @@ class TestNormalizers:
         if OVERWRITE_TESTS:
             from test.utils import dump_json_resource
-            normalized_profiles = [set_fake_collection_time(fn(profile)) for profile in profiles]
+            normalized_profiles = [
+                set_fake_collection_time(fn(profile)) for profile in profiles
+            ]
             dump_json_resource(normalized_profiles, "bluesky-normalized-profiles.json")
         expected = get_json_resource("bluesky-normalized-profiles.json")
@@ -79,7 +83,13 @@ class TestNormalizers:
         if OVERWRITE_TESTS:
             from test.utils import dump_json_resource
-            normalized_posts = [[set_fake_collection_time(p) for p in fn(post, extract_referenced_posts=True)] for post in posts]
+            normalized_posts = [
+                [
+                    set_fake_collection_time(p)
+                    for p in fn(post, extract_referenced_posts=True)
+                ]
+                for post in posts
+            ]
             dump_json_resource(normalized_posts, "bluesky-normalized-posts.json")
         expected = get_json_resource("bluesky-normalized-posts.json")

{twitwi-0.21.1 → twitwi-0.21.2}/twitwi/bluesky/normalizers.py RENAMED Viewed

@@ -73,8 +73,12 @@ def prepare_native_gif_as_media(gif_data, user_did, source):
 def prepare_image_as_media(image_data):
+    if "ref" not in image_data["image"] or "$link" not in image_data["image"]["ref"]:
+        image_id = image_data["image"]["cid"]
+    else:
+        image_id = image_data["image"]["ref"]["$link"]
     return {
-        "id": image_data["image"]["ref"]["$link"],
+        "id": image_id,
         "type": image_data["image"]["mimeType"],
         "alt": image_data["alt"],
     }
@@ -92,7 +96,9 @@ def process_starterpack_card(embed_data, post):
     card = embed_data.get("record", {})
     creator_did, pack_did = parse_post_uri(embed_data["uri"])
-    post["card_link"] = format_starterpack_url(embed_data.get("creator", {}).get("handle") or creator_did, pack_did)
+    post["card_link"] = format_starterpack_url(
+        embed_data.get("creator", {}).get("handle") or creator_did, pack_did
+    )
     post["card_title"] = card.get("name", "")
     post["card_description"] = card.get("description", "")
     post["card_thumbnail"] = card.get("thumb", "")
@@ -145,7 +151,9 @@ def prepare_quote_data(embed_quote, card_data, post, links):
         # Extract user handle from url
         if "did:plc:" not in post["quoted_url"]:
-            post["quoted_user_handle"], _ = parse_post_url(post["quoted_url"], post["url"])
+            post["quoted_user_handle"], _ = parse_post_url(
+                post["quoted_url"], post["url"]
+            )
     return (post, quoted_data, links)
@@ -176,7 +184,7 @@ def merge_nested_posts(referenced_posts, nested, source):
 @overload
 def normalize_post(
-    data: Dict,
+    payload: Dict,
     locale: Optional[str] = ...,
     extract_referenced_posts: Literal[True] = ...,
     collection_source: Optional[str] = ...,
@@ -185,7 +193,7 @@ def normalize_post(
 @overload
 def normalize_post(
-    data: Dict,
+    payload: Dict,
     locale: Optional[str] = ...,
     extract_referenced_posts: Literal[False] = ...,
     collection_source: Optional[str] = ...,
@@ -308,7 +316,7 @@ def normalize_post(
         feat = facet["features"][0]
         # Hashtags
-        if feat["$type"].endswith("#tag"):
+        if feat["$type"].endswith("#tag") or feat["$type"].endswith("#hashtag"):
             hashtags.add(feat["tag"].strip().lower())
         # Mentions
@@ -323,7 +331,11 @@ def normalize_post(
                     byteStart = text.find(b"@", byteStart)
                 handle = (
-                    text[byteStart + 1 : facet["index"]["byteEnd"] + byteStart - facet["index"]["byteStart"]]
+                    text[
+                        byteStart + 1 : facet["index"]["byteEnd"]
+                        + byteStart
+                        - facet["index"]["byteStart"]
+                    ]
                     .strip()
                     .lower()
                     .decode("utf-8")
@@ -350,7 +362,9 @@ def normalize_post(
                 {
                     "uri": feat["uri"].encode("utf-8"),
                     "start": byteStart,
-                    "end": byteStart - facet["index"]["byteStart"] + facet["index"]["byteEnd"],
+                    "end": byteStart
+                    - facet["index"]["byteStart"]
+                    + facet["index"]["byteEnd"],
                 }
             )
@@ -442,7 +456,9 @@ def normalize_post(
         # Quote & Starter-packs
         if embed["$type"].endswith(".record"):
             if "app.bsky.graph.starterpack" in embed["record"]["uri"]:
-                post = process_starterpack_card(data.get("embed", {}).get("record"), post)
+                post = process_starterpack_card(
+                    data.get("embed", {}).get("record"), post
+                )
                 if post["card_link"]:
                     extra_links.append(post["card_link"])
             else:
@@ -523,7 +539,10 @@ def normalize_post(
                 # Rewrite post's text to include links to medias within
                 text += b" " + (
-                    media_thumb if media_type.startswith("video") and not media_type.endswith("/gif") else media_url
+                    media_thumb
+                    if media_type.startswith("video")
+                    and not media_type.endswith("/gif")
+                    else media_url
                 ).encode("utf-8")
         # Process quotes

{twitwi-0.21.1 → twitwi-0.21.2}/twitwi/bluesky/types.py RENAMED Viewed

@@ -64,7 +64,7 @@ class BlueskyPost(TypedDict):
     # user_lists: int                   # not available from posts payloads
     user_langs: List[str]               # languages in which the author of the posts usually writes posts (declarative)
     user_avatar: Optional[str]          # URL to the image serving as avatar to the user who authored the post
-    user_created_at: str                # datetime (potentially timezoned) ofwhen the user who authored the post created the account
+    user_created_at: str                # datetime (potentially timezoned) of when the user who authored the post created the account
     user_timestamp_utc: int             # Unix UTC timestamp of when the user who authored the post created the account
     # Parent post identifying fields
@@ -102,27 +102,27 @@ class BlueskyPost(TypedDict):
     quoted_user_handle: Optional[str]   # updatable human-readable username of the account who authored the quoted post
     quoted_created_at: Optional[int]    # datetime (potentially timezoned) of when the quoted post was submitted
     quoted_timestamp_utc: Optional[int] # Unix UTC timestamp of when the quoted post was submitted
-    quoted_status: Optional[str]        # empty or "detached" when the author of the quoted post intentionnally required the quoting post not to be accessible from their own
+    quoted_status: Optional[str]        # empty or "detached" when the author of the quoted post intentionnally required the quoting post not to appear in the list of this post's quotes
     # Embedded elements metadata fields
     links: List[str]                    # list of URLs of all links shared within the post (including potentially the embedded card detailed below, but not the link to a potential quoted post)
-    domains: List[str]                  # list of domains of the links shared within the post (here a domain refer to a full hostname, including subdomains, for instance bluesky.com or medialab.sciencespo.fr)
+    domains: List[str]                  # list of domains of the links shared within the post (here a domain refers to a full hostname, including subdomains, for instance bluesky.com or medialab.sciencespo.fr)
     card_link: Optional[str]            # URL of the link displayed as a card within the post if any
     card_title: Optional[str]           # title of the webpage corresponding to the linkg diplayed as a card within the post if any
     card_description: Optional[str]     # description of the webpage corresponding to the linkg diplayed as a card within the post if any
-    card_thumbnail: Optional[str]       # image displayed as an illustration of the webpage corresponding to the linkg diplayed as a card within the post if any
-    media_urls: List[str]               # list of URLs to all medias (images, videos, gifs) embedded in the post
-    media_thumbnails: List[str]         # list of URLs to small thumbnail version of all medias (images, videos, gifs) embedded in the post
-    media_types: List[str]              # MIME types (such as image/jpeg, image/gif, video/mp4, etc.) of all medias (images, videos, gifs) embedded in the post
-    media_alt_texts: List[str]          # description texts of all medias (images, videos, gifs) embedded in the post
-    mentioned_user_dids: List[str]      # list of all persistent long-term identifier of the accounts adressed within the post (does not include users to which the post replied)
-    mentioned_user_handles: List[str]   # list of all updatable human-readable username of the accounts adressed within the post (does not include users to which the post replied)
+    card_thumbnail: Optional[str]       # image displayed as an illustration of the webpage corresponding to the link diplayed as a card within the post if any
+    media_urls: List[str]               # list of URLs to all media (images, videos, gifs) embedded in the post
+    media_thumbnails: List[str]         # list of URLs to small thumbnail version of all media (images, videos, gifs) embedded in the post
+    media_types: List[str]              # MIME types (such as image/jpeg, image/gif, video/mp4, etc.) of all media (images, videos, gifs) embedded in the post
+    media_alt_texts: List[str]          # description texts of all media (images, videos, gifs) embedded in the post
+    mentioned_user_dids: List[str]      # list of all persistent long-term identifiers of the accounts adressed within the post (does not include users to which the post replied)
+    mentioned_user_handles: List[str]   # list of all updatable human-readable usernames of the accounts adressed within the post (does not include users to which the post replied)
     hashtags: List[str]                 # list of all unique lowercased hashtags found within the post's text
     # Conversation rules fields
     replies_rules: Optional[List[str]]          # list of specific conversation rules set by the author for the current post (can be one or a combination of: disallow, allow_from_follower, allow_from_following, allow_from_mention, or allow_from_list: followed by a list of user DIDs)
     replies_rules_created_at: Optional[str]     # datetime (potentially timezoned) of when the user set the replies_rules
-    replies_rules_timestamp_utc: Optional[int]  # Unix UTC timestamp of when the userset the replies_rules
+    replies_rules_timestamp_utc: Optional[int]  # Unix UTC timestamp of when the user set the replies_rules
     hidden_replies_uris: Optional[List[str]]    # list of ATProto's internal URIs to posts who replied to the post, but where intentionnally marked as hidden by the current post's author
     # quotes_rule: Optional[str]                # not available from posts payloads, cf https://github.com/bluesky-social/atproto/issues/3712
     # quotes_rules_created_at: Optional[str]    # not available from posts payloads, cf https://github.com/bluesky-social/atproto/issues/3712
@@ -131,5 +131,5 @@ class BlueskyPost(TypedDict):
     # Extra fields linked to the data collection and processing
     collection_time: Optional[str]      # datetime (potentially timezoned) of when the data was normalized
-    collected_via: Optional[List[str]]  # extra field added by the normalization process to express how the data collection was ran, will be "quote" or "thread" when a post was grabbed as a referenced post within a really collected post using the "extract_referenced_posts" option of "normalize_post"
-    match_query: Optional[bool]         # extra field added by the normalization process to express whether the post was an intentionnally collected one or only came as a referenced post within a really collected post using the "extract_referenced_posts" option of "normalize_post"
+    collected_via: Optional[List[str]]  # extra field added by the normalization process to express how the data collection was ran, will be "quote" or "thread" when a post was grabbed as a referenced post within the originally collected post using the "extract_referenced_posts" option of "normalize_post"
+    match_query: Optional[bool]         # extra field added by the normalization process to express whether the post was an intentionnally collected one or only came as a referenced post within the originally collected post using the "extract_referenced_posts" option of "normalize_post"

{twitwi-0.21.1 → twitwi-0.21.2/twitwi.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: twitwi
-Version: 0.21.1
+Version: 0.21.2
 Summary: A collection of Twitter-related helper functions for python.
 Home-page: http://github.com/medialab/twitwi
 Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
@@ -95,7 +95,7 @@ for post_data in posts_payload_from_API:
 # Then, saving normalized profiles into a CSV using DictWriter:
-from csv import DictWriter
+import csv
 from twitwi.bluesky.constants import POST_FIELDS
 from twitwi.bluesky import transform_post_into_csv_dict
@@ -108,7 +108,6 @@ with open("normalized_bluesky_posts.csv", "w") as f:
 # Or using the basic CSV writer:
-from csv import writer
 from twitwi.bluesky import format_post_as_csv_row
 with open("normalized_bluesky_posts.csv", "w") as f: