twitwi 0.23.0__py3-none-any.whl → 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- test/bluesky/formatters_test.py +4 -2
- twitwi/bluesky/normalizers.py +379 -76
- twitwi/bluesky/utils.py +30 -14
- twitwi/exceptions.py +1 -1
- twitwi/formatters.py +16 -3
- twitwi/utils.py +15 -9
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/METADATA +3 -3
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/RECORD +12 -12
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/WHEEL +1 -1
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/licenses/LICENSE.txt +0 -0
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/top_level.txt +0 -0
- {twitwi-0.23.0.dist-info → twitwi-0.24.0.dist-info}/zip-safe +0 -0
test/bluesky/formatters_test.py
CHANGED
|
@@ -112,7 +112,9 @@ class TestFormatters:
|
|
|
112
112
|
|
|
113
113
|
for source in normalized_posts:
|
|
114
114
|
for post in source:
|
|
115
|
-
writer.writerow(
|
|
115
|
+
writer.writerow(
|
|
116
|
+
format_post_as_csv_row(post, allow_erroneous_plurals=True)
|
|
117
|
+
)
|
|
116
118
|
|
|
117
119
|
if OVERWRITE_TESTS:
|
|
118
120
|
written = buffer.getvalue()
|
|
@@ -140,7 +142,7 @@ class TestFormatters:
|
|
|
140
142
|
|
|
141
143
|
for source in normalized_posts:
|
|
142
144
|
for post in source:
|
|
143
|
-
transform_post_into_csv_dict(post)
|
|
145
|
+
transform_post_into_csv_dict(post, allow_erroneous_plurals=True)
|
|
144
146
|
writer.writerow(post)
|
|
145
147
|
|
|
146
148
|
with open_resource("bluesky-posts-export.csv") as f:
|
twitwi/bluesky/normalizers.py
CHANGED
|
@@ -99,14 +99,33 @@ def prepare_native_gif_as_media(gif_data, user_did, source):
|
|
|
99
99
|
}
|
|
100
100
|
|
|
101
101
|
|
|
102
|
-
def prepare_image_as_media(image_data, source):
    """Build a normalized media dict from one raw image entry of a post embed.

    Args:
        image_data: raw image entry; its "image" value is either a plain
            string id or a dict carrying "mimeType" plus an id stored under
            "ref" -> "$link", "link" or "cid". Its "alt" value is copied as is.
        source: identifier of the post being processed, only used to build
            the error raised on unparsable payloads.

    Returns:
        A dict with the keys "id", "type" and "alt".

    Raises:
        BlueskyPayloadError: if the "image" value is missing, is neither a
            string nor a dict, or is a dict holding no usable id.
    """
    # Using .get so that a missing "image" key ends up in the explicit
    # BlueskyPayloadError branch below rather than in a bare KeyError.
    image = image_data.get("image")

    if isinstance(image, str):
        # As in this post: https://bsky.app/profile/did:plc:xafmeedgq77f6smn6kmalasr/post/3lcnxglm3o62z
        image_type = "image/jpeg"
        image_id = image
    elif isinstance(image, dict):
        image_type = image["mimeType"]
        ref = image.get("ref")

        # Only trust "ref" when it really is a mapping: a null or otherwise
        # malformed "ref" must fall back to the alternative id fields
        # instead of crashing on the membership test.
        if isinstance(ref, dict) and "$link" in ref:
            image_id = ref["$link"]
        # As in this post: https://bsky.app/profile/testjuan06.bsky.social/post/3ljkzygywso2b
        elif "link" in image:
            image_id = image["link"]
        elif "cid" in image:
            image_id = image["cid"]
        else:
            raise BlueskyPayloadError(
                source, "Unable to find image id in image data: %s" % image_data
            )
    else:
        raise BlueskyPayloadError(source, "Unable to parse image data: %s" % image_data)

    return {
        "id": image_id,
        "type": image_type,
        "alt": image_data["alt"],
    }
|
|
112
131
|
|
|
@@ -140,7 +159,15 @@ def process_card_data(embed_data, post):
|
|
|
140
159
|
post["card_link"] = embed_data["uri"]
|
|
141
160
|
post["card_title"] = embed_data.get("title", "")
|
|
142
161
|
post["card_description"] = embed_data.get("description", "")
|
|
143
|
-
|
|
162
|
+
if isinstance(embed_data.get("thumb"), dict) and embed_data["thumb"].get(
|
|
163
|
+
"ref", {}
|
|
164
|
+
).get("$link"):
|
|
165
|
+
media_cid = embed_data["thumb"]["ref"]["$link"]
|
|
166
|
+
post["card_thumbnail"] = (
|
|
167
|
+
f"https://cdn.bsky.app/img/feed_thumbnail/plain/{post['user_did']}/{media_cid}@jpeg"
|
|
168
|
+
)
|
|
169
|
+
else:
|
|
170
|
+
post["card_thumbnail"] = embed_data.get("thumb", "")
|
|
144
171
|
return post
|
|
145
172
|
|
|
146
173
|
|
|
@@ -308,6 +335,10 @@ def normalize_post(
|
|
|
308
335
|
post["timestamp_utc"], post["local_time"] = get_dates(
|
|
309
336
|
data["record"]["createdAt"], locale=locale, source="bluesky"
|
|
310
337
|
)
|
|
338
|
+
# Left-pad years that have fewer than 4 digits, as seen in some posts: https://bsky.app/profile/koro.icu/post/3kbpuogc6fz2o
|
|
339
|
+
# len 26 example: '2023-06-15T12:34:56.789000'
|
|
340
|
+
while len(post["local_time"]) < 26 and len(post["local_time"].split("-")[0]) < 4:
|
|
341
|
+
post["local_time"] = "0" + post["local_time"]
|
|
311
342
|
post["indexed_at_utc"] = data["indexedAt"]
|
|
312
343
|
|
|
313
344
|
# Handle post/user identifiers
|
|
@@ -316,7 +347,11 @@ def normalize_post(
|
|
|
316
347
|
post["user_did"], post["did"] = parse_post_uri(data["uri"])
|
|
317
348
|
post["user_handle"] = data["author"]["handle"]
|
|
318
349
|
post["user_url"] = format_profile_url(post["user_handle"])
|
|
319
|
-
|
|
350
|
+
# example: https://bsky.app/profile/did:plc:n5pm4vggu475okayqvqipkoh/post/3lmdcgp3a7cnd
|
|
351
|
+
if post["user_handle"] == "handle.invalid":
|
|
352
|
+
post["url"] = format_post_url(post["user_did"], post["did"])
|
|
353
|
+
else:
|
|
354
|
+
post["url"] = format_post_url(post["user_handle"], post["did"])
|
|
320
355
|
|
|
321
356
|
if post["user_did"] != data["author"]["did"]:
|
|
322
357
|
raise BlueskyPayloadError(
|
|
@@ -350,19 +385,91 @@ def normalize_post(
|
|
|
350
385
|
hashtags = set()
|
|
351
386
|
links = set()
|
|
352
387
|
links_to_replace = []
|
|
388
|
+
media_data = []
|
|
389
|
+
extra_links = []
|
|
390
|
+
post["media_urls"] = []
|
|
353
391
|
for facet in data["record"].get("facets", []):
|
|
354
392
|
if len(facet["features"]) != 1:
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
393
|
+
raising_error = False
|
|
394
|
+
for feat in facet["features"]:
|
|
395
|
+
# Already handled linkcards separately below
|
|
396
|
+
if feat["$type"].endswith("#linkcard"):
|
|
397
|
+
continue
|
|
398
|
+
|
|
399
|
+
# If there are links, we register them and do not replace anything in original text
|
|
400
|
+
# as we don't have position for each link
|
|
401
|
+
# example: https://bsky.app/profile/77cupons.bsky.social/post/3latbufuvqw25
|
|
402
|
+
elif feat["$type"].endswith("#link") and "uri" in feat:
|
|
403
|
+
link = safe_normalize_url(feat["uri"])
|
|
404
|
+
if is_url(link):
|
|
405
|
+
links.add(link)
|
|
406
|
+
links_to_replace.append(
|
|
407
|
+
{"uri": feat["uri"].encode("utf-8"), "start": -1, "end": -1}
|
|
408
|
+
)
|
|
409
|
+
elif feat["$type"].lower().endswith("#tag"):
|
|
410
|
+
hashtags.add(feat["tag"].strip().lower())
|
|
411
|
+
# As in this post: https://bsky.app/profile/havehashad.com/post/3ki3rk5ytqd2e
|
|
412
|
+
elif feat["$type"].endswith("#image") and "uri" in feat:
|
|
413
|
+
post["media_urls"].append(safe_normalize_url(feat["uri"]))
|
|
414
|
+
else:
|
|
415
|
+
raising_error = True
|
|
416
|
+
|
|
417
|
+
if raising_error:
|
|
418
|
+
raise BlueskyPayloadError(
|
|
419
|
+
post["url"],
|
|
420
|
+
"unusual record facet content with more or less than a unique feature: %s"
|
|
421
|
+
% facet,
|
|
422
|
+
)
|
|
423
|
+
continue
|
|
360
424
|
|
|
361
425
|
feat = facet["features"][0]
|
|
426
|
+
lower_feat_type = feat["$type"].lower()
|
|
362
427
|
|
|
363
428
|
# Hashtags
|
|
364
|
-
if
|
|
365
|
-
|
|
429
|
+
if (
|
|
430
|
+
lower_feat_type.endswith("#tag")
|
|
431
|
+
or lower_feat_type.endswith(".tag")
|
|
432
|
+
or lower_feat_type.endswith("#hashtag")
|
|
433
|
+
or lower_feat_type == "facettag"
|
|
434
|
+
):
|
|
435
|
+
# Some posts have the full text in the "text" field of the hashtag feature
|
|
436
|
+
if "text" in feat:
|
|
437
|
+
for tag in feat["text"].split("#"):
|
|
438
|
+
if tag.strip():
|
|
439
|
+
hashtags.add(tag.strip().lower())
|
|
440
|
+
# some posts have "hashtag" instead of "tag" field
|
|
441
|
+
# example: https://bsky.app/profile/did:plc:jrodn6nnfuwzm2zxbxbpzgot/post/3lhwag3mzoo2k
|
|
442
|
+
else:
|
|
443
|
+
if "tag" in feat:
|
|
444
|
+
tag = feat["tag"].strip().lower()
|
|
445
|
+
elif "hashtag" in feat:
|
|
446
|
+
tag = feat["hashtag"].strip().lower()
|
|
447
|
+
# Somehow no tag found, we'll try to get it in the text slice
|
|
448
|
+
# example: https://bsky.app/profile/did:plc:p6yojdpa5iatdk3ttaty2zu2/post/3knvsl6h4x22i
|
|
449
|
+
elif len(feat) == 1:
|
|
450
|
+
byteStart = facet["index"]["byteStart"]
|
|
451
|
+
if text[byteStart : byteStart + 1] == b"#":
|
|
452
|
+
byteEnd = facet["index"]["byteEnd"]
|
|
453
|
+
try:
|
|
454
|
+
tag = (
|
|
455
|
+
text[byteStart:byteEnd]
|
|
456
|
+
.decode("utf-8")
|
|
457
|
+
.strip()
|
|
458
|
+
.lstrip("#")
|
|
459
|
+
.lower()
|
|
460
|
+
)
|
|
461
|
+
except UnicodeDecodeError:
|
|
462
|
+
raise BlueskyPayloadError(
|
|
463
|
+
post["url"],
|
|
464
|
+
"unable to decode utf-8 slice for hashtag extraction: %s"
|
|
465
|
+
% facet,
|
|
466
|
+
)
|
|
467
|
+
else:
|
|
468
|
+
raise BlueskyPayloadError(
|
|
469
|
+
post["url"],
|
|
470
|
+
"unable to extract hashtag from text slice: %s" % facet,
|
|
471
|
+
)
|
|
472
|
+
hashtags.add(tag)
|
|
366
473
|
|
|
367
474
|
# Mentions
|
|
368
475
|
elif feat["$type"].endswith("#mention"):
|
|
@@ -392,12 +499,23 @@ def normalize_post(
|
|
|
392
499
|
]
|
|
393
500
|
.strip()
|
|
394
501
|
.lower()
|
|
395
|
-
.decode("utf-8")
|
|
396
502
|
)
|
|
503
|
+
while byteEnd >= byteStart:
|
|
504
|
+
try:
|
|
505
|
+
handle.decode("utf-8")
|
|
506
|
+
break
|
|
507
|
+
except UnicodeDecodeError:
|
|
508
|
+
handle = handle[:-1]
|
|
509
|
+
continue
|
|
510
|
+
handle = handle.decode("utf-8")
|
|
397
511
|
post["mentioned_user_handles"].append(handle)
|
|
398
512
|
|
|
399
513
|
# Links
|
|
400
|
-
elif
|
|
514
|
+
elif (
|
|
515
|
+
feat["$type"].endswith("#link")
|
|
516
|
+
or feat["$type"].endswith(".link")
|
|
517
|
+
or feat["$type"].endswith(".url")
|
|
518
|
+
):
|
|
401
519
|
# Handle native polls
|
|
402
520
|
if "https://poll.blue/" in feat["uri"]:
|
|
403
521
|
if feat["uri"].endswith("/0"):
|
|
@@ -420,57 +538,100 @@ def normalize_post(
|
|
|
420
538
|
byteStart = facet["index"]["byteStart"]
|
|
421
539
|
byteEnd = facet["index"]["byteEnd"]
|
|
422
540
|
|
|
423
|
-
|
|
424
|
-
|
|
541
|
+
# Skip overlapping links cases
|
|
542
|
+
# examples: https://bsky.app/profile/researchtrend.ai/post/3lbieylwwxs2b
|
|
543
|
+
# https://bsky.app/profile/dj-cyberspace.otoskey.tarbin.net.ap.brid.gy/post/3lchg3plpdjp2
|
|
544
|
+
for elt in links_to_replace:
|
|
545
|
+
if (byteStart >= elt["start"] and byteStart <= elt["end"]) or (
|
|
546
|
+
byteEnd >= elt["start"] and byteEnd <= elt["end"]
|
|
547
|
+
):
|
|
548
|
+
# Overlapping links, we skip this one
|
|
549
|
+
byteStart = -1
|
|
550
|
+
byteEnd = -1
|
|
551
|
+
break
|
|
552
|
+
|
|
553
|
+
# Meaning we will try to fix the link position
|
|
554
|
+
if byteStart != -1 or byteEnd != -1:
|
|
555
|
+
# It appears that some links end before they start... Bluesky please: what's going on?
|
|
556
|
+
# example: https://bsky.app/profile/ondarockwebzine.bsky.social/post/3lqxxejza6o2t
|
|
557
|
+
# if int(byteEnd) < int(byteStart) or byteStart < 0:
|
|
558
|
+
if int(byteEnd) < int(byteStart):
|
|
559
|
+
byteStart = -1
|
|
560
|
+
byteEnd = -1
|
|
561
|
+
|
|
562
|
+
# There are mentioned links which are positioned after the end of the text,
|
|
563
|
+
# so we put them at the end of the original text
|
|
564
|
+
elif byteStart >= len(post["original_text"].encode("utf-8")):
|
|
565
|
+
byteStart = -1
|
|
566
|
+
byteEnd = -1
|
|
567
|
+
|
|
568
|
+
elif not text[byteStart:byteEnd].startswith(b"http"):
|
|
569
|
+
new_byteStart = text.find(b"http", byteStart, byteEnd)
|
|
570
|
+
|
|
571
|
+
# means that the link is shifted, like on this post:
|
|
572
|
+
# https://bsky.app/profile/ecrime.ch/post/3lqotmopayr23
|
|
573
|
+
if new_byteStart != -1:
|
|
574
|
+
byteStart = new_byteStart
|
|
575
|
+
|
|
576
|
+
# Find the index of the first space character after byteStart in case the link is a personalized one
|
|
577
|
+
# but still with the link in it (somehow existing in some posts, such as this one:
|
|
578
|
+
# https://bsky.app/profile/did:plc:rkphrshyfiqe4n2hz5vj56ig/post/3ltmljz5blca2)
|
|
579
|
+
# In this case, we don't want to touch the position of the link given in the payload
|
|
580
|
+
byteEnd = min(
|
|
581
|
+
byteStart
|
|
582
|
+
- facet["index"]["byteStart"]
|
|
583
|
+
+ facet["index"]["byteEnd"],
|
|
584
|
+
len(post["original_text"].encode("utf-8")),
|
|
585
|
+
)
|
|
586
|
+
for i in range(byteStart, byteEnd):
|
|
587
|
+
if chr(text[i]).isspace():
|
|
588
|
+
byteStart = facet["index"]["byteStart"]
|
|
589
|
+
byteEnd = (
|
|
590
|
+
byteStart
|
|
591
|
+
- facet["index"]["byteStart"]
|
|
592
|
+
+ facet["index"]["byteEnd"]
|
|
593
|
+
)
|
|
425
594
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
595
|
+
# means that the link is a "personalized" one like on this post:
|
|
596
|
+
# https://bsky.app/profile/newyork.activitypub.awakari.com.ap.brid.gy/post/3ln33tx7bpdu2
|
|
597
|
+
else:
|
|
598
|
+
# we're looking for a link which could be valid if we add "https://" at the beginning,
|
|
599
|
+
# as in some cases the "http(s)://" part is missing in the post text
|
|
600
|
+
for starting in range(byteEnd - byteStart):
|
|
601
|
+
try:
|
|
602
|
+
if is_url(
|
|
603
|
+
"https://"
|
|
604
|
+
+ text[
|
|
605
|
+
byteStart + starting : byteEnd + starting
|
|
606
|
+
].decode("utf-8")
|
|
607
|
+
):
|
|
608
|
+
byteStart += starting
|
|
609
|
+
break
|
|
610
|
+
except UnicodeDecodeError:
|
|
611
|
+
pass
|
|
612
|
+
# If we did not find any valid link, we just keep the original position as it is
|
|
613
|
+
# meaning that we have a personalized link like in the example above
|
|
614
|
+
|
|
615
|
+
# Extend byteEnd to the right until we find a valid utf-8 ending,
|
|
616
|
+
# as in some cases the link is longer than the position given in the payload
|
|
617
|
+
# and it gets cut in the middle of a utf-8 char, leading to UnicodeDecodeError
|
|
618
|
+
# example: https://bsky.app/profile/radiogaspesie.bsky.social/post/3lmkzhvhtta22
|
|
619
|
+
while byteEnd <= len(post["original_text"].encode("utf-8")):
|
|
620
|
+
try:
|
|
621
|
+
text[byteStart:byteEnd].decode("utf-8")
|
|
622
|
+
break
|
|
623
|
+
except UnicodeDecodeError:
|
|
624
|
+
byteEnd += 1
|
|
625
|
+
continue
|
|
430
626
|
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
# In this case, we don't want to touch the position of the link given in the payload
|
|
435
|
-
byteEnd = min(
|
|
436
|
-
byteStart
|
|
437
|
-
- facet["index"]["byteStart"]
|
|
438
|
-
+ facet["index"]["byteEnd"],
|
|
439
|
-
len(post["original_text"].encode("utf-8")),
|
|
440
|
-
)
|
|
441
|
-
for i in range(byteStart, byteEnd):
|
|
442
|
-
if chr(text[i]).isspace():
|
|
443
|
-
byteStart = facet["index"]["byteStart"]
|
|
444
|
-
byteEnd = (
|
|
445
|
-
byteStart
|
|
446
|
-
- facet["index"]["byteStart"]
|
|
447
|
-
+ facet["index"]["byteEnd"]
|
|
448
|
-
)
|
|
627
|
+
# Meaning that we did not find a valid utf-8 ending, so we reset byteEnd to its original value
|
|
628
|
+
if byteEnd > len(post["original_text"].encode("utf-8")):
|
|
629
|
+
byteEnd = facet["index"]["byteEnd"]
|
|
449
630
|
|
|
450
|
-
|
|
451
|
-
# https://bsky.app/profile/newyork.activitypub.awakari.com.ap.brid.gy/post/3ln33tx7bpdu2
|
|
631
|
+
byteEnd += byteStart - facet["index"]["byteStart"]
|
|
452
632
|
else:
|
|
453
|
-
#
|
|
454
|
-
#
|
|
455
|
-
for starting in range(byteEnd - byteStart):
|
|
456
|
-
try:
|
|
457
|
-
if is_url(
|
|
458
|
-
"https://"
|
|
459
|
-
+ text[
|
|
460
|
-
byteStart + starting : byteEnd + starting
|
|
461
|
-
].decode("utf-8")
|
|
462
|
-
):
|
|
463
|
-
byteStart += starting
|
|
464
|
-
break
|
|
465
|
-
except UnicodeDecodeError:
|
|
466
|
-
pass
|
|
467
|
-
# If we did not find any valid link, we just keep the original position as it is
|
|
468
|
-
# meaning that we have a personalized link like in the example above
|
|
469
|
-
|
|
470
|
-
# Extend byteEnd to the right until we find a valid utf-8 ending,
|
|
471
|
-
# as in some cases the link is longer than the position given in the payload
|
|
472
|
-
# and it gets cut in the middle of a utf-8 char, leading to UnicodeDecodeError
|
|
473
|
-
# example: https://bsky.app/profile/radiogaspesie.bsky.social/post/3lmkzhvhtta22
|
|
633
|
+
# Handle the case of an erroneous byteEnd at the end of the text
|
|
634
|
+
# example: https://bsky.app/profile/twif.bsky.social/post/3lm4izkvbfm2r
|
|
474
635
|
while byteEnd <= len(post["original_text"].encode("utf-8")):
|
|
475
636
|
try:
|
|
476
637
|
text[byteStart:byteEnd].decode("utf-8")
|
|
@@ -482,8 +643,6 @@ def normalize_post(
|
|
|
482
643
|
if byteEnd > len(post["original_text"].encode("utf-8")):
|
|
483
644
|
byteEnd = facet["index"]["byteEnd"]
|
|
484
645
|
|
|
485
|
-
byteEnd += byteStart - facet["index"]["byteStart"]
|
|
486
|
-
|
|
487
646
|
# In some cases, the link is completely wrong in the post text,
|
|
488
647
|
# like in this post: https://bsky.app/profile/sudetsoleil.bsky.social/post/3ljf3h74wee2m
|
|
489
648
|
# So we chose to not replace anything in the text in this case
|
|
@@ -500,10 +659,66 @@ def normalize_post(
|
|
|
500
659
|
pass
|
|
501
660
|
# raise UnicodeDecodeError(e.encoding, e.object, e.start, e.end, f"{e.reason} in post {post['url']}.\nText to decode: {text}\nSlice of text to decode: {text[e.start:e.end]}")
|
|
502
661
|
|
|
503
|
-
elif
|
|
662
|
+
elif any(
|
|
663
|
+
feat["$type"].endswith(suffix)
|
|
664
|
+
for suffix in [
|
|
665
|
+
"#bold",
|
|
666
|
+
"#italic",
|
|
667
|
+
"#underline",
|
|
668
|
+
"#option",
|
|
669
|
+
"#encrypt",
|
|
670
|
+
"#text",
|
|
671
|
+
]
|
|
672
|
+
):
|
|
504
673
|
pass
|
|
505
|
-
|
|
674
|
+
# Bluesky seems to use format features for some internal purposes, but we ignore them
|
|
675
|
+
# e.g.: https://bsky.app/profile/ferromar.bsky.social/post/3lzyfaixayd2g
|
|
676
|
+
elif feat["$type"].endswith("format"):
|
|
506
677
|
pass
|
|
678
|
+
# Not normal feature type, but still existing in some posts
|
|
679
|
+
# Note that external features aren't visible on the Bluesky app, only external embeds are
|
|
680
|
+
# e.g.: https://bsky.app/profile/did:plc:4qvb4dpkg6tkbzym77j6jcm4/post/3lbjktt6tw52h
|
|
681
|
+
elif feat["$type"].endswith("external"):
|
|
682
|
+
link = feat["external"]["uri"]
|
|
683
|
+
|
|
684
|
+
# Handle native gifs as medias
|
|
685
|
+
if link.startswith("https://media.tenor.com/"):
|
|
686
|
+
media_data.append(
|
|
687
|
+
prepare_native_gif_as_media(
|
|
688
|
+
feat["external"], post["user_did"], post["url"]
|
|
689
|
+
)
|
|
690
|
+
)
|
|
691
|
+
# Extra card links sometimes missing from facets & text due to manual action in post form
|
|
692
|
+
else:
|
|
693
|
+
extra_links.append(link)
|
|
694
|
+
|
|
695
|
+
if isinstance(feat["external"].get("thumb"), dict):
|
|
696
|
+
post = process_card_data(feat["external"], post)
|
|
697
|
+
|
|
698
|
+
# Some people share code snippets using third party apps
|
|
699
|
+
# e.g.: https://bsky.app/profile/alexdln.com/post/3mbwzgrymow2o
|
|
700
|
+
elif (
|
|
701
|
+
"#" in feat["$type"]
|
|
702
|
+
and feat["$type"].split("#")[1].startswith("code")
|
|
703
|
+
and "code" in feat
|
|
704
|
+
):
|
|
705
|
+
language = (
|
|
706
|
+
feat["$type"].split("#")[1].split(".")[1]
|
|
707
|
+
if "." in feat["$type"].split("#")[1]
|
|
708
|
+
else "plain"
|
|
709
|
+
)
|
|
710
|
+
text += (
|
|
711
|
+
b"\n```"
|
|
712
|
+
+ language.encode("utf-8")
|
|
713
|
+
+ b"\n"
|
|
714
|
+
+ feat["code"].encode("utf-8")
|
|
715
|
+
+ b"\n```\n"
|
|
716
|
+
)
|
|
717
|
+
|
|
718
|
+
# We chose to ignore non Bluesky features for now (e.g. personalized features)
|
|
719
|
+
# example: https://bsky.app/profile/poll.blue/post/3kmuqjkkozh2r
|
|
720
|
+
elif "bsky" not in feat["$type"]:
|
|
721
|
+
continue
|
|
507
722
|
else:
|
|
508
723
|
raise BlueskyPayloadError(
|
|
509
724
|
post["url"], "unusual record facet feature $type: %s" % feat
|
|
@@ -543,21 +758,61 @@ def normalize_post(
|
|
|
543
758
|
|
|
544
759
|
# Handle quotes & medias
|
|
545
760
|
media_ids = set()
|
|
546
|
-
post["media_urls"] = []
|
|
547
761
|
post["media_thumbnails"] = []
|
|
548
762
|
post["media_types"] = []
|
|
549
763
|
post["media_alt_texts"] = []
|
|
550
764
|
if "embed" in data["record"]:
|
|
551
765
|
embed = data["record"]["embed"]
|
|
552
766
|
quoted_data = None
|
|
553
|
-
media_data = []
|
|
554
|
-
extra_links = []
|
|
555
767
|
|
|
556
768
|
if not valid_embed_type(embed["$type"]):
|
|
769
|
+
if "bsky" in embed["$type"]:
|
|
770
|
+
raise BlueskyPayloadError(
|
|
771
|
+
post["url"], "unusual record embed $type: %s" % embed
|
|
772
|
+
)
|
|
773
|
+
# Ignore non Bluesky embeds for now (e.g. personalized embeds)
|
|
774
|
+
|
|
775
|
+
# Empty embed (not usual, but seen in the Bluesky jungle, e.g.
|
|
776
|
+
# https://bsky.app/profile/did:plc:na6u3avvaz2x5wyzqrnviqiz/post/3lzf5qi2ra62k
|
|
777
|
+
# https://bsky.app/profile/dangelodario.it/post/3l3inqifqj42p
|
|
778
|
+
# or https://bsky.app/profile/soirilab.bsky.social/post/3lywaa7vhsu2c)
|
|
779
|
+
if embed["$type"].endswith(".post") or embed["$type"] == "N/A":
|
|
780
|
+
# Some posts have extra keys in their empty embed, certainly personalized ones.
|
|
781
|
+
|
|
782
|
+
# Personalized quote (not visible on Bluesky for the example)
|
|
783
|
+
# example: https://bsky.app/profile/jacksmithsocial.bsky.social/post/3lbca2nxy4f2a
|
|
784
|
+
if embed.get("$type") == "app.bsky.feed.post" and embed.get(
|
|
785
|
+
"record", {}
|
|
786
|
+
).get("uri"):
|
|
787
|
+
post, quoted_data, links = prepare_quote_data(
|
|
788
|
+
embed["record"], data.get("embed", {}).get("record"), post, links
|
|
789
|
+
)
|
|
790
|
+
|
|
791
|
+
# for the other ones we know up to now, we want to ignore them
|
|
792
|
+
# e.g.: https://bsky.app/profile/granmouse.bsky.social/post/3lwvh5xd2xk2p
|
|
793
|
+
# https://bsky.app/profile/flyingaubrey.bsky.social/post/3lxngessntk2p
|
|
794
|
+
elif len(embed.keys()) > 1 and embed.get("type") not in ["private", "list"]:
|
|
795
|
+
raise BlueskyPayloadError(
|
|
796
|
+
post["url"],
|
|
797
|
+
"unusual empty record embed with extra keys: %s" % embed,
|
|
798
|
+
)
|
|
799
|
+
# Nothing to do for empty embed
|
|
800
|
+
|
|
801
|
+
if (
|
|
802
|
+
embed["$type"].endswith(".embed")
|
|
803
|
+
and len(embed.keys()) > 2
|
|
804
|
+
and len(embed.get("images")) > 0
|
|
805
|
+
):
|
|
557
806
|
raise BlueskyPayloadError(
|
|
558
|
-
post["url"], "unusual record embed
|
|
807
|
+
post["url"], "unusual empty record embed with extra keys: %s" % embed
|
|
559
808
|
)
|
|
560
809
|
|
|
810
|
+
# Links from links embed
|
|
811
|
+
# e.g.: https://bsky.app/profile/sacredatoz.bsky.social/post/3lrqvemv7qe2f
|
|
812
|
+
if embed["$type"].endswith(".links"):
|
|
813
|
+
for link in embed["links"]:
|
|
814
|
+
extra_links.append(link)
|
|
815
|
+
|
|
561
816
|
# Links from cards
|
|
562
817
|
if embed["$type"].endswith(".external"):
|
|
563
818
|
link = embed["external"]["uri"]
|
|
@@ -577,13 +832,48 @@ def normalize_post(
|
|
|
577
832
|
if "embed" in data:
|
|
578
833
|
post = process_card_data(data["embed"]["external"], post)
|
|
579
834
|
|
|
835
|
+
# Not visible images
|
|
836
|
+
# examples: https://bsky.app/profile/lubosmichalik.bsky.social/post/3ltjvxsaej62c
|
|
837
|
+
# https://bsky.app/profile/lubosmichalik.bsky.social/post/3ltjvz52x7s2m
|
|
838
|
+
if embed["$type"].endswith(".viewImages"):
|
|
839
|
+
if "images" in embed:
|
|
840
|
+
for i in embed["images"]:
|
|
841
|
+
post["media_urls"].append(
|
|
842
|
+
i.get("viewImage", {}).get("thumb", {}).get("uri", "")
|
|
843
|
+
)
|
|
844
|
+
elif "viewImage" in embed:
|
|
845
|
+
for i in embed["viewImage"]:
|
|
846
|
+
if "viewImage" in i:
|
|
847
|
+
sub_image = "viewImage"
|
|
848
|
+
elif "image" in i:
|
|
849
|
+
sub_image = "image"
|
|
850
|
+
else:
|
|
851
|
+
raise BlueskyPayloadError(
|
|
852
|
+
post["url"],
|
|
853
|
+
"unusual viewImages embed content: %s" % embed,
|
|
854
|
+
)
|
|
855
|
+
post["media_urls"].append(
|
|
856
|
+
i[sub_image].get("thumb", {}).get("uri", "")
|
|
857
|
+
)
|
|
858
|
+
|
|
580
859
|
# Images
|
|
581
|
-
if embed["$type"].endswith(".images"):
|
|
582
|
-
media_data.extend(
|
|
860
|
+
if embed["$type"].endswith(".images") or embed["$type"].endswith("image"):
|
|
861
|
+
media_data.extend(
|
|
862
|
+
[prepare_image_as_media(i, post["url"]) for i in embed["images"]]
|
|
863
|
+
)
|
|
583
864
|
|
|
584
865
|
# Video
|
|
585
866
|
if embed["$type"].endswith(".video"):
|
|
586
867
|
media_data.append(prepare_video_as_media(embed["video"]))
|
|
868
|
+
elif embed["$type"].endswith(".videos"):
|
|
869
|
+
for elt in embed["videos"]:
|
|
870
|
+
media_data.append(prepare_video_as_media(elt["video"]))
|
|
871
|
+
elif embed["$type"].endswith(".media"):
|
|
872
|
+
if isinstance(embed["media"], dict):
|
|
873
|
+
media_data.append(prepare_video_as_media(embed["media"]["video"]))
|
|
874
|
+
elif isinstance(embed["media"], list):
|
|
875
|
+
for elt in embed["media"]:
|
|
876
|
+
media_data.append(prepare_video_as_media(elt["media"]))
|
|
587
877
|
|
|
588
878
|
# Quote & Starter-packs
|
|
589
879
|
if embed["$type"].endswith(".record"):
|
|
@@ -631,13 +921,21 @@ def normalize_post(
|
|
|
631
921
|
# Images
|
|
632
922
|
elif embed["media"]["$type"].endswith(".images"):
|
|
633
923
|
media_data.extend(
|
|
634
|
-
[
|
|
924
|
+
[
|
|
925
|
+
prepare_image_as_media(i, post["url"])
|
|
926
|
+
for i in embed["media"]["images"]
|
|
927
|
+
]
|
|
635
928
|
)
|
|
636
929
|
|
|
637
930
|
# Video
|
|
638
931
|
elif embed["media"]["$type"].endswith(".video"):
|
|
639
932
|
media_data.append(prepare_video_as_media(embed["media"]["video"]))
|
|
640
933
|
|
|
934
|
+
# A personalized record with media embed type, but video unavailable
|
|
935
|
+
# e.g.: https://bsky.app/profile/meteolatorregassa.bsky.social/post/3lhoxazzptj2b
|
|
936
|
+
elif embed["media"]["$type"].endswith("#media"):
|
|
937
|
+
pass
|
|
938
|
+
|
|
641
939
|
else:
|
|
642
940
|
raise BlueskyPayloadError(
|
|
643
941
|
post["url"],
|
|
@@ -751,8 +1049,13 @@ def normalize_post(
|
|
|
751
1049
|
"allow_from_" + rule["$type"].split("#")[1].split("Rule")[0]
|
|
752
1050
|
)
|
|
753
1051
|
if rule_string.endswith("_list") and "list" in rule:
|
|
754
|
-
|
|
755
|
-
post["replies_rules"].append(rule_string + ":" +
|
|
1052
|
+
if isinstance(rule["list"], str):
|
|
1053
|
+
post["replies_rules"].append(rule_string + ":" + rule["list"])
|
|
1054
|
+
else:
|
|
1055
|
+
for allowed_list in rule["list"]:
|
|
1056
|
+
post["replies_rules"].append(
|
|
1057
|
+
rule_string + ":" + allowed_list
|
|
1058
|
+
)
|
|
756
1059
|
else:
|
|
757
1060
|
post["replies_rules"].append(rule_string)
|
|
758
1061
|
if not data["threadgate"]["record"]["allow"]:
|
twitwi/bluesky/utils.py
CHANGED
|
@@ -37,7 +37,9 @@ def validate_post_payload(data):
|
|
|
37
37
|
post["record"],
|
|
38
38
|
)
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
# Splitting by '#' to ignore possible suffixes in $type
|
|
41
|
+
# e.g. https://bsky.app/profile/did:plc:k6acu4chiwkixvdedcmdgmal/post/3lagdncjsu22y
|
|
42
|
+
if post["record"].get("$type").split("#")[0] != "app.bsky.feed.post":
|
|
41
43
|
return False, "payload's record $type is not a post: %s" % post["record"].get(
|
|
42
44
|
"$type"
|
|
43
45
|
)
|
|
@@ -56,7 +58,7 @@ def validate_post_payload(data):
|
|
|
56
58
|
|
|
57
59
|
|
|
58
60
|
re_embed_types = re.compile(
|
|
59
|
-
r"
|
|
61
|
+
r"(?:\.(?:record|recordWithMedia|images|videos?|external|post|embed|links|media|file|viewImages)(?:#.*)?|N\/A|image)$"
|
|
60
62
|
)
|
|
61
63
|
|
|
62
64
|
|
|
@@ -88,17 +90,25 @@ def parse_post_url(url, source):
|
|
|
88
90
|
def parse_post_uri(uri, source=None):
    """Returns a tuple of (author_did, post_did) from an at:// post URI

    The URI is expected to look like at://<author_did>/<collection>/<post_did>.
    The collection segment is deliberately not validated: there is too much
    variability in the wild to be exhaustive (app.bsky.feed.post,
    app.bsky.graph.starterpack, app.bsky.feed.generator, app.bsky.graph.list,
    app.bsky.graph.follow, the latter being often found when a post is an
    answer to a deleted post, e.g.
    https://bsky.app/profile/sydney-chat.bsky.social/post/3ltsph6kxfl25).

    Args:
        uri: the at:// URI to parse.
        source: optional identifier reported in the error instead of the uri.

    Raises:
        BlueskyPayloadError: if the uri does not start with "at://" or does
            not contain the three expected segments.
    """

    if uri.startswith("at://"):
        # maxsplit=2 yields at most three parts, so any extra slash stays
        # inside the last segment instead of breaking the unpacking below.
        parts = uri[5:].split("/", 2)

        # Fewer than three parts means a segment is missing: fall through to
        # the explicit payload error rather than failing on the unpacking.
        if len(parts) == 3:
            author_did, _, post_did = parts
            return author_did, post_did

    raise BlueskyPayloadError(source or uri, f"{uri} is not a usual Bluesky post uri")
|
|
104
114
|
|
|
@@ -112,18 +122,24 @@ def format_media_url(user_did, media_cid, mime_type, source):
|
|
|
112
122
|
if mime_type.startswith("image"):
|
|
113
123
|
media_url = f"https://cdn.bsky.app/img/feed_fullsize/plain/{user_did}/{media_cid}@{media_type}"
|
|
114
124
|
media_thumb = f"https://cdn.bsky.app/img/feed_thumbnail/plain/{user_did}/{media_cid}@{media_type}"
|
|
115
|
-
elif
|
|
125
|
+
elif (
|
|
126
|
+
mime_type.startswith("video")
|
|
127
|
+
or mime_type == "application/xml"
|
|
128
|
+
or mime_type == "*/*"
|
|
129
|
+
):
|
|
116
130
|
media_url = f"https://video.bsky.app/watch/{user_did}/{media_cid}/playlist.m3u8"
|
|
117
131
|
media_thumb = (
|
|
118
132
|
f"https://video.bsky.app/watch/{user_did}/{media_cid}/thumbnail.jpg"
|
|
119
133
|
)
|
|
120
|
-
elif mime_type in ["
|
|
134
|
+
elif any(mt in mime_type for mt in ["octet-stream", "text/plain", "text/html"]):
|
|
121
135
|
media_url = (
|
|
122
136
|
f"https://cdn.bsky.app/img/feed_fullsize/plain/{user_did}/{media_cid}@jpeg"
|
|
123
137
|
)
|
|
124
138
|
media_thumb = (
|
|
125
139
|
f"https://cdn.bsky.app/img/feed_thumbnail/plain/{user_did}/{media_cid}@jpeg"
|
|
126
140
|
)
|
|
141
|
+
elif "empty" in mime_type:
|
|
142
|
+
media_url, media_thumb = "", ""
|
|
127
143
|
else:
|
|
128
144
|
raise BlueskyPayloadError(source, f"{mime_type} is an unusual media mimeType")
|
|
129
145
|
return media_url, media_thumb
|
twitwi/exceptions.py
CHANGED
|
@@ -21,4 +21,4 @@ class BlueskyPayloadError(TwitwiError):
|
|
|
21
21
|
def __init__(self, source, message):
|
|
22
22
|
self.source = source
|
|
23
23
|
self.message = message
|
|
24
|
-
super().__init__(f"Error while processing Bluesky post {source}
|
|
24
|
+
super().__init__(f"Error while processing Bluesky post {source}.\n{message}")
|
twitwi/formatters.py
CHANGED
|
@@ -52,7 +52,9 @@ def make_transform_into_csv_dict(plural_fields, boolean_fields):
|
|
|
52
52
|
|
|
53
53
|
|
|
54
54
|
def make_format_as_csv_row(fields, plural_fields, boolean_fields):
|
|
55
|
-
def format_field_for_csv(
|
|
55
|
+
def format_field_for_csv(
|
|
56
|
+
field, item, item_id=None, plural_separator="|", allow_erroneous_plurals=False
|
|
57
|
+
):
|
|
56
58
|
if field == "id" and item_id is not None:
|
|
57
59
|
return item_id
|
|
58
60
|
|
|
@@ -63,6 +65,11 @@ def make_format_as_csv_row(fields, plural_fields, boolean_fields):
|
|
|
63
65
|
if field == "links":
|
|
64
66
|
v = item.get("proper_links", v)
|
|
65
67
|
|
|
68
|
+
# Clean None values that may have slipped in, such as in the 'domains' field when
|
|
69
|
+
# normalizing this Bluesky post: https://bsky.app/profile/did:plc:cs5qjcmnntogoahrrsagmg2z/post/3lvqhn7raq62v
|
|
70
|
+
if allow_erroneous_plurals:
|
|
71
|
+
v = [element if element is not None else "" for element in v]
|
|
72
|
+
|
|
66
73
|
return plural_separator.join(v)
|
|
67
74
|
|
|
68
75
|
if field in boolean_fields:
|
|
@@ -70,10 +77,16 @@ def make_format_as_csv_row(fields, plural_fields, boolean_fields):
|
|
|
70
77
|
|
|
71
78
|
return item.get(field, "")
|
|
72
79
|
|
|
73
|
-
def format_item_as_csv_row(
|
|
80
|
+
def format_item_as_csv_row(
|
|
81
|
+
item, item_id=None, plural_separator="|", allow_erroneous_plurals=False
|
|
82
|
+
):
|
|
74
83
|
return [
|
|
75
84
|
format_field_for_csv(
|
|
76
|
-
field,
|
|
85
|
+
field,
|
|
86
|
+
item,
|
|
87
|
+
item_id=item_id,
|
|
88
|
+
plural_separator=plural_separator,
|
|
89
|
+
allow_erroneous_plurals=allow_erroneous_plurals,
|
|
77
90
|
)
|
|
78
91
|
for field in fields
|
|
79
92
|
]
|
twitwi/utils.py
CHANGED
|
@@ -61,7 +61,9 @@ def get_dates(
|
|
|
61
61
|
locale = UTC_TIMEZONE
|
|
62
62
|
|
|
63
63
|
# Let's pray we never see a negative year...
|
|
64
|
-
year_zero = date_str.startswith("0000")
|
|
64
|
+
year_zero = date_str.startswith("0000") or all(
|
|
65
|
+
c == "0" for c in date_str.split("-")[0]
|
|
66
|
+
)
|
|
65
67
|
|
|
66
68
|
try:
|
|
67
69
|
parsed_datetime = datetime.strptime(
|
|
@@ -84,26 +86,30 @@ def get_dates(
|
|
|
84
86
|
utc_datetime = UTC_TIMEZONE.localize(parsed_datetime)
|
|
85
87
|
locale_datetime = utc_datetime.astimezone(locale)
|
|
86
88
|
|
|
89
|
+
formatted_date_str = datetime.strftime(
|
|
90
|
+
locale_datetime,
|
|
91
|
+
FORMATTED_FULL_DATETIME_FORMAT
|
|
92
|
+
if source == "bluesky"
|
|
93
|
+
else FORMATTED_TWEET_DATETIME_FORMAT,
|
|
94
|
+
)
|
|
95
|
+
|
|
87
96
|
timestamp = int(utc_datetime.timestamp())
|
|
88
97
|
|
|
89
98
|
if year_zero:
|
|
90
99
|
# Subtract one year (year 0001 is not a leap year) in seconds
|
|
91
100
|
timestamp -= 31536000
|
|
101
|
+
# Rebuilding the year prefix with split because on Ubuntu, datetime.strftime on a year with fewer than 4 digits
|
|
102
|
+
# only returns 1 digit for year 0 (e.g. "0-05-12...") instead of 4 digits ("0000-05-12..."),
|
|
103
|
+
# whereas on macOS and Windows it returns 4 digits.
|
|
104
|
+
formatted_date_str = "0000-" + formatted_date_str.split("-", 1)[1]
|
|
92
105
|
|
|
93
106
|
if millisecond_timestamp:
|
|
94
107
|
timestamp *= 1000
|
|
95
108
|
timestamp += utc_datetime.microsecond / 1000
|
|
96
109
|
|
|
97
|
-
formatted_date_str = datetime.strftime(
|
|
98
|
-
locale_datetime,
|
|
99
|
-
FORMATTED_FULL_DATETIME_FORMAT
|
|
100
|
-
if source == "bluesky"
|
|
101
|
-
else FORMATTED_TWEET_DATETIME_FORMAT,
|
|
102
|
-
)
|
|
103
|
-
|
|
104
110
|
return (
|
|
105
111
|
int(timestamp),
|
|
106
|
-
formatted_date_str
|
|
112
|
+
formatted_date_str,
|
|
107
113
|
)
|
|
108
114
|
|
|
109
115
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: twitwi
|
|
3
|
-
Version: 0.23.0
|
|
3
|
+
Version: 0.24.0
|
|
4
4
|
Summary: A collection of Twitter-related helper functions for python.
|
|
5
5
|
Home-page: http://github.com/medialab/twitwi
|
|
6
6
|
Author: Béatrice Mazoyer, Guillaume Plique, Benjamin Ooghe-Tabanou
|
|
@@ -260,7 +260,7 @@ List of a Bluesky user profile's normalized field names. Useful to declare heade
|
|
|
260
260
|
|
|
261
261
|
### PARTIAL_PROFILE_FIELDS
|
|
262
262
|
|
|
263
|
-
List of a Bluesky user partial profile's (retrieved from [`app.bsky.graph.getFollowers` HTTP endpoint](https://docs.bsky.app/docs/api/app-bsky-graph-get-followers#responses) for example) normalized field names. Useful to declare headers with csv writers. Be careful not to confuse with [PROFILE_FIELDS](#profile_fields) which correspond to the full version of the profile data, retrieved from [`app.bsky.actor.getProfiles` HTTP endpoint](docs.bsky.app/docs/api/app-bsky-actor-get-profiles#responses) for example.
|
|
263
|
+
List of a Bluesky user partial profile's (retrieved from [`app.bsky.graph.getFollowers` HTTP endpoint](https://docs.bsky.app/docs/api/app-bsky-graph-get-followers#responses) for example) normalized field names. Useful to declare headers with csv writers. Be careful not to confuse it with [PROFILE_FIELDS](#profile_fields), which corresponds to the full version of the profile data, retrieved from [`app.bsky.actor.getProfiles` HTTP endpoint](https://docs.bsky.app/docs/api/app-bsky-actor-get-profiles#responses) for example.
|
|
264
264
|
|
|
265
265
|
### POST_FIELDS
|
|
266
266
|
|
|
@@ -277,7 +277,7 @@ Will return datetimes as UTC but can take an optional second `locale` argument a
|
|
|
277
277
|
* **data** *(dict)*: user profile data payload coming from Twitter API v1.1 or v2.
|
|
278
278
|
* **locale** *(pytz.timezone as str, optional)*: timezone used to convert dates. If not given, will default to UTC.
|
|
279
279
|
* **pure** *(bool, optional)*: whether to allow the function to mutate its original `data` argument. Defaults to `True`.
|
|
280
|
-
|
|
280
|
+
|
|
281
281
|
### normalize_tweet
|
|
282
282
|
|
|
283
283
|
Function taking a nested dict describing a tweet from Twitter's JSON payload (API v1.1) and returning a flat "normalized" dict composed of all [TWEET_FIELDS](#tweet_fields) keys.
|
|
@@ -1,22 +1,22 @@
|
|
|
1
1
|
test/bluesky/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
test/bluesky/formatters_test.py,sha256=
|
|
2
|
+
test/bluesky/formatters_test.py,sha256=kUXoLNEep-mGRwLN0y5DqB9pAorV0PkVKMm_uVIvAQQ,5100
|
|
3
3
|
test/bluesky/normalizers_test.py,sha256=R4NziqErGW5MBdQEZ1vNxLGNRvJTyGnXfqo0v5gBCgw,5662
|
|
4
4
|
twitwi/__init__.py,sha256=y0bAx9gE3THtlWE1YpXDIhGwqJ5_I8DCStWyyiiXJkw,1095
|
|
5
5
|
twitwi/anonymizers.py,sha256=nkl6HL1BWLz00wJ060XSbqjN5JF8pvcpEPnRXt70TUY,1588
|
|
6
6
|
twitwi/constants.py,sha256=fvqCngJIGyz5CpdVWbcAfjmE3_kvcx9giN0rEljL7OU,16001
|
|
7
|
-
twitwi/exceptions.py,sha256=
|
|
8
|
-
twitwi/formatters.py,sha256=
|
|
7
|
+
twitwi/exceptions.py,sha256=xUikeIRmFcptQFlKGKXkbH9vbcQlQL3sviknhvSTcmw,696
|
|
8
|
+
twitwi/formatters.py,sha256=pwI4UYPDFUzjRPE9B36k8tK-Va-k0HFLwvmc8aIc8P0,3681
|
|
9
9
|
twitwi/normalizers.py,sha256=CWUK-XwhcEjLDjWH_qb6E03WZKsbIcwiRAVUjwXKQho,28438
|
|
10
|
-
twitwi/utils.py,sha256=
|
|
10
|
+
twitwi/utils.py,sha256=PPmbeMlKbHMTg07PgI4A0HRZw2QGuvCOGcP_FtqMyHQ,4774
|
|
11
11
|
twitwi/bluesky/__init__.py,sha256=SqeHZUzL2U9UpL3EB33vaowQWaKXSPkvsAkasRqmFpY,694
|
|
12
12
|
twitwi/bluesky/constants.py,sha256=CPkTIrDwyRWpkFTbaee1oFm_LWGj2WIC7A6xEGqDGB4,573
|
|
13
13
|
twitwi/bluesky/formatters.py,sha256=L_yROAPcBECifCGiFAGYFJwLq6re8UlJNoZ7R2DXm5g,1025
|
|
14
|
-
twitwi/bluesky/normalizers.py,sha256=
|
|
14
|
+
twitwi/bluesky/normalizers.py,sha256=m4oNWJt8eZK2iVREPIKC42yw3YNpZo3pf4OQGZz_1i8,48611
|
|
15
15
|
twitwi/bluesky/types.py,sha256=INe6R8eOqrOooWn25dtk61-Wqd_pUDwb737R7jY_vkc,13915
|
|
16
|
-
twitwi/bluesky/utils.py,sha256=
|
|
17
|
-
twitwi-0.
|
|
18
|
-
twitwi-0.
|
|
19
|
-
twitwi-0.
|
|
20
|
-
twitwi-0.
|
|
21
|
-
twitwi-0.
|
|
22
|
-
twitwi-0.
|
|
16
|
+
twitwi/bluesky/utils.py,sha256=zIofl7UHmIr0JgjoXRK3ekovkri3CVOvQvo8PmFrWGg,4895
|
|
17
|
+
twitwi-0.24.0.dist-info/licenses/LICENSE.txt,sha256=Ddg_PcGnl0qd2167o2dheCjE_rCZJOoBxjJnJhhOpX4,1099
|
|
18
|
+
twitwi-0.24.0.dist-info/METADATA,sha256=4cGwKAsqA9kXkG713fx0lLfoCb2znbLiTsqm-n_wI4g,21365
|
|
19
|
+
twitwi-0.24.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
20
|
+
twitwi-0.24.0.dist-info/top_level.txt,sha256=TaKyGU7j_EVbP5KI0UD6qjbaKv2Qn0OrkfUQ29a04kg,12
|
|
21
|
+
twitwi-0.24.0.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
22
|
+
twitwi-0.24.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|