PyPI - tumblrbot - Versions diffs - 1.4.7__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

tumblrbot 1.4.7__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

tumblrbot/__main__.py +5 -1
tumblrbot/flow/download.py +12 -12
tumblrbot/flow/examples.py +16 -22
tumblrbot/flow/generate.py +33 -14
tumblrbot/utils/models.py +24 -3
tumblrbot/utils/tumblr.py +12 -5
{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/METADATA +6 -5
tumblrbot-1.6.0.dist-info/RECORD +15 -0
tumblrbot-1.4.7.dist-info/RECORD +0 -15
{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/WHEEL +0 -0
{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/entry_points.txt +0 -0

tumblrbot/__main__.py CHANGED Viewed

@@ -19,8 +19,12 @@ def main() -> None:
         if Confirm.ask("Download latest posts?", default=False):
             PostDownloader(openai=openai, tumblr=tumblr).main()
+        examples_writer = ExamplesWriter(openai=openai, tumblr=tumblr)
         if Confirm.ask("Create training data?", default=False):
-            ExamplesWriter(openai=openai, tumblr=tumblr).main()
+            examples_writer.main()
+        if Confirm.ask("Remove training data flagged by the OpenAI moderation? [bold]This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
+            examples_writer.filter_examples()
         fine_tuner = FineTuner(openai=openai, tumblr=tumblr)
         fine_tuner.print_estimates()

tumblrbot/flow/download.py CHANGED Viewed

@@ -36,18 +36,18 @@ class PostDownloader(FlowClass):
         task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
         while True:
-            response = self.tumblr.retrieve_published_posts(blog_identifier, after=after).json()["response"]
-            live.progress.update(task_id, total=response["blog"]["posts"], completed=completed)
+            response = self.tumblr.retrieve_published_posts(blog_identifier, after=after)
+            live.progress.update(task_id, total=response.response.blog.posts, completed=completed)
-            if posts := response["posts"]:
-                for post in posts:
-                    dump(post, fp)
-                    fp.write("\n")
+            if not response.response.posts:
+                return
-                    model = Post.model_validate(post)
-                    after = model.timestamp
-                    live.custom_update(model)
+            for post in response.response.posts:
+                dump(post, fp)
+                fp.write("\n")
-                completed += len(posts)
-            else:
-                return
+                model = Post.model_validate(post)
+                after = model.timestamp
+                live.custom_update(model)
+            completed += len(response.response.posts)

tumblrbot/flow/examples.py CHANGED Viewed

@@ -7,7 +7,6 @@ from typing import IO, override
 import rich
 from openai import BadRequestError
-from rich.prompt import Confirm
 from tumblrbot.utils.common import FlowClass, PreviewLive
 from tumblrbot.utils.models import Example, Post
@@ -26,7 +25,7 @@ class ExamplesWriter(FlowClass):
                     fp,
                 )
-            for post in self.get_filtered_posts():
+            for post in self.get_valid_posts():
                 self.write_example(
                     self.config.user_message,
                     post.get_content_text(),
@@ -54,38 +53,33 @@ class ExamplesWriter(FlowClass):
                 data: dict[str, str] = loads(line)
                 yield from data.items()
-    def get_filtered_posts(self) -> Generator[Post]:
-        posts = self.get_valid_posts()
+    def get_valid_posts(self) -> Generator[Post]:
+        for data_path in self.get_data_paths():
+            with data_path.open("rb") as fp:
+                for line in fp:
+                    post = Post.model_validate_json(line)
+                    if post.valid_text_post():
+                        yield post
-        if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
+    def filter_examples(self) -> None:
+        examples = self.config.examples_file.read_text("utf_8").splitlines()
+        with self.config.examples_file.open("w", encoding="utf_8") as fp:
             batch_size = self.get_moderation_batch_size()
-            posts = list(posts)
             removed = 0
             with PreviewLive() as live:
                 for batch in live.progress.track(
-                    batched(posts, batch_size, strict=False),
-                    ceil(len(posts) / batch_size),
+                    batched(examples, batch_size, strict=False),
+                    ceil(len(examples) / batch_size),
                     description="Removing flagged posts...",
                 ):
-                    response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
-                    for post, moderation in zip(batch, response.results, strict=True):
+                    response = self.openai.moderations.create(input=list(batch))
+                    for example, moderation in zip(batch, response.results, strict=True):
                         if moderation.flagged:
                             removed += 1
-                            live.custom_update(post)
                         else:
-                            yield post
+                            fp.write(f"{example}\n")
             rich.print(f"[red]Removed {removed} posts.\n")
-        else:
-            yield from posts
-    def get_valid_posts(self) -> Generator[Post]:
-        for data_path in self.get_data_paths():
-            with data_path.open("rb") as fp:
-                for line in fp:
-                    post = Post.model_validate_json(line)
-                    if post.valid_text_post():
-                        yield post
     def get_moderation_batch_size(self) -> int:
         try:

tumblrbot/flow/generate.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from random import random
+from random import random, randrange
 from typing import override
 import rich
@@ -28,28 +28,47 @@ class DraftGenerator(FlowClass):
         rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
     def generate_post(self) -> Post:
-        content = self.generate_content()
-        post = Post(content=[content])
-        if tags := self.generate_tags(content):
-            post.tags = tags.tags
-        return post
-    def generate_content(self) -> Post.Block:
-        content = self.openai.responses.create(
-            input=self.config.user_message,
+        if random() < self.config.reblog_chance:  # noqa: S311
+            original = self.get_random_post()
+            user_message = f"{self.config.reblog_user_message}\n\n{original.get_content_text()}"
+        else:
+            original = Post()
+            user_message = self.config.user_message
+        text = self.generate_text(user_message)
+        if tags := self.generate_tags(text):
+            tags = tags.tags
+        return Post(
+            content=[Post.Block(type="text", text=text)],
+            tags=tags or [],
+            state="draft",
+            parent_tumblelog_uuid=original.blog.uuid,
+            parent_post_id=original.id,
+            reblog_key=original.reblog_key,
+        )
+    def generate_text(self, user_message: str) -> str:
+        return self.openai.responses.create(
+            input=user_message,
             instructions=self.config.developer_message,
             model=self.config.fine_tuned_model,
         ).output_text
-        return Post.Block(text=content)
-    def generate_tags(self, content: Post.Block) -> Post | None:
+    def generate_tags(self, text: str) -> Post | None:
         if random() < self.config.tags_chance:  # noqa: S311
             return self.openai.responses.parse(
                 text_format=Post,
-                input=content.text,
+                input=text,
                 instructions=self.config.tags_developer_message,
                 model=self.config.base_model,
             ).output_parsed
         return None
+    def get_random_post(self) -> Post:
+        total = self.tumblr.retrieve_blog_info(self.config.upload_blog_identifier).response.blog.posts
+        post = self.tumblr.retrieve_published_posts(
+            self.config.upload_blog_identifier,
+            offset=randrange(total),  # noqa: S311
+        ).response.posts[0]
+        return Post.model_validate(post)

tumblrbot/utils/models.py CHANGED Viewed

@@ -74,6 +74,8 @@ class Config(FileSyncSettings):
     draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
     tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
     tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
+    reblog_chance: NonNegativeFloat = Field(0.05, description="The chance to generate a reblog of a random post.")
+    reblog_user_message: str = Field("Please write a comical Tumblr post in response to the following Tumblr post:", description="The prefix for the user message used to reblog posts.")
     @classmethod
     @override
@@ -100,7 +102,7 @@ class Config(FileSyncSettings):
             toml_table[name] = value
-        Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
+        self.toml_file.write_text(tomlkit.dumps(toml_table), encoding="utf_8")
         return self
@@ -170,15 +172,34 @@ class Tokens(FileSyncSettings):
         return self
+class Blog(FullyValidatedModel):
+    posts: int = 0
+    uuid: str = ""
+class ResponseModel(FullyValidatedModel):
+    class Response(FullyValidatedModel):
+        blog: Blog = Blog()
+        posts: list[Any] = []
+    response: Response
 class Post(FullyValidatedModel):
     class Block(FullyValidatedModel):
-        type: str = "text"
+        type: str = ""
         text: str = ""
         blocks: list[int] = []
+    blog: SkipJsonSchema[Blog] = Blog()
+    id: SkipJsonSchema[int] = 0
+    parent_tumblelog_uuid: SkipJsonSchema[str] = ""
+    parent_post_id: SkipJsonSchema[int] = 0
+    reblog_key: SkipJsonSchema[str] = ""
     timestamp: SkipJsonSchema[int] = 0
     tags: Annotated[list[str], PlainSerializer(",".join)] = []
-    state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
+    state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "published"
     content: SkipJsonSchema[list[Block]] = []
     layout: SkipJsonSchema[list[Block]] = []

tumblrbot/utils/tumblr.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Self
 from requests import HTTPError, Response
 from requests_oauthlib import OAuth1Session
-from tumblrbot.utils.models import Post, Tokens
+from tumblrbot.utils.models import Post, ResponseModel, Tokens
 class TumblrSession(OAuth1Session):
@@ -22,18 +22,25 @@ class TumblrSession(OAuth1Session):
             error.add_note(response.text)
             raise
-    def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
-        return self.get(
+    def retrieve_blog_info(self, blog_identifier: str) -> ResponseModel:
+        response = self.get(f"https://api.tumblr.com/v2/blog/{blog_identifier}/info")
+        return ResponseModel.model_validate_json(response.text)
+    def retrieve_published_posts(self, blog_identifier: str, offset: int | None = None, after: int | None = None) -> ResponseModel:
+        response = self.get(
             f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
             params={
+                "offset": offset,
                 "after": after,
                 "sort": "asc",
                 "npf": True,
             },
         )
+        return ResponseModel.model_validate_json(response.text)
-    def create_post(self, blog_identifier: str, post: Post) -> Response:
-        return self.post(
+    def create_post(self, blog_identifier: str, post: Post) -> ResponseModel:
+        response = self.post(
             f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
             json=post.model_dump(),
         )
+        return ResponseModel.model_validate_json(response.text)

{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tumblrbot
-Version: 1.4.7
+Version: 1.6.0
 Summary: An updated bot that posts to Tumblr, based on your very own blog!
 Requires-Python: >= 3.13
 Description-Content-Type: text/markdown
@@ -13,6 +13,7 @@ Requires-Dist: requests-oauthlib
 Requires-Dist: rich
 Requires-Dist: tiktoken
 Requires-Dist: tomlkit
+Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
 Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
 # tumblrbot
@@ -30,8 +31,8 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
 [OpenAI]: https://pypi.org/project/openai
 [OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
 [OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
+[OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
 [Fine-Tuning Portal]: https://platform.openai.com/finetune
-[Moderation API]: https://platform.openai.com/docs/api-reference/moderations
 [Tumblr]: https://tumblr.com
 [Tumblr Tokens]: https://tumblr.com/oauth/apps
@@ -62,9 +63,8 @@ Features:
       - Shows progress and previews the current post.
    1. [Creates examples][Examples] to fine-tune the model from your posts.
       - Filters out posts that contain more than just text data.
-      - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
-         - Shows progress and previews the current post.
       - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
+   1. Filters out any posts flagged by the [OpenAI Moderation API].
    1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
       - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
       - Resumes monitoring the same fine-tuning process when restarted.
@@ -73,13 +73,14 @@ Features:
    1. [Generates and uploads posts][Generate] to the [configured][config] [Tumblr] blog using the [configured][config] fine-tuned model.
       - Creates tags by extracting keywords at the [configured][config] frequency using the [configured][config] model.
       - Uploads posts as drafts to the [configured][config] [Tumblr] blog.
+      - Reblog posts at the [configured][config] frequency.
       - Shows progress and previews the current post.
 - Colorful output, progress bars, and post previews using [rich].
 - Automatically keeps the [config] file up-to-date and recreates it if missing.
 **To-Do:**
-- Add code documentation.
+- ...
 **Known Issues:**

tumblrbot-1.6.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,15 @@
+tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
+tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tumblrbot/flow/download.py,sha256=PUs7eM-1kGOb2RKijy3lW0zyvfFDwbxzTGhVghrWIhc,2012
+tumblrbot/flow/examples.py,sha256=MlukrVdzpIwk_-37PpRsBGV5eX-lLlNUUYvuozXC_vw,3726
+tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
+tumblrbot/flow/generate.py,sha256=GPWd2dBFQa8t1vH9C3gDs8d8mGvLkKuFjDlYC7s0PnM,3001
+tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
+tumblrbot/utils/models.py,sha256=8A7gd4L0TZQwHJmD6YutV0WaIU_jkl39SRzO8OMrays,10429
+tumblrbot/utils/tumblr.py,sha256=NP_qRaB4A5cKcQdOErYhTqyQQcO3ffytULvQpQTtlM8,1725
+tumblrbot-1.6.0.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
+tumblrbot-1.6.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+tumblrbot-1.6.0.dist-info/METADATA,sha256=0QHZRZ8KNja_rCMmE4zflOHDjykl_JVkI6KZzSlulkM,10138
+tumblrbot-1.6.0.dist-info/RECORD,,

tumblrbot-1.4.7.dist-info/RECORD DELETED Viewed

@@ -1,15 +0,0 @@
-tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tumblrbot/__main__.py,sha256=BenjVNlVZDy-ZlSWukEIguGLa6qXvZjhYSSWMqa8-0Q,1447
-tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tumblrbot/flow/download.py,sha256=wdXmtCnnArn8Zw7D2Hoa_KhH-k61j9w3cbYztgBkUlY,2036
-tumblrbot/flow/examples.py,sha256=Th6vgiu3D2VloOx7otZlk164h3ifkJEwDk21YHMEYP0,3976
-tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
-tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
-tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
-tumblrbot/utils/models.py,sha256=Z0k16qJsZEO8tfmPp7X3edz-RgGCDLRSm7HrSDLGh1Y,9663
-tumblrbot/utils/tumblr.py,sha256=6V9AjT-dyR2vuUkfqgqs52Ua5irhQJzhgQhV54xKyGM,1258
-tumblrbot-1.4.7.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
-tumblrbot-1.4.7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
-tumblrbot-1.4.7.dist-info/METADATA,sha256=q_PH1oU-d0PRdXDuL5Bjh-Y8gccZJVvLCCho2E_CLvc,10104
-tumblrbot-1.4.7.dist-info/RECORD,,

{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{tumblrbot-1.4.7.dist-info → tumblrbot-1.6.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

tumblrbot 1.4.7__py3-none-any.whl → 1.6.0__py3-none-any.whl

tumblrbot 1.4.7__py3-none-any.whl → 1.6.0__py3-none-any.whl