tumblrbot 1.4.7__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tumblrbot/__main__.py CHANGED
@@ -19,8 +19,12 @@ def main() -> None:
19
19
  if Confirm.ask("Download latest posts?", default=False):
20
20
  PostDownloader(openai=openai, tumblr=tumblr).main()
21
21
 
22
+ examples_writer = ExamplesWriter(openai=openai, tumblr=tumblr)
22
23
  if Confirm.ask("Create training data?", default=False):
23
- ExamplesWriter(openai=openai, tumblr=tumblr).main()
24
+ examples_writer.main()
25
+
26
+ if Confirm.ask("Remove training data flagged by the OpenAI moderation? [bold]This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
27
+ examples_writer.filter_examples()
24
28
 
25
29
  fine_tuner = FineTuner(openai=openai, tumblr=tumblr)
26
30
  fine_tuner.print_estimates()
@@ -36,18 +36,18 @@ class PostDownloader(FlowClass):
36
36
  task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
37
37
 
38
38
  while True:
39
- response = self.tumblr.retrieve_published_posts(blog_identifier, after=after).json()["response"]
40
- live.progress.update(task_id, total=response["blog"]["posts"], completed=completed)
39
+ response = self.tumblr.retrieve_published_posts(blog_identifier, after=after)
40
+ live.progress.update(task_id, total=response.response.blog.posts, completed=completed)
41
41
 
42
- if posts := response["posts"]:
43
- for post in posts:
44
- dump(post, fp)
45
- fp.write("\n")
42
+ if not response.response.posts:
43
+ return
46
44
 
47
- model = Post.model_validate(post)
48
- after = model.timestamp
49
- live.custom_update(model)
45
+ for post in response.response.posts:
46
+ dump(post, fp)
47
+ fp.write("\n")
50
48
 
51
- completed += len(posts)
52
- else:
53
- return
49
+ model = Post.model_validate(post)
50
+ after = model.timestamp
51
+ live.custom_update(model)
52
+
53
+ completed += len(response.response.posts)
@@ -7,7 +7,6 @@ from typing import IO, override
7
7
 
8
8
  import rich
9
9
  from openai import BadRequestError
10
- from rich.prompt import Confirm
11
10
 
12
11
  from tumblrbot.utils.common import FlowClass, PreviewLive
13
12
  from tumblrbot.utils.models import Example, Post
@@ -26,7 +25,7 @@ class ExamplesWriter(FlowClass):
26
25
  fp,
27
26
  )
28
27
 
29
- for post in self.get_filtered_posts():
28
+ for post in self.get_valid_posts():
30
29
  self.write_example(
31
30
  self.config.user_message,
32
31
  post.get_content_text(),
@@ -54,38 +53,33 @@ class ExamplesWriter(FlowClass):
54
53
  data: dict[str, str] = loads(line)
55
54
  yield from data.items()
56
55
 
57
- def get_filtered_posts(self) -> Generator[Post]:
58
- posts = self.get_valid_posts()
56
+ def get_valid_posts(self) -> Generator[Post]:
57
+ for data_path in self.get_data_paths():
58
+ with data_path.open("rb") as fp:
59
+ for line in fp:
60
+ post = Post.model_validate_json(line)
61
+ if post.valid_text_post():
62
+ yield post
59
63
 
60
- if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
64
+ def filter_examples(self) -> None:
65
+ examples = self.config.examples_file.read_text("utf_8").splitlines()
66
+ with self.config.examples_file.open("w", encoding="utf_8") as fp:
61
67
  batch_size = self.get_moderation_batch_size()
62
- posts = list(posts)
63
68
  removed = 0
64
69
 
65
70
  with PreviewLive() as live:
66
71
  for batch in live.progress.track(
67
- batched(posts, batch_size, strict=False),
68
- ceil(len(posts) / batch_size),
72
+ batched(examples, batch_size, strict=False),
73
+ ceil(len(examples) / batch_size),
69
74
  description="Removing flagged posts...",
70
75
  ):
71
- response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
72
- for post, moderation in zip(batch, response.results, strict=True):
76
+ response = self.openai.moderations.create(input=list(batch))
77
+ for example, moderation in zip(batch, response.results, strict=True):
73
78
  if moderation.flagged:
74
79
  removed += 1
75
- live.custom_update(post)
76
80
  else:
77
- yield post
81
+ fp.write(f"{example}\n")
78
82
  rich.print(f"[red]Removed {removed} posts.\n")
79
- else:
80
- yield from posts
81
-
82
- def get_valid_posts(self) -> Generator[Post]:
83
- for data_path in self.get_data_paths():
84
- with data_path.open("rb") as fp:
85
- for line in fp:
86
- post = Post.model_validate_json(line)
87
- if post.valid_text_post():
88
- yield post
89
83
 
90
84
  def get_moderation_batch_size(self) -> int:
91
85
  try:
@@ -1,4 +1,4 @@
1
- from random import random
1
+ from random import random, randrange
2
2
  from typing import override
3
3
 
4
4
  import rich
@@ -28,28 +28,47 @@ class DraftGenerator(FlowClass):
28
28
  rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
29
29
 
30
30
  def generate_post(self) -> Post:
31
- content = self.generate_content()
32
- post = Post(content=[content])
33
- if tags := self.generate_tags(content):
34
- post.tags = tags.tags
35
- return post
36
-
37
- def generate_content(self) -> Post.Block:
38
- content = self.openai.responses.create(
39
- input=self.config.user_message,
31
+ if random() < self.config.reblog_chance: # noqa: S311
32
+ original = self.get_random_post()
33
+ user_message = f"{self.config.reblog_user_message}\n\n{original.get_content_text()}"
34
+ else:
35
+ original = Post()
36
+ user_message = self.config.user_message
37
+
38
+ text = self.generate_text(user_message)
39
+ if tags := self.generate_tags(text):
40
+ tags = tags.tags
41
+ return Post(
42
+ content=[Post.Block(type="text", text=text)],
43
+ tags=tags or [],
44
+ state="draft",
45
+ parent_tumblelog_uuid=original.blog.uuid,
46
+ parent_post_id=original.id,
47
+ reblog_key=original.reblog_key,
48
+ )
49
+
50
+ def generate_text(self, user_message: str) -> str:
51
+ return self.openai.responses.create(
52
+ input=user_message,
40
53
  instructions=self.config.developer_message,
41
54
  model=self.config.fine_tuned_model,
42
55
  ).output_text
43
56
 
44
- return Post.Block(text=content)
45
-
46
- def generate_tags(self, content: Post.Block) -> Post | None:
57
+ def generate_tags(self, text: str) -> Post | None:
47
58
  if random() < self.config.tags_chance: # noqa: S311
48
59
  return self.openai.responses.parse(
49
60
  text_format=Post,
50
- input=content.text,
61
+ input=text,
51
62
  instructions=self.config.tags_developer_message,
52
63
  model=self.config.base_model,
53
64
  ).output_parsed
54
65
 
55
66
  return None
67
+
68
+ def get_random_post(self) -> Post:
69
+ total = self.tumblr.retrieve_blog_info(self.config.upload_blog_identifier).response.blog.posts
70
+ post = self.tumblr.retrieve_published_posts(
71
+ self.config.upload_blog_identifier,
72
+ offset=randrange(total), # noqa: S311
73
+ ).response.posts[0]
74
+ return Post.model_validate(post)
tumblrbot/utils/models.py CHANGED
@@ -74,6 +74,8 @@ class Config(FileSyncSettings):
74
74
  draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
75
75
  tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
76
76
  tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
77
+ reblog_chance: NonNegativeFloat = Field(0.05, description="The chance to generate a reblog of a random post.")
78
+ reblog_user_message: str = Field("Please write a comical Tumblr post in response to the following Tumblr post:", description="The prefix for the user message used to reblog posts.")
77
79
 
78
80
  @classmethod
79
81
  @override
@@ -100,7 +102,7 @@ class Config(FileSyncSettings):
100
102
 
101
103
  toml_table[name] = value
102
104
 
103
- Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
105
+ self.toml_file.write_text(tomlkit.dumps(toml_table), encoding="utf_8")
104
106
 
105
107
  return self
106
108
 
@@ -170,15 +172,34 @@ class Tokens(FileSyncSettings):
170
172
  return self
171
173
 
172
174
 
175
+ class Blog(FullyValidatedModel):
176
+ posts: int = 0
177
+ uuid: str = ""
178
+
179
+
180
+ class ResponseModel(FullyValidatedModel):
181
+ class Response(FullyValidatedModel):
182
+ blog: Blog = Blog()
183
+ posts: list[Any] = []
184
+
185
+ response: Response
186
+
187
+
173
188
  class Post(FullyValidatedModel):
174
189
  class Block(FullyValidatedModel):
175
- type: str = "text"
190
+ type: str = ""
176
191
  text: str = ""
177
192
  blocks: list[int] = []
178
193
 
194
+ blog: SkipJsonSchema[Blog] = Blog()
195
+ id: SkipJsonSchema[int] = 0
196
+ parent_tumblelog_uuid: SkipJsonSchema[str] = ""
197
+ parent_post_id: SkipJsonSchema[int] = 0
198
+ reblog_key: SkipJsonSchema[str] = ""
199
+
179
200
  timestamp: SkipJsonSchema[int] = 0
180
201
  tags: Annotated[list[str], PlainSerializer(",".join)] = []
181
- state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
202
+ state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "published"
182
203
 
183
204
  content: SkipJsonSchema[list[Block]] = []
184
205
  layout: SkipJsonSchema[list[Block]] = []
tumblrbot/utils/tumblr.py CHANGED
@@ -3,7 +3,7 @@ from typing import Self
3
3
  from requests import HTTPError, Response
4
4
  from requests_oauthlib import OAuth1Session
5
5
 
6
- from tumblrbot.utils.models import Post, Tokens
6
+ from tumblrbot.utils.models import Post, ResponseModel, Tokens
7
7
 
8
8
 
9
9
  class TumblrSession(OAuth1Session):
@@ -22,18 +22,25 @@ class TumblrSession(OAuth1Session):
22
22
  error.add_note(response.text)
23
23
  raise
24
24
 
25
- def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
26
- return self.get(
25
+ def retrieve_blog_info(self, blog_identifier: str) -> ResponseModel:
26
+ response = self.get(f"https://api.tumblr.com/v2/blog/{blog_identifier}/info")
27
+ return ResponseModel.model_validate_json(response.text)
28
+
29
+ def retrieve_published_posts(self, blog_identifier: str, offset: int | None = None, after: int | None = None) -> ResponseModel:
30
+ response = self.get(
27
31
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
28
32
  params={
33
+ "offset": offset,
29
34
  "after": after,
30
35
  "sort": "asc",
31
36
  "npf": True,
32
37
  },
33
38
  )
39
+ return ResponseModel.model_validate_json(response.text)
34
40
 
35
- def create_post(self, blog_identifier: str, post: Post) -> Response:
36
- return self.post(
41
+ def create_post(self, blog_identifier: str, post: Post) -> ResponseModel:
42
+ response = self.post(
37
43
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
38
44
  json=post.model_dump(),
39
45
  )
46
+ return ResponseModel.model_validate_json(response.text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.4.7
3
+ Version: 1.6.0
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
5
  Requires-Python: >= 3.13
6
6
  Description-Content-Type: text/markdown
@@ -13,6 +13,7 @@ Requires-Dist: requests-oauthlib
13
13
  Requires-Dist: rich
14
14
  Requires-Dist: tiktoken
15
15
  Requires-Dist: tomlkit
16
+ Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
16
17
  Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
17
18
 
18
19
  # tumblrbot
@@ -30,8 +31,8 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
30
31
  [OpenAI]: https://pypi.org/project/openai
31
32
  [OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
32
33
  [OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
34
+ [OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
33
35
  [Fine-Tuning Portal]: https://platform.openai.com/finetune
34
- [Moderation API]: https://platform.openai.com/docs/api-reference/moderations
35
36
 
36
37
  [Tumblr]: https://tumblr.com
37
38
  [Tumblr Tokens]: https://tumblr.com/oauth/apps
@@ -62,9 +63,8 @@ Features:
62
63
  - Shows progress and previews the current post.
63
64
  1. [Creates examples][Examples] to fine-tune the model from your posts.
64
65
  - Filters out posts that contain more than just text data.
65
- - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
66
- - Shows progress and previews the current post.
67
66
  - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
67
+ 1. Filters out any posts flagged by the [OpenAI Moderation API].
68
68
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
69
69
  - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
70
70
  - Resumes monitoring the same fine-tuning process when restarted.
@@ -73,13 +73,14 @@ Features:
73
73
  1. [Generates and uploads posts][Generate] to the [configured][config] [Tumblr] blog using the [configured][config] fine-tuned model.
74
74
  - Creates tags by extracting keywords at the [configured][config] frequency using the [configured][config] model.
75
75
  - Uploads posts as drafts to the [configured][config] [Tumblr] blog.
76
+ - Reblogs posts at the [configured][config] frequency.
76
77
  - Shows progress and previews the current post.
77
78
  - Colorful output, progress bars, and post previews using [rich].
78
79
  - Automatically keeps the [config] file up-to-date and recreates it if missing.
79
80
 
80
81
  **To-Do:**
81
82
 
82
- - Add code documentation.
83
+ - ...
83
84
 
84
85
  **Known Issues:**
85
86
 
@@ -0,0 +1,15 @@
1
+ tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
3
+ tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ tumblrbot/flow/download.py,sha256=PUs7eM-1kGOb2RKijy3lW0zyvfFDwbxzTGhVghrWIhc,2012
5
+ tumblrbot/flow/examples.py,sha256=MlukrVdzpIwk_-37PpRsBGV5eX-lLlNUUYvuozXC_vw,3726
6
+ tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
7
+ tumblrbot/flow/generate.py,sha256=GPWd2dBFQa8t1vH9C3gDs8d8mGvLkKuFjDlYC7s0PnM,3001
8
+ tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
10
+ tumblrbot/utils/models.py,sha256=8A7gd4L0TZQwHJmD6YutV0WaIU_jkl39SRzO8OMrays,10429
11
+ tumblrbot/utils/tumblr.py,sha256=NP_qRaB4A5cKcQdOErYhTqyQQcO3ffytULvQpQTtlM8,1725
12
+ tumblrbot-1.6.0.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
13
+ tumblrbot-1.6.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
14
+ tumblrbot-1.6.0.dist-info/METADATA,sha256=0QHZRZ8KNja_rCMmE4zflOHDjykl_JVkI6KZzSlulkM,10138
15
+ tumblrbot-1.6.0.dist-info/RECORD,,
@@ -1,15 +0,0 @@
1
- tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- tumblrbot/__main__.py,sha256=BenjVNlVZDy-ZlSWukEIguGLa6qXvZjhYSSWMqa8-0Q,1447
3
- tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- tumblrbot/flow/download.py,sha256=wdXmtCnnArn8Zw7D2Hoa_KhH-k61j9w3cbYztgBkUlY,2036
5
- tumblrbot/flow/examples.py,sha256=Th6vgiu3D2VloOx7otZlk164h3ifkJEwDk21YHMEYP0,3976
6
- tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
7
- tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
8
- tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
10
- tumblrbot/utils/models.py,sha256=Z0k16qJsZEO8tfmPp7X3edz-RgGCDLRSm7HrSDLGh1Y,9663
11
- tumblrbot/utils/tumblr.py,sha256=6V9AjT-dyR2vuUkfqgqs52Ua5irhQJzhgQhV54xKyGM,1258
12
- tumblrbot-1.4.7.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
13
- tumblrbot-1.4.7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
14
- tumblrbot-1.4.7.dist-info/METADATA,sha256=q_PH1oU-d0PRdXDuL5Bjh-Y8gccZJVvLCCho2E_CLvc,10104
15
- tumblrbot-1.4.7.dist-info/RECORD,,