tumblrbot 1.9.0__tar.gz → 1.9.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,3 @@
1
- # Custom
2
- data
3
- *.lnk
4
- config.toml
5
- custom_prompts.jsonl
6
- examples.jsonl
7
-
8
1
  # Byte-compiled / optimized / DLL files
9
2
  __pycache__/
10
3
  *.py[codz]
@@ -138,6 +131,19 @@ __pypackages__/
138
131
  celerybeat-schedule
139
132
  celerybeat.pid
140
133
 
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
141
147
  # SageMath parsed files
142
148
  *.sage.py
143
149
 
@@ -189,11 +195,11 @@ cython_debug/
189
195
  .abstra/
190
196
 
191
197
  # Visual Studio Code
192
- # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
193
199
  # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
194
- # and can be added to the global gitignore or merged into this file. However, if you prefer,
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
195
201
  # you could uncomment the following to ignore the entire vscode folder
196
- .vscode/
202
+ # .vscode/
197
203
 
198
204
  # Ruff stuff:
199
205
  .ruff_cache/
@@ -207,4 +213,10 @@ marimo/_lsp/
207
213
  __marimo__/
208
214
 
209
215
  # Streamlit
210
- .streamlit/secrets.toml
216
+ .streamlit/secrets.toml
217
+
218
+ .vscode
219
+ data
220
+ *.jsonl
221
+ config.toml
222
+ tumblrbot.exe.lnk
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.9.0
3
+ Version: 1.9.1
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
5
  Requires-Python: >= 3.13
6
6
  Description-Content-Type: text/markdown
@@ -39,13 +39,15 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
39
39
  [Tumblr API Documentation on Blog Identifiers]: https://tumblr.com/docs/en/api/v2#blog-identifiers
40
40
  [Tumblr API Documentation on Rate Limits]: https://tumblr.com/docs/en/api/v2#rate-limits
41
41
 
42
+ [Format String]: https://docs.python.org/3/library/string.html#format-string-syntax
43
+
42
44
  [Download]: src/tumblrbot/flow/download.py
43
45
  [Examples]: src/tumblrbot/flow/examples.py
44
46
  [Fine-Tune]: src/tumblrbot/flow/fine_tune.py
45
47
  [Generate]: src/tumblrbot/flow/generate.py
46
48
  [Main]: src/tumblrbot/__main__.py
47
49
 
48
- [Config]: #configuration
50
+ [Configurable]: #configuration
49
51
  [Fine-Tuning]: #manual-fine-tuning
50
52
  [![PyPI - Version](https://img.shields.io/pypi/v/tumblrbot)](https://python.org/pypi/tumblrbot)
51
53
 
@@ -60,33 +62,34 @@ Features:
60
62
  1. Asks for [OpenAI] and [Tumblr] tokens.
61
63
  - Stores API tokens using [keyring].
62
64
  1. Retrieves [Tumblr] [OAuth] tokens.
63
- 1. [Downloads posts][Download] from the [configured][config] blogs.
65
+ 1. [Downloads posts][Download] from specified blogs ([configurable]).
64
66
  - Skips redownloading already downloaded posts.
65
67
  - Shows progress and previews the current post.
66
- 1. [Creates examples][Examples] to fine-tune the model from your posts.
68
+ 1. [Creates examples][Examples] to fine-tune the model from the downloaded posts.
67
69
  - Filters out posts that contain more than just text data.
68
- - Filters out posts that contain [configured][config] regular expressions.
69
- - Only uses the most recent posts from each blog as [configured][config].
70
- - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
70
+ - Filters out posts that match any of the filtered regular expressions ([configurable]).
71
+ - Only uses the most recent posts from each blog ([configurable]).
72
+ - Adds custom user messages and assistant responses to the dataset ([configurable]).
71
73
  1. Filters out any posts flagged by the [OpenAI Moderation API].
72
74
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
73
- - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
75
+ - Provides cost estimates if the currently saved examples are used to fine-tune a base model ([configurable]).
74
76
  - Resumes monitoring the same fine-tuning process when restarted.
75
77
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
76
78
  - Stores the output model automatically when fine-tuning is completed.
77
- 1. [Generates and uploads posts][Generate] to the [configured][config] blog using the [configured][config] fine-tuned model.
78
- - Creates tags by extracting keywords at the [configured][config] frequency using the [configured][config] model.
79
- - Uploads posts as drafts to the [configured][config] blog.
80
- - Reblogs posts from the [configured][config] blogs at the [configured][config] frequency.
79
+ 1. [Generates and uploads posts][Generate] to a blog using the fine-tuned model ([configurable]).
80
+ - Creates tags by extracting keywords using the base model ([configurable]).
81
+ - Uploads posts as drafts.
82
+ - Reblogs posts from allowed blogs ([configurable]).
81
83
  - Shows progress and previews the current post.
82
84
  - Colorful output, progress bars, and post previews using [rich].
83
- - Automatically keeps the [config] file up-to-date and recreates it if missing.
85
+ - Automatically keeps the [config][configurable] file up-to-date and recreates it if missing (without overriding user settings).
84
86
 
85
87
  **Known Issues:**
86
88
 
87
89
  - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
88
90
  - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
89
91
  - During post downloading or post generation, you may receive a "Limit Exceeded" error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
92
+ - Similar to the rate-limiting issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably resets on the same schedule as the rate limits (usually after a minute or an hour).
90
93
 
91
94
  **Please submit an issue or contact us for features you want added/reimplemented.**
92
95
 
@@ -103,7 +106,7 @@ Features:
103
106
 
104
107
  ## Usage
105
108
 
106
- Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config].
109
+ Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
107
110
 
108
111
  ## Obtaining Tokens
109
112
 
@@ -171,16 +174,17 @@ Specific Options:
171
174
  - **`tags_chance`** - This should be between 0 and 1. Setting it to 0 corresponds to a 0% chance (never) to add tags to a post. 1 corresponds to a 100% chance (always) to add tags to a post. Adding tags incurs a very small token cost.
172
175
  - **`reblog_blog_identifiers`** - Whenever a reblog is attempted, a random blog from this list will be chosen to be reblogged from.
173
176
  - **`reblog_chance`** - This setting works the same way as `tags_chance`.
174
- - **`reblog_user_message`** - This setting is a prefix that is directly prepended to the contents of the post being reblogged.
177
+ - **`reblog_user_message`** - This setting is a [format string]. The only argument it is formatted with is the content of the post being reblogged. In simple terms, the `{}` will be replaced with said content.
178
+ - *Note: The bot is only given the latest message in a reblog chain due to the required complexity and added costs of including the entire chain.*
175
179
 
176
180
  ## Manual Fine-Tuning
177
181
 
178
182
  You can manually upload the examples file to [OpenAI] and start the fine-tuning here: [fine-tuning portal].
179
183
 
180
184
  1. Press `+ Create`.
181
- 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config].
182
- 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config].
185
+ 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config][configurable].
186
+ 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config][configurable].
183
187
  1. Press `Create`.
184
- 1. (Optional) Copy the value next to `Job ID` and paste it into the [config] under `job_id`. You can then run the program and monitor its progress as usual.
185
- 1. If you do not do the above, you will have to copy the value next to `Output model` once the job is complete and paste it into the [config] under `fine_tuned_model`.
188
+ 1. (Optional) Copy the value next to `Job ID` and paste it into the [config][configurable] under `job_id`. You can then run the program and monitor its progress as usual.
189
+ 1. If you do not do the above, you will have to copy the value next to `Output model` once the job is complete and paste it into the [config][configurable] under `fine_tuned_model`.
186
190
 
@@ -21,13 +21,15 @@
21
21
  [Tumblr API Documentation on Blog Identifiers]: https://tumblr.com/docs/en/api/v2#blog-identifiers
22
22
  [Tumblr API Documentation on Rate Limits]: https://tumblr.com/docs/en/api/v2#rate-limits
23
23
 
24
+ [Format String]: https://docs.python.org/3/library/string.html#format-string-syntax
25
+
24
26
  [Download]: src/tumblrbot/flow/download.py
25
27
  [Examples]: src/tumblrbot/flow/examples.py
26
28
  [Fine-Tune]: src/tumblrbot/flow/fine_tune.py
27
29
  [Generate]: src/tumblrbot/flow/generate.py
28
30
  [Main]: src/tumblrbot/__main__.py
29
31
 
30
- [Config]: #configuration
32
+ [Configurable]: #configuration
31
33
  [Fine-Tuning]: #manual-fine-tuning
32
34
  [![PyPI - Version](https://img.shields.io/pypi/v/tumblrbot)](https://python.org/pypi/tumblrbot)
33
35
 
@@ -42,33 +44,34 @@ Features:
42
44
  1. Asks for [OpenAI] and [Tumblr] tokens.
43
45
  - Stores API tokens using [keyring].
44
46
  1. Retrieves [Tumblr] [OAuth] tokens.
45
- 1. [Downloads posts][Download] from the [configured][config] blogs.
47
+ 1. [Downloads posts][Download] from specified blogs ([configurable]).
46
48
  - Skips redownloading already downloaded posts.
47
49
  - Shows progress and previews the current post.
48
- 1. [Creates examples][Examples] to fine-tune the model from your posts.
50
+ 1. [Creates examples][Examples] to fine-tune the model from the downloaded posts.
49
51
  - Filters out posts that contain more than just text data.
50
- - Filters out posts that contain [configured][config] regular expressions.
51
- - Only uses the most recent posts from each blog as [configured][config].
52
- - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
52
+ - Filters out posts that match any of the filtered regular expressions ([configurable]).
53
+ - Only uses the most recent posts from each blog ([configurable]).
54
+ - Adds custom user messages and assistant responses to the dataset ([configurable]).
53
55
  1. Filters out any posts flagged by the [OpenAI Moderation API].
54
56
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
55
- - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
57
+ - Provides cost estimates if the currently saved examples are used to fine-tune a base model ([configurable]).
56
58
  - Resumes monitoring the same fine-tuning process when restarted.
57
59
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
58
60
  - Stores the output model automatically when fine-tuning is completed.
59
- 1. [Generates and uploads posts][Generate] to the [configured][config] blog using the [configured][config] fine-tuned model.
60
- - Creates tags by extracting keywords at the [configured][config] frequency using the [configured][config] model.
61
- - Uploads posts as drafts to the [configured][config] blog.
62
- - Reblogs posts from the [configured][config] blogs at the [configured][config] frequency.
61
+ 1. [Generates and uploads posts][Generate] to a blog using the fine-tuned model ([configurable]).
62
+ - Creates tags by extracting keywords using the base model ([configurable]).
63
+ - Uploads posts as drafts.
64
+ - Reblogs posts from allowed blogs ([configurable]).
63
65
  - Shows progress and previews the current post.
64
66
  - Colorful output, progress bars, and post previews using [rich].
65
- - Automatically keeps the [config] file up-to-date and recreates it if missing.
67
+ - Automatically keeps the [config][configurable] file up-to-date and recreates it if missing (without overriding user settings).
66
68
 
67
69
  **Known Issues:**
68
70
 
69
71
  - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
70
72
  - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
71
73
  - During post downloading or post generation, you may receive a "Limit Exceeded" error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
74
+ - Similar to the rate-limiting issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably resets on the same schedule as the rate limits (usually after a minute or an hour).
72
75
 
73
76
  **Please submit an issue or contact us for features you want added/reimplemented.**
74
77
 
@@ -85,7 +88,7 @@ Features:
85
88
 
86
89
  ## Usage
87
90
 
88
- Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config].
91
+ Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
89
92
 
90
93
  ## Obtaining Tokens
91
94
 
@@ -153,15 +156,16 @@ Specific Options:
153
156
  - **`tags_chance`** - This should be between 0 and 1. Setting it to 0 corresponds to a 0% chance (never) to add tags to a post. 1 corresponds to a 100% chance (always) to add tags to a post. Adding tags incurs a very small token cost.
154
157
  - **`reblog_blog_identifiers`** - Whenever a reblog is attempted, a random blog from this list will be chosen to be reblogged from.
155
158
  - **`reblog_chance`** - This setting works the same way as `tags_chance`.
156
- - **`reblog_user_message`** - This setting is a prefix that is directly prepended to the contents of the post being reblogged.
159
+ - **`reblog_user_message`** - This setting is a [format string]. The only argument it is formatted with is the content of the post being reblogged. In simple terms, the `{}` will be replaced with said content.
160
+ - *Note: The bot is only given the latest message in a reblog chain due to the required complexity and added costs of including the entire chain.*
157
161
 
158
162
  ## Manual Fine-Tuning
159
163
 
160
164
  You can manually upload the examples file to [OpenAI] and start the fine-tuning here: [fine-tuning portal].
161
165
 
162
166
  1. Press `+ Create`.
163
- 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config].
164
- 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config].
167
+ 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config][configurable].
168
+ 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config][configurable].
165
169
  1. Press `Create`.
166
- 1. (Optional) Copy the value next to `Job ID` and paste it into the [config] under `job_id`. You can then run the program and monitor its progress as usual.
167
- 1. If you do not do the above, you will have to copy the value next to `Output model` once the job is complete and paste it into the [config] under `fine_tuned_model`.
170
+ 1. (Optional) Copy the value next to `Job ID` and paste it into the [config][configurable] under `job_id`. You can then run the program and monitor its progress as usual.
171
+ 1. If you do not do the above, you will have to copy the value next to `Output model` once the job is complete and paste it into the [config][configurable] under `fine_tuned_model`.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "tumblrbot"
3
- version = "1.9.0"
3
+ version = "1.9.1"
4
4
  description = "An updated bot that posts to Tumblr, based on your very own blog!"
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.13"
@@ -30,7 +30,7 @@ class ExamplesWriter(FlowClass):
30
30
  for post in self.get_valid_posts():
31
31
  self.write_example(
32
32
  self.config.user_message,
33
- post.get_content_text(),
33
+ str(post),
34
34
  fp,
35
35
  )
36
36
 
@@ -65,7 +65,7 @@ class ExamplesWriter(FlowClass):
65
65
  with path.open("rb") as fp:
66
66
  for line in fp:
67
67
  post = Post.model_validate_json(line)
68
- if post.valid_text_post() and not (self.config.filtered_words and pattern.search(post.get_content_text())):
68
+ if post.valid_text_post() and not (post.trail and self.config.filtered_words and pattern.search(str(post))):
69
69
  yield post
70
70
 
71
71
  def filter_examples(self) -> None:
@@ -34,7 +34,7 @@ class DraftGenerator(FlowClass):
34
34
 
35
35
  def generate_post(self) -> Post:
36
36
  if original := self.get_random_post():
37
- user_message = f"{self.config.reblog_user_message}\n\n{original.get_content_text()}"
37
+ user_message = self.config.reblog_user_message.format(original)
38
38
  else:
39
39
  original = Post()
40
40
  user_message = self.config.user_message
@@ -79,7 +79,7 @@ class DraftGenerator(FlowClass):
79
79
  offset,
80
80
  ).response.posts:
81
81
  post = Post.model_validate(raw_post)
82
- if post.valid_text_post():
82
+ if post.valid_text_post() and self.is_trail_valid(post.trail):
83
83
  return post
84
84
 
85
85
  return None
@@ -89,3 +89,7 @@ class DraftGenerator(FlowClass):
89
89
  total = self.tumblr.retrieve_blog_info(blog_identifier).response.blog.posts
90
90
  # The same Iterable object is cached, so reading an element will effectively discard it. This prevents checking the same offsets twice.
91
91
  return iter(sample(range(total), total))
92
+
93
+ def is_trail_valid(self, trail: list[Post]) -> bool:
94
+ # Checks if every post in the reblog trail is valid and that the blog that created the post is in the allowed reblog list.
95
+ return all(post.valid_text_post() and post.blog.name in self.config.reblog_blog_identifiers for post in trail)
@@ -78,7 +78,7 @@ class Config(FileSyncSettings):
78
78
  tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
79
79
  reblog_blog_identifiers: list[str] = Field([], description="The identifiers of blogs that can be reblogged from when generating drafts.")
80
80
  reblog_chance: NonNegativeFloat = Field(0.1, description="The chance to generate a reblog of a random post. This will use more OpenAI tokens.")
81
- reblog_user_message: str = Field("Please write a comical Tumblr post in response to the following post:", description="The prefix for the user message used to reblog posts.")
81
+ reblog_user_message: str = Field("Please write a comical Tumblr post in response to the following post:\n{}", description="The format string for the user message used to reblog posts.")
82
82
 
83
83
  @classmethod
84
84
  @override
@@ -145,12 +145,16 @@ class Tokens(FileSyncSettings):
145
145
  @model_validator(mode="after")
146
146
  @override
147
147
  def write(self) -> Self:
148
+ # Check if any tokens are missing or if the user wants to reset them, then set tokens if necessary.
148
149
  if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
149
150
  (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
150
151
 
151
152
  if not all(self.tumblr.model_dump().values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
152
153
  self.tumblr.client_key, self.tumblr.client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
153
154
 
155
+ # This is the whole OAuth 1.0 process.
156
+ # https://requests-oauthlib.readthedocs.io/en/latest/examples/tumblr.html
157
+ # We tried setting up OAuth 2.0, but the token refresh process is far too unreliable for this sort of program.
154
158
  with OAuth1Session(
155
159
  self.tumblr.client_key,
156
160
  self.tumblr.client_secret,
@@ -170,12 +174,15 @@ class Tokens(FileSyncSettings):
170
174
 
171
175
  self.tumblr.resource_owner_key, self.tumblr.resource_owner_secret = self.get_oauth_tokens(oauth_tokens)
172
176
 
177
+ # Regardless of whether any values were changed, we may as well write to the keyring.
178
+ # Unchanged values are simply rewritten with their existing contents, since this runs after reading from the keyring.
173
179
  set_password(self.service_name, self.username, self.model_dump_json())
174
180
 
175
181
  return self
176
182
 
177
183
 
178
184
  class Blog(FullyValidatedModel):
185
+ name: str = ""
179
186
  posts: int = 0
180
187
  uuid: str = ""
181
188
 
@@ -206,24 +213,31 @@ class Post(FullyValidatedModel):
206
213
 
207
214
  content: SkipJsonSchema[list[Block]] = []
208
215
  layout: SkipJsonSchema[list[Block]] = []
209
- trail: SkipJsonSchema[list[Any]] = []
216
+ trail: SkipJsonSchema[list[Self]] = []
210
217
 
211
218
  is_submission: SkipJsonSchema[bool] = False
212
219
 
213
220
  def __rich__(self) -> Panel:
214
221
  return Panel(
215
- self.get_content_text(),
222
+ str(self),
216
223
  title="Preview",
217
224
  subtitle=" ".join(f"#{tag}" for tag in self.tags),
218
225
  subtitle_align="left",
219
226
  )
220
227
 
221
- def valid_text_post(self) -> bool:
222
- return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or self.trail or any(block.type == "ask" for block in self.layout))
223
-
224
- def get_content_text(self) -> str:
228
+ def __str__(self) -> str:
229
+ # This function is really only relevant when a post is already valid, so we don't have to check the block types.
230
+ # If it is called on an invalid post, it would also work, but might give strange data.
225
231
  return "\n\n".join(block.text for block in self.content)
226
232
 
233
+ def valid_text_post(self) -> bool:
234
+ # Checks if this post:
235
+ # - has any content blocks (some glitched empty posts have no content)
236
+ # - only has content blocks of type 'text' (this excludes photo/video/poll/etc posts)
237
+ # - is not a submitted post
238
+ # - has no ask blocks in the layout
239
+ return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or any(block.type == "ask" for block in self.layout))
240
+
227
241
 
228
242
  class Example(FullyValidatedModel):
229
243
  class Message(FullyValidatedModel):
File without changes
File without changes