tumblrbot 1.0.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tumblrbot-1.1.0/PKG-INFO +114 -0
- tumblrbot-1.1.0/README.md +94 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/pyproject.toml +1 -1
- tumblrbot-1.1.0/src/tumblrbot/__main__.py +39 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/flow/download.py +4 -4
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/flow/examples.py +4 -4
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/flow/fine_tune.py +11 -15
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/flow/generate.py +6 -6
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/utils/models.py +1 -1
- tumblrbot-1.1.0/src/tumblrbot/utils/settings.py +131 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/utils/tumblr.py +10 -7
- tumblrbot-1.0.0/PKG-INFO +0 -369
- tumblrbot-1.0.0/README.md +0 -348
- tumblrbot-1.0.0/src/tumblrbot/__main__.py +0 -63
- tumblrbot-1.0.0/src/tumblrbot/utils/settings.py +0 -118
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/UNLICENSE +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/utils/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.0}/src/tumblrbot/utils/common.py +0 -0
tumblrbot-1.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tumblrbot
|
|
3
|
+
Version: 1.1.0
|
|
4
|
+
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
|
+
Requires-Python: >= 3.13
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
License-Expression: Unlicense
|
|
8
|
+
License-File: UNLICENSE
|
|
9
|
+
Requires-Dist: more-itertools
|
|
10
|
+
Requires-Dist: openai
|
|
11
|
+
Requires-Dist: pydantic
|
|
12
|
+
Requires-Dist: pydantic-settings
|
|
13
|
+
Requires-Dist: requests
|
|
14
|
+
Requires-Dist: requests-oauthlib
|
|
15
|
+
Requires-Dist: rich
|
|
16
|
+
Requires-Dist: tiktoken
|
|
17
|
+
Requires-Dist: tomlkit
|
|
18
|
+
Project-URL: Issues, https://github.com/MaidThatPrograms/tumblrbot/issues
|
|
19
|
+
Project-URL: Repository, https://github.com/MaidThatPrograms/tumblrbot
|
|
20
|
+
|
|
21
|
+
[OpenAI]: https://pypi.org/project/openai
|
|
22
|
+
[Python]: https://python.org/download
|
|
23
|
+
[Rich]: https://pypi.org/project/rich
|
|
24
|
+
|
|
25
|
+
[gpt-4.1-nano-2025-04-14]: https://platform.openai.com/docs/models/gpt-4.1-nano
|
|
26
|
+
[Moderation API]: https://platform.openai.com/docs/api-reference/moderations
|
|
27
|
+
[New Post Format]: https://tumblr.com/docs/npf
|
|
28
|
+
[OAuth 2.0]: https://www.tumblr.com/docs/en/api/v2#oauth2-authorization
|
|
29
|
+
[pip]: https://pypi.org
|
|
30
|
+
|
|
31
|
+
[Download]: tumblrbot/flow/download.py
|
|
32
|
+
[Examples]: tumblrbot/flow/examples.py
|
|
33
|
+
[Fine-Tune]: tumblrbot/flow/fine_tune.py
|
|
34
|
+
[Generate]: tumblrbot/flow/generate.py
|
|
35
|
+
[Utils]: tumblrbot/utils/common.py
|
|
36
|
+
[Models]: tumblrbot/utils/models.py
|
|
37
|
+
[Settings]: tumblrbot/utils/settings.py
|
|
38
|
+
[Tumblr]: tumblrbot/utils/tumblr.py
|
|
39
|
+
[Main]: __main__.py
|
|
40
|
+
[README.md]: README.md
|
|
41
|
+
|
|
42
|
+
# tumblrbot
|
|
43
|
+

|
|
44
|
+
|
|
45
|
+
Description of original project:
|
|
46
|
+
> 4tv-tumblrbot was a collaborative project I embarked on with my close friend Dima, who goes by @smoqueen on Tumblr. The aim of this endeavor was straightforward yet silly: to develop a Tumblr bot powered by a machine-learning model. This bot would be specifically trained on the content from a particular Tumblr blog or a selected set of blogs, allowing it to mimic the style, tone, and thematic essence of the original posts.
|
|
47
|
+
|
|
48
|
+
This fork is largely a rewrite of the source code with similarities in its structure and process:
|
|
49
|
+
- Updates:
|
|
50
|
+
- Updated to [OAuth 2.0].
|
|
51
|
+
- Updated to the [New Post Format].
|
|
52
|
+
- Updated to the latest version of [OpenAI].
|
|
53
|
+
- Updated the [base model version][Settings] to [gpt-4.1-nano-2025-04-14].
|
|
54
|
+
- Removed features:
|
|
55
|
+
- [Generation][Generate]:
|
|
56
|
+
- Removed clearing drafts behavior.
|
|
57
|
+
- [Training][Examples]:
|
|
58
|
+
- Removed exports that had HTML or reblogs.
|
|
59
|
+
- Removed special word-replacement behavior.
|
|
60
|
+
- Removed filtering by year.
|
|
61
|
+
- Removed setup and related files.
|
|
62
|
+
- Changed/Added features:
|
|
63
|
+
- [Generation][Generate]:
|
|
64
|
+
- Added a link to the blog's draft page.
|
|
65
|
+
- Added error checking for uploading drafts.
|
|
66
|
+
- [Training][Examples]:
|
|
67
|
+
- Added the option to [Download] the latest posts from the [specified blogs][Settings].
|
|
68
|
+
- Added the option to remove posts flagged by the [Moderation API].
|
|
69
|
+
- Added the option to automatically [Fine-Tune] the examples on the [specified base model][Settings].
|
|
70
|
+
- Changed to now escape examples automatically.
|
|
71
|
+
- Set encoding for reading post data to `UTF-8` to fix decoding errors.
|
|
72
|
+
- Added newlines between paragraphs.
|
|
73
|
+
- Removed "ALT", submission, ask, and poll text from posts.
|
|
74
|
+
- Improved the estimated token counts and costs.
|
|
75
|
+
- Changed to [Rich] for output.
|
|
76
|
+
- Added progress bars.
|
|
77
|
+
- Added post previews.
|
|
78
|
+
- Added color, formatting, and more information to output.
|
|
79
|
+
- Created a [guided utility][Main] for every step of building your bot blog.
|
|
80
|
+
- Maid scripts wait for user input before the console closes.
|
|
81
|
+
  - Added command-line options to override [Settings] options.
|
|
82
|
+
- Added behavior to regenerate the default [config.toml][Settings] and [env.toml][Settings] if missing.
|
|
83
|
+
- Renamed several files.
|
|
84
|
+
- Renamed several [Settings] options.
|
|
85
|
+
- Changed the value of several [Settings] options.
|
|
86
|
+
- Added full type-checking coverage (fully importable from third-party scripts).
|
|
87
|
+
|
|
88
|
+
To-Do:
|
|
89
|
+
- Add documentation.
|
|
90
|
+
- Finish updating [README.md].
|
|
91
|
+
- Look into places more-itertools can help.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
95
|
+
|
|
96
|
+
## Installation
|
|
97
|
+
1. Install the latest version of [Python]:
|
|
98
|
+
- Windows: `winget install python3`
|
|
99
|
+
   - Linux (apt): `apt install python3-pip`
|
|
100
|
+
   - Linux (pacman): `pacman -S python-pip`
|
|
101
|
+
1. Install the [pip] package: `pip install tumblrbot`
|
|
102
|
+
- On Linux, you will have to make a virtual environment.
|
|
103
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
|
|
104
|
+
|
|
105
|
+
## Usage
|
|
106
|
+
If you cloned the repo, either:
|
|
107
|
+
- Double-click `__main__.py` in the root directory.
|
|
108
|
+
- Run `python __main__.py` in the root directory.
|
|
109
|
+
|
|
110
|
+
If you installed through [Pip]:
|
|
111
|
+
- Run `tumblrbot` from anywhere.
|
|
112
|
+
|
|
113
|
+
## More Information
|
|
114
|
+
- WIP
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
[OpenAI]: https://pypi.org/project/openai
|
|
2
|
+
[Python]: https://python.org/download
|
|
3
|
+
[Rich]: https://pypi.org/project/rich
|
|
4
|
+
|
|
5
|
+
[gpt-4.1-nano-2025-04-14]: https://platform.openai.com/docs/models/gpt-4.1-nano
|
|
6
|
+
[Moderation API]: https://platform.openai.com/docs/api-reference/moderations
|
|
7
|
+
[New Post Format]: https://tumblr.com/docs/npf
|
|
8
|
+
[OAuth 2.0]: https://www.tumblr.com/docs/en/api/v2#oauth2-authorization
|
|
9
|
+
[pip]: https://pypi.org
|
|
10
|
+
|
|
11
|
+
[Download]: tumblrbot/flow/download.py
|
|
12
|
+
[Examples]: tumblrbot/flow/examples.py
|
|
13
|
+
[Fine-Tune]: tumblrbot/flow/fine_tune.py
|
|
14
|
+
[Generate]: tumblrbot/flow/generate.py
|
|
15
|
+
[Utils]: tumblrbot/utils/common.py
|
|
16
|
+
[Models]: tumblrbot/utils/models.py
|
|
17
|
+
[Settings]: tumblrbot/utils/settings.py
|
|
18
|
+
[Tumblr]: tumblrbot/utils/tumblr.py
|
|
19
|
+
[Main]: __main__.py
|
|
20
|
+
[README.md]: README.md
|
|
21
|
+
|
|
22
|
+
# tumblrbot
|
|
23
|
+

|
|
24
|
+
|
|
25
|
+
Description of original project:
|
|
26
|
+
> 4tv-tumblrbot was a collaborative project I embarked on with my close friend Dima, who goes by @smoqueen on Tumblr. The aim of this endeavor was straightforward yet silly: to develop a Tumblr bot powered by a machine-learning model. This bot would be specifically trained on the content from a particular Tumblr blog or a selected set of blogs, allowing it to mimic the style, tone, and thematic essence of the original posts.
|
|
27
|
+
|
|
28
|
+
This fork is largely a rewrite of the source code with similarities in its structure and process:
|
|
29
|
+
- Updates:
|
|
30
|
+
- Updated to [OAuth 2.0].
|
|
31
|
+
- Updated to the [New Post Format].
|
|
32
|
+
- Updated to the latest version of [OpenAI].
|
|
33
|
+
- Updated the [base model version][Settings] to [gpt-4.1-nano-2025-04-14].
|
|
34
|
+
- Removed features:
|
|
35
|
+
- [Generation][Generate]:
|
|
36
|
+
- Removed clearing drafts behavior.
|
|
37
|
+
- [Training][Examples]:
|
|
38
|
+
- Removed exports that had HTML or reblogs.
|
|
39
|
+
- Removed special word-replacement behavior.
|
|
40
|
+
- Removed filtering by year.
|
|
41
|
+
- Removed setup and related files.
|
|
42
|
+
- Changed/Added features:
|
|
43
|
+
- [Generation][Generate]:
|
|
44
|
+
- Added a link to the blog's draft page.
|
|
45
|
+
- Added error checking for uploading drafts.
|
|
46
|
+
- [Training][Examples]:
|
|
47
|
+
- Added the option to [Download] the latest posts from the [specified blogs][Settings].
|
|
48
|
+
- Added the option to remove posts flagged by the [Moderation API].
|
|
49
|
+
- Added the option to automatically [Fine-Tune] the examples on the [specified base model][Settings].
|
|
50
|
+
- Changed to now escape examples automatically.
|
|
51
|
+
- Set encoding for reading post data to `UTF-8` to fix decoding errors.
|
|
52
|
+
- Added newlines between paragraphs.
|
|
53
|
+
- Removed "ALT", submission, ask, and poll text from posts.
|
|
54
|
+
- Improved the estimated token counts and costs.
|
|
55
|
+
- Changed to [Rich] for output.
|
|
56
|
+
- Added progress bars.
|
|
57
|
+
- Added post previews.
|
|
58
|
+
- Added color, formatting, and more information to output.
|
|
59
|
+
- Created a [guided utility][Main] for every step of building your bot blog.
|
|
60
|
+
- Maid scripts wait for user input before the console closes.
|
|
61
|
+
  - Added command-line options to override [Settings] options.
|
|
62
|
+
- Added behavior to regenerate the default [config.toml][Settings] and [env.toml][Settings] if missing.
|
|
63
|
+
- Renamed several files.
|
|
64
|
+
- Renamed several [Settings] options.
|
|
65
|
+
- Changed the value of several [Settings] options.
|
|
66
|
+
- Added full type-checking coverage (fully importable from third-party scripts).
|
|
67
|
+
|
|
68
|
+
To-Do:
|
|
69
|
+
- Add documentation.
|
|
70
|
+
- Finish updating [README.md].
|
|
71
|
+
- Look into places more-itertools can help.
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
75
|
+
|
|
76
|
+
## Installation
|
|
77
|
+
1. Install the latest version of [Python]:
|
|
78
|
+
- Windows: `winget install python3`
|
|
79
|
+
   - Linux (apt): `apt install python3-pip`
|
|
80
|
+
   - Linux (pacman): `pacman -S python-pip`
|
|
81
|
+
1. Install the [pip] package: `pip install tumblrbot`
|
|
82
|
+
- On Linux, you will have to make a virtual environment.
|
|
83
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
|
|
84
|
+
|
|
85
|
+
## Usage
|
|
86
|
+
If you cloned the repo, either:
|
|
87
|
+
- Double-click `__main__.py` in the root directory.
|
|
88
|
+
- Run `python __main__.py` in the root directory.
|
|
89
|
+
|
|
90
|
+
If you installed through [Pip]:
|
|
91
|
+
- Run `tumblrbot` from anywhere.
|
|
92
|
+
|
|
93
|
+
## More Information
|
|
94
|
+
- WIP
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
from openai import OpenAI
|
|
4
|
+
from rich.prompt import Confirm
|
|
5
|
+
from rich.traceback import install
|
|
6
|
+
|
|
7
|
+
from tumblrbot.flow.download import PostDownloader
|
|
8
|
+
from tumblrbot.flow.examples import ExamplesWriter
|
|
9
|
+
from tumblrbot.flow.fine_tune import FineTuner
|
|
10
|
+
from tumblrbot.flow.generate import DraftGenerator
|
|
11
|
+
from tumblrbot.utils.settings import Tokens
|
|
12
|
+
from tumblrbot.utils.tumblr import TumblrClient
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def main() -> None:
|
|
16
|
+
install()
|
|
17
|
+
tokens = Tokens()
|
|
18
|
+
with OpenAI(api_key=tokens.openai_api_key.get_secret_value()) as openai, TumblrClient(tokens) as tumblr:
|
|
19
|
+
post_downloader = PostDownloader(openai, tumblr)
|
|
20
|
+
if Confirm.ask("Download latest posts?", default=False):
|
|
21
|
+
post_downloader.download()
|
|
22
|
+
download_paths = post_downloader.get_data_paths()
|
|
23
|
+
|
|
24
|
+
examples_writer = ExamplesWriter(openai, tumblr, download_paths)
|
|
25
|
+
if Confirm.ask("Create training data?", default=False):
|
|
26
|
+
examples_writer.write_examples()
|
|
27
|
+
estimated_tokens = sum(examples_writer.count_tokens())
|
|
28
|
+
|
|
29
|
+
fine_tuner = FineTuner(openai, tumblr, estimated_tokens)
|
|
30
|
+
fine_tuner.print_estimates()
|
|
31
|
+
if Confirm.ask("Upload data to OpenAI for fine-tuning? [bold]You must do this to set the model to generate drafts from. Alternatively, manually enter a model into the config.", default=False):
|
|
32
|
+
fine_tuner.fine_tune()
|
|
33
|
+
|
|
34
|
+
if Confirm.ask("Generate drafts?", default=False):
|
|
35
|
+
DraftGenerator(openai, tumblr).create_drafts()
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
sys.exit(main())
|
|
@@ -30,16 +30,16 @@ class PostDownloader(UtilClass):
|
|
|
30
30
|
break
|
|
31
31
|
|
|
32
32
|
def get_data_path(self, blog_name: str) -> Path:
|
|
33
|
-
return (self.config.
|
|
33
|
+
return (self.config.data_directory / blog_name).with_suffix(".jsonl")
|
|
34
34
|
|
|
35
35
|
def get_data_paths(self) -> list[Path]:
|
|
36
|
-
return list(map(self.get_data_path, self.config.
|
|
36
|
+
return list(map(self.get_data_path, self.config.download_blog_identifiers))
|
|
37
37
|
|
|
38
38
|
def download(self) -> None:
|
|
39
|
-
self.config.
|
|
39
|
+
self.config.data_directory.mkdir(parents=True, exist_ok=True)
|
|
40
40
|
|
|
41
41
|
with PreviewLive() as live:
|
|
42
|
-
for blog_name in self.config.
|
|
42
|
+
for blog_name in self.config.download_blog_identifiers:
|
|
43
43
|
data_path = self.get_data_path(blog_name)
|
|
44
44
|
lines = data_path.read_text("utf_8").splitlines() if data_path.exists() else []
|
|
45
45
|
|
|
@@ -28,7 +28,7 @@ class ExamplesWriter(UtilClass):
|
|
|
28
28
|
encoding = get_encoding("o200k_base")
|
|
29
29
|
Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
|
|
30
30
|
|
|
31
|
-
with self.config.
|
|
31
|
+
with self.config.examples_file.open(encoding="utf_8") as fp:
|
|
32
32
|
for line in fp:
|
|
33
33
|
example = Example.model_validate_json(line)
|
|
34
34
|
yield len(encoding.encode("assistant")) # every reply is primed with <|start|>assistant<|message|>
|
|
@@ -77,8 +77,8 @@ class ExamplesWriter(UtilClass):
|
|
|
77
77
|
yield from posts
|
|
78
78
|
|
|
79
79
|
def write_examples(self) -> None:
|
|
80
|
-
self.config.
|
|
81
|
-
with self.config.
|
|
80
|
+
self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
|
|
81
|
+
with self.config.examples_file.open("w", encoding="utf_8") as fp:
|
|
82
82
|
for post in self.get_filtered_posts():
|
|
83
83
|
example = Example(
|
|
84
84
|
messages=[
|
|
@@ -89,4 +89,4 @@ class ExamplesWriter(UtilClass):
|
|
|
89
89
|
)
|
|
90
90
|
fp.write(f"{example.model_dump_json()}\n")
|
|
91
91
|
|
|
92
|
-
rich.print(f"[bold]The examples file can be found at: '{self.config.
|
|
92
|
+
rich.print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
|
|
@@ -24,22 +24,19 @@ class FineTuner(UtilClass):
|
|
|
24
24
|
Cost: {self.get_cost_string(job.trained_tokens)}
|
|
25
25
|
""")
|
|
26
26
|
|
|
27
|
-
self.config.
|
|
28
|
-
self.config.model_post_init()
|
|
27
|
+
self.config.job_id = ""
|
|
29
28
|
|
|
30
29
|
if job.status == "failed" and job.error is not None:
|
|
31
30
|
raise RuntimeError(job.error.message)
|
|
32
31
|
|
|
33
32
|
if job.fine_tuned_model:
|
|
34
|
-
self.config.
|
|
35
|
-
self.config.model_post_init()
|
|
33
|
+
self.config.fine_tuned_model = job.fine_tuned_model or ""
|
|
36
34
|
|
|
37
35
|
def poll_job_status(self) -> FineTuningJob:
|
|
38
|
-
job = self.openai.fine_tuning.jobs.retrieve(self.config.
|
|
36
|
+
job = self.openai.fine_tuning.jobs.retrieve(self.config.job_id)
|
|
39
37
|
|
|
40
|
-
if self.config.
|
|
41
|
-
self.config.
|
|
42
|
-
self.config.model_post_init()
|
|
38
|
+
if self.config.expected_epochs != job.hyperparameters.n_epochs and isinstance(job.hyperparameters.n_epochs, int):
|
|
39
|
+
self.config.expected_epochs = job.hyperparameters.n_epochs
|
|
43
40
|
|
|
44
41
|
self.dedent_print(f"""
|
|
45
42
|
The number of epochs has been updated to {job.hyperparameters.n_epochs}!
|
|
@@ -50,11 +47,11 @@ class FineTuner(UtilClass):
|
|
|
50
47
|
return job
|
|
51
48
|
|
|
52
49
|
def create_job(self) -> FineTuningJob:
|
|
53
|
-
if self.config.
|
|
50
|
+
if self.config.job_id:
|
|
54
51
|
return self.poll_job_status()
|
|
55
52
|
|
|
56
53
|
file = self.openai.files.create(
|
|
57
|
-
file=self.config.
|
|
54
|
+
file=self.config.examples_file,
|
|
58
55
|
purpose="fine-tune",
|
|
59
56
|
)
|
|
60
57
|
job = self.openai.fine_tuning.jobs.create(
|
|
@@ -62,8 +59,7 @@ class FineTuner(UtilClass):
|
|
|
62
59
|
training_file=file.id,
|
|
63
60
|
)
|
|
64
61
|
|
|
65
|
-
self.config.
|
|
66
|
-
self.config.model_post_init()
|
|
62
|
+
self.config.job_id = job.id
|
|
67
63
|
return job
|
|
68
64
|
|
|
69
65
|
def fine_tune(self) -> None:
|
|
@@ -94,15 +90,15 @@ class FineTuner(UtilClass):
|
|
|
94
90
|
self.process_completed_job(job)
|
|
95
91
|
|
|
96
92
|
def get_cost_string(self, total_tokens: int) -> str:
|
|
97
|
-
return f"${self.config.
|
|
93
|
+
return f"${self.config.token_price / 1000000 * total_tokens:.2f}"
|
|
98
94
|
|
|
99
95
|
def print_estimates(self) -> None:
|
|
100
|
-
total_tokens = self.config.
|
|
96
|
+
total_tokens = self.config.expected_epochs * self.estimated_tokens
|
|
101
97
|
cost_string = self.get_cost_string(total_tokens)
|
|
102
98
|
|
|
103
99
|
self.dedent_print(f"""
|
|
104
100
|
Tokens {self.estimated_tokens:,}:
|
|
105
|
-
Total tokens for [bold orange1]{self.config.
|
|
101
|
+
Total tokens for [bold orange1]{self.config.expected_epochs}[/] epoch(s): {total_tokens:,}
|
|
106
102
|
Expected cost when trained with [bold purple]{self.config.base_model}[/]: {cost_string}
|
|
107
103
|
NOTE: Token values are approximate and may not be 100% accurate, please be aware of this when using the data.
|
|
108
104
|
[italic red]Neither Amelia nor Mutsumi are responsible for any inaccuracies in the token count or estimated price.[/]
|
|
@@ -8,7 +8,7 @@ from tumblrbot.utils.models import Post
|
|
|
8
8
|
|
|
9
9
|
class DraftGenerator(UtilClass):
|
|
10
10
|
def generate_tags(self, content: Post.Block) -> Post | None:
|
|
11
|
-
if random() < self.config.
|
|
11
|
+
if random() < self.config.tags_chance: # noqa: S311
|
|
12
12
|
return self.openai.responses.parse(
|
|
13
13
|
input=content.text,
|
|
14
14
|
model=self.config.base_model,
|
|
@@ -23,7 +23,7 @@ class DraftGenerator(UtilClass):
|
|
|
23
23
|
content = self.openai.responses.create(
|
|
24
24
|
input=self.config.user_input,
|
|
25
25
|
instructions=self.config.developer_message,
|
|
26
|
-
model=self.config.
|
|
26
|
+
model=self.config.fine_tuned_model,
|
|
27
27
|
).output_text
|
|
28
28
|
|
|
29
29
|
return Post.Block(type="text", text=content)
|
|
@@ -36,16 +36,16 @@ class DraftGenerator(UtilClass):
|
|
|
36
36
|
return post
|
|
37
37
|
|
|
38
38
|
def create_drafts(self) -> None:
|
|
39
|
-
message = f"View drafts here: https://tumblr.com/blog/{self.config.
|
|
39
|
+
message = f"View drafts here: https://tumblr.com/blog/{self.config.upload_blog_identifier}/drafts"
|
|
40
40
|
|
|
41
41
|
with PreviewLive() as live:
|
|
42
|
-
for i in live.progress.track(range(self.config.
|
|
42
|
+
for i in live.progress.track(range(self.config.draft_count), description="Generating drafts..."):
|
|
43
43
|
try:
|
|
44
44
|
post = self.generate_post()
|
|
45
|
-
self.tumblr.create_draft_post(self.config.
|
|
45
|
+
self.tumblr.create_draft_post(self.config.upload_blog_identifier, post)
|
|
46
46
|
live.custom_update(post)
|
|
47
47
|
except BaseException as exc:
|
|
48
48
|
exc.add_note(f"📉 An error occurred! Generated {i} draft(s) before failing. {message}")
|
|
49
49
|
raise
|
|
50
50
|
|
|
51
|
-
rich.print(f":chart_increasing: [bold green]Generated {self.config.
|
|
51
|
+
rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
|
|
@@ -22,7 +22,7 @@ class Post(FullyValidatedModel):
|
|
|
22
22
|
text: str = ""
|
|
23
23
|
blocks: set[int] = set() # noqa: RUF012
|
|
24
24
|
|
|
25
|
-
tags: Annotated[
|
|
25
|
+
tags: Annotated[list[str], PlainSerializer(",".join)] = [] # noqa: RUF012
|
|
26
26
|
content: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
|
|
27
27
|
layout: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
|
|
28
28
|
trail: SkipJsonSchema[list[Any]] = [] # noqa: RUF012
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from collections.abc import Generator, Sequence
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import TYPE_CHECKING, Any, Self, override
|
|
4
|
+
|
|
5
|
+
import rich
|
|
6
|
+
from openai.types import ChatModel
|
|
7
|
+
from pydantic import Field, PositiveFloat, PositiveInt, Secret, model_validator
|
|
8
|
+
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
|
|
9
|
+
from rich.prompt import Prompt
|
|
10
|
+
from tomlkit import comment, document, dumps
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from _typeshed import StrPath
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class TOMLSettings(BaseSettings):
|
|
17
|
+
model_config = SettingsConfigDict(
|
|
18
|
+
extra="ignore",
|
|
19
|
+
validate_assignment=True,
|
|
20
|
+
validate_return=True,
|
|
21
|
+
validate_by_name=True,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
@override
|
|
25
|
+
@classmethod
|
|
26
|
+
def settings_customise_sources(cls, settings_cls: type[BaseSettings], *args: PydanticBaseSettingsSource, **kwargs: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
27
|
+
return (TomlConfigSettingsSource(settings_cls),)
|
|
28
|
+
|
|
29
|
+
@model_validator(mode="after")
|
|
30
|
+
def write_to_file(self) -> Self:
|
|
31
|
+
# Make sure to call this if updating values in nested models.
|
|
32
|
+
toml_files = self.model_config.get("toml_file")
|
|
33
|
+
if isinstance(toml_files, (Path, str)):
|
|
34
|
+
self.dump_toml(toml_files)
|
|
35
|
+
elif isinstance(toml_files, Sequence):
|
|
36
|
+
for toml_file in toml_files:
|
|
37
|
+
self.dump_toml(toml_file)
|
|
38
|
+
|
|
39
|
+
return self
|
|
40
|
+
|
|
41
|
+
def dump_toml(self, toml_file: "StrPath") -> None:
|
|
42
|
+
toml_table = document()
|
|
43
|
+
|
|
44
|
+
dumped_model = self.model_dump(mode="json")
|
|
45
|
+
for name, field in self.__class__.model_fields.items():
|
|
46
|
+
if field.description:
|
|
47
|
+
for line in field.description.split(". "):
|
|
48
|
+
toml_table.add(comment(f"{line.removesuffix('.')}."))
|
|
49
|
+
|
|
50
|
+
value = getattr(self, name)
|
|
51
|
+
toml_table[name] = value.get_secret_value() if isinstance(value, Secret) else dumped_model[name]
|
|
52
|
+
|
|
53
|
+
Path(toml_file).write_text(
|
|
54
|
+
dumps(toml_table),
|
|
55
|
+
encoding="utf_8",
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class Config(TOMLSettings):
|
|
60
|
+
model_config = SettingsConfigDict(
|
|
61
|
+
cli_parse_args=True,
|
|
62
|
+
cli_avoid_json=True,
|
|
63
|
+
cli_kebab_case=True,
|
|
64
|
+
toml_file="config.toml",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
|
|
68
|
+
upload_blog_identifier: str = Field(
|
|
69
|
+
"",
|
|
70
|
+
description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.",
|
|
71
|
+
)
|
|
72
|
+
draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
|
|
73
|
+
tags_chance: float = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
|
|
74
|
+
|
|
75
|
+
download_blog_identifiers: list[str] = Field(
|
|
76
|
+
[],
|
|
77
|
+
description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.",
|
|
78
|
+
)
|
|
79
|
+
data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
|
|
80
|
+
examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
|
|
81
|
+
job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
|
|
82
|
+
expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
|
|
83
|
+
token_price: PositiveFloat = Field(1.50, description="The expected price in USD per million tokens during fine-tuning for the current model.")
|
|
84
|
+
|
|
85
|
+
base_model: ChatModel = Field("gpt-4.1-nano-2025-04-14", description="The name of the model that will be fine-tuned by the generated training data.")
|
|
86
|
+
developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
|
|
87
|
+
user_input: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
|
|
88
|
+
|
|
89
|
+
@override
|
|
90
|
+
def model_post_init(self, context: object) -> None:
|
|
91
|
+
super().model_post_init(context)
|
|
92
|
+
|
|
93
|
+
if not self.download_blog_identifiers:
|
|
94
|
+
rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
|
|
95
|
+
self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold]Example: staff.tumblr.com,changes").split(",")))
|
|
96
|
+
|
|
97
|
+
if not self.upload_blog_identifier:
|
|
98
|
+
rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
|
|
99
|
+
self.upload_blog_identifier = Prompt.ask("[bold]Examples: staff.tumblr.com or changes").strip()
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class Tokens(TOMLSettings):
|
|
103
|
+
model_config = SettingsConfigDict(toml_file="env.toml")
|
|
104
|
+
|
|
105
|
+
openai_api_key: Secret[str] = Secret("")
|
|
106
|
+
|
|
107
|
+
tumblr_client_id: Secret[str] = Secret("")
|
|
108
|
+
tumblr_client_secret: Secret[str] = Secret("")
|
|
109
|
+
tumblr_token: Secret[Any] = Secret({})
|
|
110
|
+
|
|
111
|
+
@staticmethod
|
|
112
|
+
def online_token_prompt(url: str, *tokens: str) -> Generator[Secret[str]]:
|
|
113
|
+
formatted_tokens = [f"[cyan]{token}[/]" for token in tokens]
|
|
114
|
+
formatted_token_string = " and ".join(formatted_tokens)
|
|
115
|
+
|
|
116
|
+
rich.print(f"Retrieve your {formatted_token_string} from: {url}")
|
|
117
|
+
for token in formatted_tokens:
|
|
118
|
+
prompt = f"Enter your {token} [yellow](hidden)"
|
|
119
|
+
yield Secret(Prompt.ask(prompt, password=True).strip())
|
|
120
|
+
|
|
121
|
+
rich.print()
|
|
122
|
+
|
|
123
|
+
@override
|
|
124
|
+
def model_post_init(self, context: object) -> None:
|
|
125
|
+
super().model_post_init(context)
|
|
126
|
+
|
|
127
|
+
if not self.openai_api_key.get_secret_value():
|
|
128
|
+
(self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
|
|
129
|
+
|
|
130
|
+
if not (self.tumblr_client_id.get_secret_value() and self.tumblr_client_secret.get_secret_value()):
|
|
131
|
+
self.tumblr_client_id, self.tumblr_client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
|
|
@@ -17,11 +17,15 @@ class TumblrClient(OAuth2Session):
|
|
|
17
17
|
|
|
18
18
|
def __post_init__(self) -> None:
|
|
19
19
|
super().__init__(
|
|
20
|
-
self.tokens.
|
|
20
|
+
self.tokens.tumblr_client_id.get_secret_value(),
|
|
21
21
|
auto_refresh_url="https://api.tumblr.com/v2/oauth2/token",
|
|
22
|
-
auto_refresh_kwargs=
|
|
22
|
+
auto_refresh_kwargs={
|
|
23
|
+
"client_id": self.tokens.tumblr_client_id.get_secret_value(),
|
|
24
|
+
"client_secret": self.tokens.tumblr_client_secret.get_secret_value(),
|
|
25
|
+
"token": self.tokens.tumblr_token.get_secret_value(),
|
|
26
|
+
},
|
|
23
27
|
scope=["basic", "write", "offline_access"],
|
|
24
|
-
token=self.tokens.
|
|
28
|
+
token=self.tokens.tumblr_token.get_secret_value(),
|
|
25
29
|
token_updater=self.token_saver,
|
|
26
30
|
)
|
|
27
31
|
|
|
@@ -30,7 +34,7 @@ class TumblrClient(OAuth2Session):
|
|
|
30
34
|
def __enter__(self) -> Self:
|
|
31
35
|
super().__enter__()
|
|
32
36
|
|
|
33
|
-
if not self.tokens.
|
|
37
|
+
if not self.tokens.tumblr_token.get_secret_value():
|
|
34
38
|
authorization_url, _ = self.authorization_url("https://tumblr.com/oauth2/authorize")
|
|
35
39
|
|
|
36
40
|
rich.print(f"Please go to {authorization_url} and authorize access.")
|
|
@@ -41,15 +45,14 @@ class TumblrClient(OAuth2Session):
|
|
|
41
45
|
self.fetch_token(
|
|
42
46
|
"https://api.tumblr.com/v2/oauth2/token",
|
|
43
47
|
authorization_response=authorization_response,
|
|
44
|
-
client_secret=self.tokens.
|
|
48
|
+
client_secret=self.tokens.tumblr_client_secret.get_secret_value(),
|
|
45
49
|
),
|
|
46
50
|
)
|
|
47
51
|
|
|
48
52
|
return self
|
|
49
53
|
|
|
50
54
|
def token_saver(self, token: object) -> None:
|
|
51
|
-
self.tokens.
|
|
52
|
-
self.tokens.model_post_init()
|
|
55
|
+
self.tokens.tumblr_token = Secret(token)
|
|
53
56
|
|
|
54
57
|
def response_hook(self, response: Response, **_: object) -> None:
|
|
55
58
|
try:
|