tumblrbot 1.3.0.tar.gz → 1.3.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/PKG-INFO +7 -4
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/README.md +5 -3
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/pyproject.toml +2 -1
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/flow/examples.py +4 -6
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/flow/fine_tune.py +33 -18
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/flow/generate.py +2 -2
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/utils/config.py +4 -4
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/utils/models.py +4 -12
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/.github/dependabot.yml +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/.gitignore +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/UNLICENSE +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/__main__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/flow/download.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/utils/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/utils/common.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.2}/src/tumblrbot/utils/tumblr.py +0 -0
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tumblrbot
-Version: 1.3.0
+Version: 1.3.2
 Summary: An updated bot that posts to Tumblr, based on your very own blog!
 Requires-Python: >= 3.13
 Description-Content-Type: text/markdown
@@ -12,6 +12,7 @@ Requires-Dist: pydantic-settings
 Requires-Dist: requests
 Requires-Dist: requests-oauthlib
 Requires-Dist: rich
+Requires-Dist: tenacity
 Requires-Dist: tiktoken
 Requires-Dist: tomlkit
 Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
@@ -57,7 +58,6 @@ Features:
 - Filters out posts that contain more than just text data.
 - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
 - Shows progress and previews the current post.
-- Formats asks as the user message and the responses as the assistant response.
 - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
 1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
@@ -90,11 +90,14 @@ Features:
 Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config](#configuration).
 
 ## Obtaining Tokens
-
+### OpenAI
+API token can be created [here](https://platform.openai.com/settings/organization/api-keys).
 1. Leave everything at the defaults and set `Project` to `Default Project`.
 1. Press `Create secret key`.
 1. Press `Copy` to copy the API token to your clipboard.
-
+
+### Tumblr
+API tokens can be created [here](https://tumblr.com/oauth/apps).
 1. Press `+ Register Application`.
 1. Enter anything for `Application Name` and `Application Description`.
 1. Enter any URL for `Application Website` and `Default callback URL`, like `https://example.com`.
README.md

@@ -39,7 +39,6 @@ Features:
 - Filters out posts that contain more than just text data.
 - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
 - Shows progress and previews the current post.
-- Formats asks as the user message and the responses as the assistant response.
 - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
 1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
@@ -72,11 +71,14 @@ Features:
 Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config](#configuration).
 
 ## Obtaining Tokens
-
+### OpenAI
+API token can be created [here](https://platform.openai.com/settings/organization/api-keys).
 1. Leave everything at the defaults and set `Project` to `Default Project`.
 1. Press `Create secret key`.
 1. Press `Copy` to copy the API token to your clipboard.
-
+
+### Tumblr
+API tokens can be created [here](https://tumblr.com/oauth/apps).
 1. Press `+ Register Application`.
 1. Enter anything for `Application Name` and `Application Description`.
 1. Enter any URL for `Application Website` and `Default callback URL`, like `https://example.com`.
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "tumblrbot"
-version = "1.3.0"
+version = "1.3.2"
 description = "An updated bot that posts to Tumblr, based on your very own blog!"
 readme = "README.md"
 requires-python = ">= 3.13"
@@ -13,6 +13,7 @@ dependencies = [
     "requests",
     "requests-oauthlib",
     "rich",
+    "tenacity",
     "tiktoken",
     "tomlkit",
 ]
src/tumblrbot/flow/examples.py

@@ -53,7 +53,7 @@ class ExamplesWriter(FlowClass):
         with data_path.open(encoding="utf_8") as fp:
             for line in fp:
                 post = Post.model_validate_json(line)
-                if not (post.is_submission or post.trail) and post.only_text_blocks() and post.
+                if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
                     yield post
 
     def get_filtered_posts(self) -> Generator[Post]:
@@ -68,7 +68,7 @@ class ExamplesWriter(FlowClass):
             ceil(len(posts) / chunk_size),
             description="Removing flagged posts...",
         ):
-            response = self.openai.moderations.create(input=
+            response = self.openai.moderations.create(input=list(map(Post.get_content_text, chunk)))
             for post, moderation in zip(chunk, response.results, strict=True):
                 if moderation.flagged:
                     removed += 1
@@ -89,11 +89,9 @@ class ExamplesWriter(FlowClass):
 
         with self.config.examples_file.open("w", encoding="utf_8") as fp:
            for post in self.get_filtered_posts():
-                ask_content, response_content = post.get_text_content()
-
                self.write_example(
-
-
+                    self.config.user_message,
+                    post.get_content_text(),
                    fp,
                )
 
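For context on the examples.py change above: the moderation call now sends a whole chunk of post texts in a single request instead of one request per post. Below is a minimal sketch of that batching pattern (not part of the package), assuming an `OPENAI_API_KEY` in the environment and plain strings standing in for tumblrbot's `Post` objects:

```python
from itertools import batched  # Python 3.12+; tumblrbot already requires >= 3.13

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# Hypothetical stand-in for the filtered post texts ExamplesWriter collects.
texts = ["first post body", "second post body", "a post that might get flagged"]

kept: list[str] = []
for chunk in batched(texts, 32):  # chunk size is an assumption, not tumblrbot's value
    # One Moderation API call per chunk; results come back in the same order as the input.
    response = client.moderations.create(input=list(chunk))
    for text, moderation in zip(chunk, response.results, strict=True):
        if not moderation.flagged:
            kept.append(text)

print(f"{len(kept)} of {len(texts)} posts kept after moderation")
```

Passing a list keeps the number of Moderation API round-trips proportional to the number of chunks rather than the number of posts.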
src/tumblrbot/flow/fine_tune.py

@@ -4,7 +4,10 @@ from textwrap import dedent
 from time import sleep
 
 import rich
+from openai import BadRequestError
+from openai.types import FileObject
 from openai.types.fine_tuning import FineTuningJob
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed, wait_random
 
 from tumblrbot.utils.common import FlowClass, PreviewLive
 
@@ -46,35 +49,46 @@ class FineTuner(FlowClass):
 
         return job
 
-
-
-
-
-
-
-
-
-        job = self.openai.fine_tuning.jobs.create(
+    @retry(
+        stop=stop_after_attempt(5),
+        wait=wait_fixed(1.5) + wait_random(),
+        retry=retry_if_exception_type(BadRequestError),
+        reraise=True,
+    )
+    def attempt_submit_job(self, file: FileObject) -> FineTuningJob:
+        return self.openai.fine_tuning.jobs.create(
             model=self.config.base_model,
             training_file=file.id,
         )
 
+    def create_job(self, live: PreviewLive) -> FineTuningJob:
+        if self.config.job_id:
+            return self.poll_job_status()
+
+        with live.progress.open(self.config.examples_file, "rb", description=f"Uploading {self.config.examples_file}...") as fp:
+            file = self.openai.files.create(
+                file=fp,
+                purpose="fine-tune",
+            )
+
+        job = self.attempt_submit_job(file)
+
         self.config.job_id = job.id
         return job
 
     def fine_tune(self) -> None:
-
+        with PreviewLive() as live:
+            job = self.create_job(live)
 
-
-
-
-
-
+            self.dedent_print(f"""
+                [bold]Fine-tuning is starting...[/]
+                View it online at: https://platform.openai.com/finetune/{job.id}
+                Created at: {datetime.fromtimestamp(job.created_at)}
+                Base Model: {job.model}
 
-
-
+                [italic dim]Closing this terminal will not stop the fine-tuning. This will take a while...
+            """) # noqa: DTZ006
 
-        with PreviewLive() as live:
             task_id = live.progress.add_task("", total=None)
 
             while job.status not in {"succeeded", "failed", "cancelled"}:
@@ -82,6 +96,7 @@ class FineTuner(FlowClass):
 
                 live.progress.update(
                     task_id,
+                    total=job.estimated_finish,
                     description=f"Fine-tuning: [italic]{job.status.replace('_', ' ').title()}[/]...",
                 )
 
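The new `attempt_submit_job` wraps job creation in a tenacity retry policy: up to five attempts, a fixed 1.5 s wait plus random jitter between them, retrying only on `BadRequestError` (presumably raised while the freshly uploaded training file is still being processed), and re-raising the original error rather than wrapping it in `RetryError`. Here is a standalone sketch of the same policy against a stand-in exception and function, so it runs without an OpenAI account:

```python
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed, wait_random


class NotReadyError(Exception):
    """Hypothetical stand-in for openai.BadRequestError."""


attempts = 0


@retry(
    stop=stop_after_attempt(5),                    # give up after 5 tries
    wait=wait_fixed(1.5) + wait_random(),          # 1.5 s plus up to 1 s of jitter
    retry=retry_if_exception_type(NotReadyError),  # only retry this error type
    reraise=True,                                  # surface the original error, not RetryError
)
def submit_job() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise NotReadyError("training file not processed yet")
    return "ftjob-example"


print(submit_job(), "after", attempts, "attempts")
```

With `reraise=True`, a caller that exhausts all five attempts sees the original exception instead of tenacity's wrapper.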
src/tumblrbot/flow/generate.py

@@ -10,10 +10,10 @@ class DraftGenerator(FlowClass):
     def generate_tags(self, content: Post.Block) -> Post | None:
         if random() < self.config.tags_chance: # noqa: S311
             return self.openai.responses.parse(
-                input=content.text,
+                input=f"Extract the most important subjects from the following text:\n\n{content.text}",
                 model=self.config.base_model,
                 text_format=Post,
-                instructions="You are an advanced text summarization tool.
+                instructions="You are an advanced text summarization tool. You return the requested data to the user as a list of comma-separated strings.",
                 temperature=0.5,
             ).output_parsed
 
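The generate.py change embeds the post text in an explicit extraction prompt, while tag generation stays gated by `tags_chance`, so only a fraction of drafts incur the extra OpenAI call. A rough sketch of that gate and prompt construction (with the `responses.parse` call replaced by a print so it runs offline; the helper names are hypothetical):

```python
from random import random

TAGS_CHANCE = 0.1  # mirrors the default tags_chance in config.py


def build_tag_prompt(text: str) -> str:
    # Same framing the diff adds around the post text.
    return f"Extract the most important subjects from the following text:\n\n{text}"


def maybe_generate_tags(text: str) -> str | None:
    # Roughly TAGS_CHANCE of calls return a prompt; the rest skip the API call entirely.
    if random() < TAGS_CHANCE:
        # In tumblrbot this is where self.openai.responses.parse(...) would be invoked.
        return build_tag_prompt(text)
    return None


drafts = ["a post about cats", "a post about tea", "a post about rain"]
for draft in drafts:
    prompt = maybe_generate_tags(draft)
    print("tags call" if prompt else "skipped", "->", draft)
```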
src/tumblrbot/utils/config.py

@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Self, override
 import rich
 import tomlkit
 from openai.types import ChatModel
-from pydantic import Field, PositiveFloat, PositiveInt, Secret, model_validator
+from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, Secret, model_validator
 from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
 from rich.prompt import Prompt
 from tomlkit import comment, document
@@ -32,7 +32,7 @@ class Config(BaseSettings):
         description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.",
     )
     draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
-    tags_chance:
+    tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
 
     download_blog_identifiers: list[str] = Field(
         [],
@@ -43,9 +43,9 @@ class Config(BaseSettings):
     examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
     job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
     expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
-    token_price: PositiveFloat = Field(
+    token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
 
-    base_model: ChatModel = Field("gpt-
+    base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
     developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
     user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
 
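With `tags_chance` typed as `NonNegativeFloat` and `token_price` as `PositiveFloat`, out-of-range values in the config are rejected when the settings load instead of surfacing later. A minimal sketch of that validation behaviour, using a trimmed-down model rather than tumblrbot's full `Config`:

```python
from pydantic import BaseModel, Field, NonNegativeFloat, PositiveFloat, ValidationError


class MiniConfig(BaseModel):
    # Trimmed-down stand-ins for the fields shown in the diff.
    tags_chance: NonNegativeFloat = Field(0.1)
    token_price: PositiveFloat = Field(3)


print(MiniConfig())                 # defaults pass validation
print(MiniConfig(tags_chance=0.0))  # 0.0 is allowed: tags are simply never generated

try:
    MiniConfig(tags_chance=-0.5)    # negative values are rejected at load time
except ValidationError as error:
    print(error.errors()[0]["msg"])
```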
src/tumblrbot/utils/models.py

@@ -102,24 +102,16 @@ class Post(FullyValidatedModel):
 
     def __rich__(self) -> Panel:
         return Panel(
-            self.
+            self.get_content_text(),
             title="Preview",
             subtitle=" ".join(f"#{tag}" for tag in self.tags),
             subtitle_align="left",
         )
 
-
-
-        super().model_post_init(context)
-
-        indices: set[int] = set()
-        for block in self.layout:
-            if block.type == "ask":
-                indices.update(block.blocks)
-
-        self.content = [block for i, block in enumerate(self.content) if i not in indices and block.type == "text"]
+    def only_text_blocks(self) -> bool:
+        return all(block.type == "text" for block in self.content) and not any(block.type == "ask" for block in self.layout)
 
-    def
+    def get_content_text(self) -> str:
         return "\n\n".join(block.text for block in self.content)
 
 
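The models.py change replaces the `model_post_init` hook that rewrote `self.content` in place with two read-only helpers: `only_text_blocks` reports whether a post is pure text (and not an answered ask), and `get_content_text` joins the text blocks. A self-contained sketch of those helpers on a simplified stand-in for the `Post` model (the real model has many more fields):

```python
from pydantic import BaseModel


class Block(BaseModel):
    type: str
    text: str = ""
    blocks: list[int] = []


class Post(BaseModel):
    # Simplified stand-in for tumblrbot's Post model.
    content: list[Block] = []
    layout: list[Block] = []

    def only_text_blocks(self) -> bool:
        # True only when every content block is text and no layout block marks an ask.
        return all(block.type == "text" for block in self.content) and not any(block.type == "ask" for block in self.layout)

    def get_content_text(self) -> str:
        return "\n\n".join(block.text for block in self.content)


post = Post(content=[Block(type="text", text="hello"), Block(type="text", text="world")])
ask = Post(content=[Block(type="text", text="answer")], layout=[Block(type="ask", blocks=[0])])

print(post.only_text_blocks(), repr(post.get_content_text()))  # True 'hello\n\nworld'
print(ask.only_text_blocks())                                  # False: asks are filtered out
```

Keeping the filtering in query methods means loading a post no longer mutates its content, which is what lets examples.py call `only_text_blocks()` and `get_content_text()` directly when selecting training examples.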