tumblrbot 1.4.6__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- tumblrbot/__main__.py +6 -2
- tumblrbot/flow/download.py +13 -13
- tumblrbot/flow/examples.py +17 -23
- tumblrbot/flow/fine_tune.py +2 -2
- tumblrbot/flow/generate.py +13 -12
- tumblrbot/utils/common.py +3 -3
- tumblrbot/utils/models.py +112 -18
- tumblrbot/utils/tumblr.py +8 -7
- {tumblrbot-1.4.6.dist-info → tumblrbot-1.5.0.dist-info}/METADATA +7 -8
- tumblrbot-1.5.0.dist-info/RECORD +15 -0
- tumblrbot/utils/config.py +0 -97
- tumblrbot-1.4.6.dist-info/RECORD +0 -16
- {tumblrbot-1.4.6.dist-info → tumblrbot-1.5.0.dist-info}/WHEEL +0 -0
- {tumblrbot-1.4.6.dist-info → tumblrbot-1.5.0.dist-info}/entry_points.txt +0 -0
tumblrbot/__main__.py
CHANGED
|
@@ -14,13 +14,17 @@ from tumblrbot.utils.tumblr import TumblrSession
|
|
|
14
14
|
def main() -> None:
|
|
15
15
|
install()
|
|
16
16
|
|
|
17
|
-
tokens = Tokens.
|
|
17
|
+
tokens = Tokens.load()
|
|
18
18
|
with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
|
|
19
19
|
if Confirm.ask("Download latest posts?", default=False):
|
|
20
20
|
PostDownloader(openai=openai, tumblr=tumblr).main()
|
|
21
21
|
|
|
22
|
+
examples_writer = ExamplesWriter(openai=openai, tumblr=tumblr)
|
|
22
23
|
if Confirm.ask("Create training data?", default=False):
|
|
23
|
-
|
|
24
|
+
examples_writer.main()
|
|
25
|
+
|
|
26
|
+
if Confirm.ask("Remove training data flagged by the OpenAI moderation? [bold]This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
|
|
27
|
+
examples_writer.filter_examples()
|
|
24
28
|
|
|
25
29
|
fine_tuner = FineTuner(openai=openai, tumblr=tumblr)
|
|
26
30
|
fine_tuner.print_estimates()
|
tumblrbot/flow/download.py
CHANGED
|
@@ -18,7 +18,7 @@ class PostDownloader(FlowClass):
|
|
|
18
18
|
completed = 0
|
|
19
19
|
after = 0
|
|
20
20
|
if data_path.exists():
|
|
21
|
-
lines = data_path.
|
|
21
|
+
lines = data_path.read_bytes().splitlines() if data_path.exists() else []
|
|
22
22
|
completed = len(lines)
|
|
23
23
|
if lines:
|
|
24
24
|
after = Post.model_validate_json(lines[-1]).timestamp
|
|
@@ -36,18 +36,18 @@ class PostDownloader(FlowClass):
|
|
|
36
36
|
task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
|
|
37
37
|
|
|
38
38
|
while True:
|
|
39
|
-
response = self.tumblr.retrieve_published_posts(blog_identifier, after=after)
|
|
40
|
-
live.progress.update(task_id, total=response
|
|
39
|
+
response = self.tumblr.retrieve_published_posts(blog_identifier, after=after)
|
|
40
|
+
live.progress.update(task_id, total=response.response.blog.posts, completed=completed)
|
|
41
41
|
|
|
42
|
-
if
|
|
43
|
-
|
|
44
|
-
dump(post, fp)
|
|
45
|
-
fp.write("\n")
|
|
42
|
+
if not response.response.posts:
|
|
43
|
+
return
|
|
46
44
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
45
|
+
for post in response.response.posts:
|
|
46
|
+
dump(post, fp)
|
|
47
|
+
fp.write("\n")
|
|
50
48
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
model = Post.model_validate(post)
|
|
50
|
+
after = model.timestamp
|
|
51
|
+
live.custom_update(model)
|
|
52
|
+
|
|
53
|
+
completed += len(response.response.posts)
|
tumblrbot/flow/examples.py
CHANGED
|
@@ -7,7 +7,6 @@ from typing import IO, override
|
|
|
7
7
|
|
|
8
8
|
import rich
|
|
9
9
|
from openai import BadRequestError
|
|
10
|
-
from rich.prompt import Confirm
|
|
11
10
|
|
|
12
11
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
13
12
|
from tumblrbot.utils.models import Example, Post
|
|
@@ -26,7 +25,7 @@ class ExamplesWriter(FlowClass):
|
|
|
26
25
|
fp,
|
|
27
26
|
)
|
|
28
27
|
|
|
29
|
-
for post in self.
|
|
28
|
+
for post in self.get_valid_posts():
|
|
30
29
|
self.write_example(
|
|
31
30
|
self.config.user_message,
|
|
32
31
|
post.get_content_text(),
|
|
@@ -49,43 +48,38 @@ class ExamplesWriter(FlowClass):
|
|
|
49
48
|
self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
|
|
50
49
|
self.config.custom_prompts_file.touch(exist_ok=True)
|
|
51
50
|
|
|
52
|
-
with self.config.custom_prompts_file.open("
|
|
51
|
+
with self.config.custom_prompts_file.open("rb") as fp:
|
|
53
52
|
for line in fp:
|
|
54
53
|
data: dict[str, str] = loads(line)
|
|
55
54
|
yield from data.items()
|
|
56
55
|
|
|
57
|
-
def
|
|
58
|
-
|
|
56
|
+
def get_valid_posts(self) -> Generator[Post]:
|
|
57
|
+
for data_path in self.get_data_paths():
|
|
58
|
+
with data_path.open("rb") as fp:
|
|
59
|
+
for line in fp:
|
|
60
|
+
post = Post.model_validate_json(line)
|
|
61
|
+
if post.valid_text_post():
|
|
62
|
+
yield post
|
|
59
63
|
|
|
60
|
-
|
|
64
|
+
def filter_examples(self) -> None:
|
|
65
|
+
examples = self.config.examples_file.read_text("utf_8").splitlines()
|
|
66
|
+
with self.config.examples_file.open("w", encoding="utf_8") as fp:
|
|
61
67
|
batch_size = self.get_moderation_batch_size()
|
|
62
|
-
posts = list(posts)
|
|
63
68
|
removed = 0
|
|
64
69
|
|
|
65
70
|
with PreviewLive() as live:
|
|
66
71
|
for batch in live.progress.track(
|
|
67
|
-
batched(
|
|
68
|
-
ceil(len(
|
|
72
|
+
batched(examples, batch_size, strict=False),
|
|
73
|
+
ceil(len(examples) / batch_size),
|
|
69
74
|
description="Removing flagged posts...",
|
|
70
75
|
):
|
|
71
|
-
response = self.openai.moderations.create(input=list(
|
|
72
|
-
for
|
|
76
|
+
response = self.openai.moderations.create(input=list(batch))
|
|
77
|
+
for example, moderation in zip(batch, response.results, strict=True):
|
|
73
78
|
if moderation.flagged:
|
|
74
79
|
removed += 1
|
|
75
|
-
live.custom_update(post)
|
|
76
80
|
else:
|
|
77
|
-
|
|
81
|
+
fp.write(f"{example}\n")
|
|
78
82
|
rich.print(f"[red]Removed {removed} posts.\n")
|
|
79
|
-
else:
|
|
80
|
-
yield from posts
|
|
81
|
-
|
|
82
|
-
def get_valid_posts(self) -> Generator[Post]:
|
|
83
|
-
for data_path in self.get_data_paths():
|
|
84
|
-
with data_path.open(encoding="utf_8") as fp:
|
|
85
|
-
for line in fp:
|
|
86
|
-
post = Post.model_validate_json(line)
|
|
87
|
-
if post.valid_text_post():
|
|
88
|
-
yield post
|
|
89
83
|
|
|
90
84
|
def get_moderation_batch_size(self) -> int:
|
|
91
85
|
try:
|
tumblrbot/flow/fine_tune.py
CHANGED
|
@@ -98,8 +98,8 @@ class FineTuner(FlowClass):
|
|
|
98
98
|
if job.status == "failed" and job.error is not None:
|
|
99
99
|
raise RuntimeError(job.error.message)
|
|
100
100
|
|
|
101
|
-
if job.fine_tuned_model:
|
|
102
|
-
self.config.fine_tuned_model = job.fine_tuned_model
|
|
101
|
+
if job.fine_tuned_model is not None:
|
|
102
|
+
self.config.fine_tuned_model = job.fine_tuned_model
|
|
103
103
|
|
|
104
104
|
def print_estimates(self) -> None:
|
|
105
105
|
estimated_tokens = sum(self.count_tokens())
|
tumblrbot/flow/generate.py
CHANGED
|
@@ -28,26 +28,27 @@ class DraftGenerator(FlowClass):
|
|
|
28
28
|
rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
|
|
29
29
|
|
|
30
30
|
def generate_post(self) -> Post:
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
31
|
+
text = self.generate_text()
|
|
32
|
+
if tags := self.generate_tags(text):
|
|
33
|
+
tags = tags.tags
|
|
34
|
+
return Post(
|
|
35
|
+
content=[Post.Block(type="text", text=text)],
|
|
36
|
+
tags=tags or [],
|
|
37
|
+
state="draft",
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def generate_text(self) -> str:
|
|
41
|
+
return self.openai.responses.create(
|
|
39
42
|
input=self.config.user_message,
|
|
40
43
|
instructions=self.config.developer_message,
|
|
41
44
|
model=self.config.fine_tuned_model,
|
|
42
45
|
).output_text
|
|
43
46
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def generate_tags(self, content: Post.Block) -> Post | None:
|
|
47
|
+
def generate_tags(self, text: str) -> Post | None:
|
|
47
48
|
if random() < self.config.tags_chance: # noqa: S311
|
|
48
49
|
return self.openai.responses.parse(
|
|
49
50
|
text_format=Post,
|
|
50
|
-
input=
|
|
51
|
+
input=text,
|
|
51
52
|
instructions=self.config.tags_developer_message,
|
|
52
53
|
model=self.config.base_model,
|
|
53
54
|
).output_parsed
|
tumblrbot/utils/common.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
+
from pathlib import Path
|
|
2
3
|
from random import choice
|
|
3
4
|
from typing import ClassVar, Self, override
|
|
4
5
|
|
|
@@ -10,15 +11,14 @@ from rich.live import Live
|
|
|
10
11
|
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
|
|
11
12
|
from rich.table import Table
|
|
12
13
|
|
|
13
|
-
from tumblrbot.utils.
|
|
14
|
-
from tumblrbot.utils.models import FullyValidatedModel
|
|
14
|
+
from tumblrbot.utils.models import Config, FullyValidatedModel
|
|
15
15
|
from tumblrbot.utils.tumblr import TumblrSession
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class FlowClass(FullyValidatedModel):
|
|
19
19
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
20
20
|
|
|
21
|
-
config: ClassVar = Config()
|
|
21
|
+
config: ClassVar = Config.load()
|
|
22
22
|
|
|
23
23
|
openai: OpenAI
|
|
24
24
|
tumblr: TumblrSession
|
tumblrbot/utils/models.py
CHANGED
|
@@ -1,15 +1,20 @@
|
|
|
1
|
+
import tomllib
|
|
2
|
+
from abc import abstractmethod
|
|
1
3
|
from collections.abc import Generator
|
|
4
|
+
from pathlib import Path
|
|
2
5
|
from typing import Annotated, Any, ClassVar, Literal, Self, override
|
|
3
6
|
|
|
4
7
|
import rich
|
|
8
|
+
import tomlkit
|
|
5
9
|
from keyring import get_password, set_password
|
|
6
|
-
from openai import
|
|
10
|
+
from openai.types import ChatModel
|
|
7
11
|
from pwinput import pwinput
|
|
8
|
-
from pydantic import ConfigDict, PlainSerializer
|
|
12
|
+
from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, PlainSerializer, PositiveFloat, PositiveInt, model_validator
|
|
9
13
|
from pydantic.json_schema import SkipJsonSchema
|
|
10
14
|
from requests_oauthlib import OAuth1Session
|
|
11
15
|
from rich.panel import Panel
|
|
12
|
-
from rich.prompt import Confirm
|
|
16
|
+
from rich.prompt import Confirm, Prompt
|
|
17
|
+
from tomlkit import comment, document
|
|
13
18
|
|
|
14
19
|
|
|
15
20
|
class FullyValidatedModel(BaseModel):
|
|
@@ -22,7 +27,85 @@ class FullyValidatedModel(BaseModel):
|
|
|
22
27
|
)
|
|
23
28
|
|
|
24
29
|
|
|
25
|
-
class
|
|
30
|
+
class FileSyncSettings(FullyValidatedModel):
|
|
31
|
+
@classmethod
|
|
32
|
+
@abstractmethod
|
|
33
|
+
def read(cls) -> Self | dict[str, object] | str | None: ...
|
|
34
|
+
|
|
35
|
+
@classmethod
|
|
36
|
+
def load(cls) -> Self:
|
|
37
|
+
data = cls.read() or {}
|
|
38
|
+
return cls.model_validate_json(data) if isinstance(data, str) else cls.model_validate(data)
|
|
39
|
+
|
|
40
|
+
@model_validator(mode="after")
|
|
41
|
+
@abstractmethod
|
|
42
|
+
def write(self) -> Self: ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class Config(FileSyncSettings):
|
|
46
|
+
toml_file: ClassVar = Path("config.toml")
|
|
47
|
+
|
|
48
|
+
# Downloading Posts & Writing Examples
|
|
49
|
+
download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
|
|
50
|
+
data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
|
|
51
|
+
|
|
52
|
+
# Writing Examples
|
|
53
|
+
max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
|
|
54
|
+
custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
|
|
55
|
+
|
|
56
|
+
# Writing Examples & Fine-Tuning
|
|
57
|
+
examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
|
|
58
|
+
|
|
59
|
+
# Writing Examples & Generating
|
|
60
|
+
developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
|
|
61
|
+
user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
|
|
62
|
+
|
|
63
|
+
# Fine-Tuning
|
|
64
|
+
expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
|
|
65
|
+
token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
|
|
66
|
+
job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
|
|
67
|
+
|
|
68
|
+
# Fine-Tuning & Generating
|
|
69
|
+
base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
|
|
70
|
+
fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
|
|
71
|
+
|
|
72
|
+
# Generating
|
|
73
|
+
upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
|
|
74
|
+
draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
|
|
75
|
+
tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
|
|
76
|
+
tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
|
|
77
|
+
|
|
78
|
+
@classmethod
|
|
79
|
+
@override
|
|
80
|
+
def read(cls) -> dict[str, object] | None:
|
|
81
|
+
return tomllib.loads(cls.toml_file.read_text("utf_8")) if cls.toml_file.exists() else None
|
|
82
|
+
|
|
83
|
+
@model_validator(mode="after")
|
|
84
|
+
@override
|
|
85
|
+
def write(self) -> Self:
|
|
86
|
+
if not self.download_blog_identifiers:
|
|
87
|
+
rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
|
|
88
|
+
self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
|
|
89
|
+
|
|
90
|
+
if not self.upload_blog_identifier:
|
|
91
|
+
rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
|
|
92
|
+
self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
|
|
93
|
+
|
|
94
|
+
toml_table = document()
|
|
95
|
+
|
|
96
|
+
for (name, field), value in zip(self.__class__.model_fields.items(), self.model_dump(mode="json").values(), strict=True):
|
|
97
|
+
if field.description is not None:
|
|
98
|
+
for line in field.description.split(". "):
|
|
99
|
+
toml_table.add(comment(f"{line.removesuffix('.')}."))
|
|
100
|
+
|
|
101
|
+
toml_table[name] = value
|
|
102
|
+
|
|
103
|
+
Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
|
|
104
|
+
|
|
105
|
+
return self
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class Tokens(FileSyncSettings):
|
|
26
109
|
class Tumblr(FullyValidatedModel):
|
|
27
110
|
client_key: str = ""
|
|
28
111
|
client_secret: str = ""
|
|
@@ -50,15 +133,13 @@ class Tokens(FullyValidatedModel):
|
|
|
50
133
|
rich.print()
|
|
51
134
|
|
|
52
135
|
@classmethod
|
|
53
|
-
def read_from_keyring(cls) -> Self:
|
|
54
|
-
if json_data := get_password(cls.service_name, cls.username):
|
|
55
|
-
return cls.model_validate_json(json_data)
|
|
56
|
-
return cls()
|
|
57
|
-
|
|
58
136
|
@override
|
|
59
|
-
def
|
|
60
|
-
|
|
137
|
+
def read(cls) -> str | None:
|
|
138
|
+
return get_password(cls.service_name, cls.username)
|
|
61
139
|
|
|
140
|
+
@model_validator(mode="after")
|
|
141
|
+
@override
|
|
142
|
+
def write(self) -> Self:
|
|
62
143
|
if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
|
|
63
144
|
(self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
|
|
64
145
|
|
|
@@ -86,20 +167,33 @@ class Tokens(FullyValidatedModel):
|
|
|
86
167
|
|
|
87
168
|
set_password(self.service_name, self.username, self.model_dump_json())
|
|
88
169
|
|
|
170
|
+
return self
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ResponseModel(FullyValidatedModel):
|
|
174
|
+
class Response(FullyValidatedModel):
|
|
175
|
+
class Blog(FullyValidatedModel):
|
|
176
|
+
posts: int
|
|
177
|
+
|
|
178
|
+
blog: Blog = Blog(posts=0)
|
|
179
|
+
posts: list[Any] = []
|
|
180
|
+
|
|
181
|
+
response: Response
|
|
182
|
+
|
|
89
183
|
|
|
90
184
|
class Post(FullyValidatedModel):
|
|
91
185
|
class Block(FullyValidatedModel):
|
|
92
|
-
type: str
|
|
186
|
+
type: str
|
|
93
187
|
text: str = ""
|
|
94
|
-
blocks: list[int] = []
|
|
188
|
+
blocks: list[int] = []
|
|
95
189
|
|
|
96
190
|
timestamp: SkipJsonSchema[int] = 0
|
|
97
|
-
tags: Annotated[list[str], PlainSerializer(",".join)]
|
|
98
|
-
state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "
|
|
191
|
+
tags: Annotated[list[str], PlainSerializer(",".join)]
|
|
192
|
+
state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "published"
|
|
99
193
|
|
|
100
|
-
content: SkipJsonSchema[list[Block]] = []
|
|
101
|
-
layout: SkipJsonSchema[list[Block]] = []
|
|
102
|
-
trail: SkipJsonSchema[list[Any]] = []
|
|
194
|
+
content: SkipJsonSchema[list[Block]] = []
|
|
195
|
+
layout: SkipJsonSchema[list[Block]] = []
|
|
196
|
+
trail: SkipJsonSchema[list[Any]] = []
|
|
103
197
|
|
|
104
198
|
is_submission: SkipJsonSchema[bool] = False
|
|
105
199
|
|
tumblrbot/utils/tumblr.py
CHANGED
|
@@ -3,7 +3,7 @@ from typing import Self
|
|
|
3
3
|
from requests import HTTPError, Response
|
|
4
4
|
from requests_oauthlib import OAuth1Session
|
|
5
5
|
|
|
6
|
-
from tumblrbot.utils.models import Post, Tokens
|
|
6
|
+
from tumblrbot.utils.models import Post, ResponseModel, Tokens
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class TumblrSession(OAuth1Session):
|
|
@@ -19,12 +19,11 @@ class TumblrSession(OAuth1Session):
|
|
|
19
19
|
try:
|
|
20
20
|
response.raise_for_status()
|
|
21
21
|
except HTTPError as error:
|
|
22
|
-
|
|
23
|
-
error.add_note(response.text)
|
|
22
|
+
error.add_note(response.text)
|
|
24
23
|
raise
|
|
25
24
|
|
|
26
|
-
def retrieve_published_posts(self, blog_identifier: str, after: int) ->
|
|
27
|
-
|
|
25
|
+
def retrieve_published_posts(self, blog_identifier: str, after: int) -> ResponseModel:
|
|
26
|
+
response = self.get(
|
|
28
27
|
f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
|
|
29
28
|
params={
|
|
30
29
|
"after": after,
|
|
@@ -32,9 +31,11 @@ class TumblrSession(OAuth1Session):
|
|
|
32
31
|
"npf": True,
|
|
33
32
|
},
|
|
34
33
|
)
|
|
34
|
+
return ResponseModel.model_validate_json(response.content)
|
|
35
35
|
|
|
36
|
-
def create_post(self, blog_identifier: str, post: Post) ->
|
|
37
|
-
|
|
36
|
+
def create_post(self, blog_identifier: str, post: Post) -> ResponseModel:
|
|
37
|
+
response = self.post(
|
|
38
38
|
f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
|
|
39
39
|
json=post.model_dump(),
|
|
40
40
|
)
|
|
41
|
+
return ResponseModel.model_validate_json(response.content)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tumblrbot
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
5
|
Requires-Python: >= 3.13
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -8,13 +8,13 @@ Requires-Dist: keyring
|
|
|
8
8
|
Requires-Dist: openai
|
|
9
9
|
Requires-Dist: pwinput
|
|
10
10
|
Requires-Dist: pydantic
|
|
11
|
-
Requires-Dist: pydantic-settings
|
|
12
11
|
Requires-Dist: requests
|
|
13
12
|
Requires-Dist: requests-oauthlib
|
|
14
13
|
Requires-Dist: rich
|
|
15
14
|
Requires-Dist: tiktoken
|
|
16
15
|
Requires-Dist: tomlkit
|
|
17
|
-
Project-URL:
|
|
16
|
+
Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
|
|
17
|
+
Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
18
18
|
|
|
19
19
|
# tumblrbot
|
|
20
20
|
|
|
@@ -31,8 +31,8 @@ Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
|
|
|
31
31
|
[OpenAI]: https://pypi.org/project/openai
|
|
32
32
|
[OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
|
|
33
33
|
[OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
|
|
34
|
+
[OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
|
|
34
35
|
[Fine-Tuning Portal]: https://platform.openai.com/finetune
|
|
35
|
-
[Moderation API]: https://platform.openai.com/docs/api-reference/moderations
|
|
36
36
|
|
|
37
37
|
[Tumblr]: https://tumblr.com
|
|
38
38
|
[Tumblr Tokens]: https://tumblr.com/oauth/apps
|
|
@@ -57,18 +57,16 @@ Features:
|
|
|
57
57
|
- An [interactive console][Main] for all steps of generating posts for the blog:
|
|
58
58
|
1. Asks for [OpenAI] and [Tumblr] tokens.
|
|
59
59
|
- Stores API tokens using [keyring].
|
|
60
|
-
- Prevents API tokens from printing to the console.
|
|
61
60
|
1. Retrieves [Tumblr] [OAuth] tokens.
|
|
62
61
|
1. [Downloads posts][Download] from the [configured][config] [Tumblr] blogs.
|
|
63
62
|
- Skips redownloading already downloaded posts.
|
|
64
63
|
- Shows progress and previews the current post.
|
|
65
64
|
1. [Creates examples][Examples] to fine-tune the model from your posts.
|
|
66
65
|
- Filters out posts that contain more than just text data.
|
|
67
|
-
- Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
|
|
68
|
-
- Shows progress and previews the current post.
|
|
69
66
|
- Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
|
|
70
|
-
1.
|
|
67
|
+
1. Filters out any posts flagged by the [OpenAI Moderation API].
|
|
71
68
|
1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
|
|
69
|
+
- Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
|
|
72
70
|
- Resumes monitoring the same fine-tuning process when restarted.
|
|
73
71
|
- Deletes the uploaded examples file if fine-tuning does not succeed (optional).
|
|
74
72
|
- Stores the output model automatically when fine-tuning is completed.
|
|
@@ -82,6 +80,7 @@ Features:
|
|
|
82
80
|
**To-Do:**
|
|
83
81
|
|
|
84
82
|
- Add code documentation.
|
|
83
|
+
- Add reblog generation logic.
|
|
85
84
|
|
|
86
85
|
**Known Issues:**
|
|
87
86
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
|
|
3
|
+
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
tumblrbot/flow/download.py,sha256=PUs7eM-1kGOb2RKijy3lW0zyvfFDwbxzTGhVghrWIhc,2012
|
|
5
|
+
tumblrbot/flow/examples.py,sha256=MlukrVdzpIwk_-37PpRsBGV5eX-lLlNUUYvuozXC_vw,3726
|
|
6
|
+
tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
|
|
7
|
+
tumblrbot/flow/generate.py,sha256=cfIdmLFNuuKjUgk4Jtp0aTf2u86jOAUFuziq71zjDME,2148
|
|
8
|
+
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
|
|
10
|
+
tumblrbot/utils/models.py,sha256=wAS3ptbaQX3J6IlixAdhBD2wcs4BO64HD6JcCB7W6lg,9903
|
|
11
|
+
tumblrbot/utils/tumblr.py,sha256=AgrczLFyrxES66N4PwIrjxX3QcpGvh8HP-jw0lwtmc0,1427
|
|
12
|
+
tumblrbot-1.5.0.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
13
|
+
tumblrbot-1.5.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
14
|
+
tumblrbot-1.5.0.dist-info/METADATA,sha256=-mGGhfyRyVcO1M0PtNAoVakPqqMw0sAevp2XkaYrfgw,10129
|
|
15
|
+
tumblrbot-1.5.0.dist-info/RECORD,,
|
tumblrbot/utils/config.py
DELETED
|
@@ -1,97 +0,0 @@
|
|
|
1
|
-
from collections.abc import Sequence
|
|
2
|
-
from pathlib import Path
|
|
3
|
-
from typing import TYPE_CHECKING, Self, override
|
|
4
|
-
|
|
5
|
-
import rich
|
|
6
|
-
import tomlkit
|
|
7
|
-
from openai.types import ChatModel
|
|
8
|
-
from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, model_validator
|
|
9
|
-
from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
|
|
10
|
-
from rich.prompt import Prompt
|
|
11
|
-
from tomlkit import comment, document
|
|
12
|
-
|
|
13
|
-
if TYPE_CHECKING:
|
|
14
|
-
from _typeshed import StrPath
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class Config(BaseSettings):
|
|
18
|
-
model_config = SettingsConfigDict(
|
|
19
|
-
extra="ignore",
|
|
20
|
-
validate_assignment=True,
|
|
21
|
-
validate_return=True,
|
|
22
|
-
validate_by_name=True,
|
|
23
|
-
cli_parse_args=True,
|
|
24
|
-
cli_avoid_json=True,
|
|
25
|
-
cli_kebab_case=True,
|
|
26
|
-
toml_file="config.toml",
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
# Downloading Posts & Writing Examples
|
|
30
|
-
download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
|
|
31
|
-
data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
|
|
32
|
-
|
|
33
|
-
# Writing Examples
|
|
34
|
-
max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
|
|
35
|
-
custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
|
|
36
|
-
|
|
37
|
-
# Writing Examples & Fine-Tuning
|
|
38
|
-
examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
|
|
39
|
-
|
|
40
|
-
# Writing Examples & Generating
|
|
41
|
-
developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
|
|
42
|
-
user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
|
|
43
|
-
|
|
44
|
-
# Fine-Tuning
|
|
45
|
-
expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
|
|
46
|
-
token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
|
|
47
|
-
job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
|
|
48
|
-
|
|
49
|
-
# Fine-Tuning & Generating
|
|
50
|
-
base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
|
|
51
|
-
fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
|
|
52
|
-
|
|
53
|
-
# Generating
|
|
54
|
-
upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
|
|
55
|
-
draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
|
|
56
|
-
tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
|
|
57
|
-
tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
|
|
58
|
-
|
|
59
|
-
@override
|
|
60
|
-
@classmethod
|
|
61
|
-
def settings_customise_sources(cls, settings_cls: type[BaseSettings], *args: PydanticBaseSettingsSource, **kwargs: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]:
|
|
62
|
-
return (TomlConfigSettingsSource(settings_cls),)
|
|
63
|
-
|
|
64
|
-
@model_validator(mode="after")
|
|
65
|
-
def write_to_file(self) -> Self:
|
|
66
|
-
if not self.download_blog_identifiers:
|
|
67
|
-
rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
|
|
68
|
-
self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
|
|
69
|
-
|
|
70
|
-
if not self.upload_blog_identifier:
|
|
71
|
-
rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
|
|
72
|
-
self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
|
|
73
|
-
|
|
74
|
-
toml_files = self.model_config.get("toml_file")
|
|
75
|
-
if isinstance(toml_files, (Path, str)):
|
|
76
|
-
self.dump_toml(toml_files)
|
|
77
|
-
elif isinstance(toml_files, Sequence):
|
|
78
|
-
for toml_file in toml_files:
|
|
79
|
-
self.dump_toml(toml_file)
|
|
80
|
-
|
|
81
|
-
return self
|
|
82
|
-
|
|
83
|
-
def dump_toml(self, toml_file: "StrPath") -> None:
|
|
84
|
-
toml_table = document()
|
|
85
|
-
|
|
86
|
-
dumped_model = self.model_dump(mode="json")
|
|
87
|
-
for name, field in self.__class__.model_fields.items():
|
|
88
|
-
if field.description:
|
|
89
|
-
for line in field.description.split(". "):
|
|
90
|
-
toml_table.add(comment(f"{line.removesuffix('.')}."))
|
|
91
|
-
|
|
92
|
-
toml_table[name] = dumped_model[name]
|
|
93
|
-
|
|
94
|
-
Path(toml_file).write_text(
|
|
95
|
-
tomlkit.dumps(toml_table),
|
|
96
|
-
encoding="utf_8",
|
|
97
|
-
)
|
tumblrbot-1.4.6.dist-info/RECORD
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
tumblrbot/__main__.py,sha256=fV0lBm5FqTvBoq5g8soG1x0w0qebyR_oL5_339z4GpM,1460
|
|
3
|
-
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
tumblrbot/flow/download.py,sha256=xSfic66FR-LIG0KtJMcM5KVd2hiijCH3cyD7dzFKwLM,2042
|
|
5
|
-
tumblrbot/flow/examples.py,sha256=6A6bqHwLIUtEr-dv-FcWbMdVVMFImHhuMmlJLyuSW6U,4005
|
|
6
|
-
tumblrbot/flow/fine_tune.py,sha256=X9xR9uPgiK6DKQ-lZ6oqwTPypUx5D5S1MiKrFKSm5ng,5381
|
|
7
|
-
tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
|
|
8
|
-
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
tumblrbot/utils/common.py,sha256=tnKBBiEGQPOWByLi0vT31BzpntndbIP_JekGkgjy15U,1841
|
|
10
|
-
tumblrbot/utils/config.py,sha256=AlHZLx74-202BNwELNE3MzTg2Ru7R_Q5u8h9S4QGkJk,5460
|
|
11
|
-
tumblrbot/utils/models.py,sha256=bLB96qrHBr18_X6-zHatc-bczej0kLUrZxyceITAWqo,4936
|
|
12
|
-
tumblrbot/utils/tumblr.py,sha256=9lAbjO-27cpju9Wewv26lyQoogybs8b1y8mvYIuHkqw,1293
|
|
13
|
-
tumblrbot-1.4.6.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
14
|
-
tumblrbot-1.4.6.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
15
|
-
tumblrbot-1.4.6.dist-info/METADATA,sha256=IotLVzlM8nw8XwxwBt-TjGozYHEKuJRD1n1hdbevs1Q,10183
|
|
16
|
-
tumblrbot-1.4.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|