tumblrbot 1.3.0__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/PKG-INFO +1 -1
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/pyproject.toml +1 -1
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/flow/examples.py +4 -6
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/flow/fine_tune.py +16 -14
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/flow/generate.py +2 -2
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/utils/config.py +2 -2
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/utils/models.py +8 -11
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/.github/dependabot.yml +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/.gitignore +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/README.md +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/UNLICENSE +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/__main__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/flow/download.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/utils/__init__.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/utils/common.py +0 -0
- {tumblrbot-1.3.0 → tumblrbot-1.3.1}/src/tumblrbot/utils/tumblr.py +0 -0
|
@@ -53,7 +53,7 @@ class ExamplesWriter(FlowClass):
|
|
|
53
53
|
with data_path.open(encoding="utf_8") as fp:
|
|
54
54
|
for line in fp:
|
|
55
55
|
post = Post.model_validate_json(line)
|
|
56
|
-
if not (post.is_submission or post.trail) and post.only_text_blocks() and post.
|
|
56
|
+
if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
|
|
57
57
|
yield post
|
|
58
58
|
|
|
59
59
|
def get_filtered_posts(self) -> Generator[Post]:
|
|
@@ -68,7 +68,7 @@ class ExamplesWriter(FlowClass):
|
|
|
68
68
|
ceil(len(posts) / chunk_size),
|
|
69
69
|
description="Removing flagged posts...",
|
|
70
70
|
):
|
|
71
|
-
response = self.openai.moderations.create(input=
|
|
71
|
+
response = self.openai.moderations.create(input=list(map(Post.get_content_text, chunk)))
|
|
72
72
|
for post, moderation in zip(chunk, response.results, strict=True):
|
|
73
73
|
if moderation.flagged:
|
|
74
74
|
removed += 1
|
|
@@ -89,11 +89,9 @@ class ExamplesWriter(FlowClass):
|
|
|
89
89
|
|
|
90
90
|
with self.config.examples_file.open("w", encoding="utf_8") as fp:
|
|
91
91
|
for post in self.get_filtered_posts():
|
|
92
|
-
ask_content, response_content = post.get_text_content()
|
|
93
|
-
|
|
94
92
|
self.write_example(
|
|
95
|
-
|
|
96
|
-
|
|
93
|
+
self.config.user_message,
|
|
94
|
+
post.get_content_text(),
|
|
97
95
|
fp,
|
|
98
96
|
)
|
|
99
97
|
|
|
@@ -46,14 +46,16 @@ class FineTuner(FlowClass):
|
|
|
46
46
|
|
|
47
47
|
return job
|
|
48
48
|
|
|
49
|
-
def create_job(self) -> FineTuningJob:
|
|
49
|
+
def create_job(self, live: PreviewLive) -> FineTuningJob:
|
|
50
50
|
if self.config.job_id:
|
|
51
51
|
return self.poll_job_status()
|
|
52
52
|
|
|
53
|
-
|
|
54
|
-
file=self.
|
|
55
|
-
|
|
56
|
-
|
|
53
|
+
with live.progress.open(self.config.examples_file, "rb", description=f"Uploading {self.config.examples_file}...") as fp:
|
|
54
|
+
file = self.openai.files.create(
|
|
55
|
+
file=fp,
|
|
56
|
+
purpose="fine-tune",
|
|
57
|
+
)
|
|
58
|
+
|
|
57
59
|
job = self.openai.fine_tuning.jobs.create(
|
|
58
60
|
model=self.config.base_model,
|
|
59
61
|
training_file=file.id,
|
|
@@ -63,18 +65,18 @@ class FineTuner(FlowClass):
|
|
|
63
65
|
return job
|
|
64
66
|
|
|
65
67
|
def fine_tune(self) -> None:
|
|
66
|
-
|
|
68
|
+
with PreviewLive() as live:
|
|
69
|
+
job = self.create_job(live)
|
|
67
70
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
71
|
+
self.dedent_print(f"""
|
|
72
|
+
[bold]Fine-tuning is starting...[/]
|
|
73
|
+
View it online at: https://platform.openai.com/finetune/{job.id}
|
|
74
|
+
Created at: {datetime.fromtimestamp(job.created_at)}
|
|
75
|
+
Base Model: {job.model}
|
|
73
76
|
|
|
74
|
-
|
|
75
|
-
|
|
77
|
+
[italic dim]Closing this terminal will not stop the fine-tuning. This will take a while...
|
|
78
|
+
""") # noqa: DTZ006
|
|
76
79
|
|
|
77
|
-
with PreviewLive() as live:
|
|
78
80
|
task_id = live.progress.add_task("", total=None)
|
|
79
81
|
|
|
80
82
|
while job.status not in {"succeeded", "failed", "cancelled"}:
|
|
@@ -10,10 +10,10 @@ class DraftGenerator(FlowClass):
|
|
|
10
10
|
def generate_tags(self, content: Post.Block) -> Post | None:
|
|
11
11
|
if random() < self.config.tags_chance: # noqa: S311
|
|
12
12
|
return self.openai.responses.parse(
|
|
13
|
-
input=content.text,
|
|
13
|
+
input=f"Extract the most important subjects from the following text:\n\n{content.text}",
|
|
14
14
|
model=self.config.base_model,
|
|
15
15
|
text_format=Post,
|
|
16
|
-
instructions="You are an advanced text summarization tool.
|
|
16
|
+
instructions="You are an advanced text summarization tool. You return the requested data to the user as a list of comma-separated strings.",
|
|
17
17
|
temperature=0.5,
|
|
18
18
|
).output_parsed
|
|
19
19
|
|
|
@@ -43,9 +43,9 @@ class Config(BaseSettings):
|
|
|
43
43
|
examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
|
|
44
44
|
job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
|
|
45
45
|
expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
|
|
46
|
-
token_price: PositiveFloat = Field(
|
|
46
|
+
token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
|
|
47
47
|
|
|
48
|
-
base_model: ChatModel = Field("gpt-
|
|
48
|
+
base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
|
|
49
49
|
developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
|
|
50
50
|
user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
|
|
51
51
|
|
|
@@ -102,25 +102,22 @@ class Post(FullyValidatedModel):
|
|
|
102
102
|
|
|
103
103
|
def __rich__(self) -> Panel:
|
|
104
104
|
return Panel(
|
|
105
|
-
self.
|
|
105
|
+
self.get_content_text(),
|
|
106
106
|
title="Preview",
|
|
107
107
|
subtitle=" ".join(f"#{tag}" for tag in self.tags),
|
|
108
108
|
subtitle_align="left",
|
|
109
109
|
)
|
|
110
110
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
super().model_post_init(context)
|
|
111
|
+
def only_text_blocks(self) -> bool:
|
|
112
|
+
return all(block.type == "text" for block in self.content)
|
|
114
113
|
|
|
115
|
-
|
|
114
|
+
def get_content_text(self) -> str:
|
|
115
|
+
blocks = dict(enumerate(block.text for block in self.content))
|
|
116
116
|
for block in self.layout:
|
|
117
117
|
if block.type == "ask":
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
def get_text_content(self) -> str:
|
|
123
|
-
return "\n\n".join(block.text for block in self.content)
|
|
118
|
+
for i in block.blocks:
|
|
119
|
+
del blocks[i]
|
|
120
|
+
return "\n\n".join(blocks.values())
|
|
124
121
|
|
|
125
122
|
|
|
126
123
|
class Example(FullyValidatedModel):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|