tumblrbot 1.4.6__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tumblrbot/__main__.py CHANGED
@@ -14,13 +14,17 @@ from tumblrbot.utils.tumblr import TumblrSession
14
14
  def main() -> None:
15
15
  install()
16
16
 
17
- tokens = Tokens.read_from_keyring()
17
+ tokens = Tokens.load()
18
18
  with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
19
19
  if Confirm.ask("Download latest posts?", default=False):
20
20
  PostDownloader(openai=openai, tumblr=tumblr).main()
21
21
 
22
+ examples_writer = ExamplesWriter(openai=openai, tumblr=tumblr)
22
23
  if Confirm.ask("Create training data?", default=False):
23
- ExamplesWriter(openai=openai, tumblr=tumblr).main()
24
+ examples_writer.main()
25
+
26
+ if Confirm.ask("Remove training data flagged by the OpenAI moderation? [bold]This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
27
+ examples_writer.filter_examples()
24
28
 
25
29
  fine_tuner = FineTuner(openai=openai, tumblr=tumblr)
26
30
  fine_tuner.print_estimates()
@@ -18,7 +18,7 @@ class PostDownloader(FlowClass):
18
18
  completed = 0
19
19
  after = 0
20
20
  if data_path.exists():
21
- lines = data_path.read_text("utf_8").splitlines() if data_path.exists() else []
21
+ lines = data_path.read_bytes().splitlines() if data_path.exists() else []
22
22
  completed = len(lines)
23
23
  if lines:
24
24
  after = Post.model_validate_json(lines[-1]).timestamp
@@ -36,18 +36,18 @@ class PostDownloader(FlowClass):
36
36
  task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
37
37
 
38
38
  while True:
39
- response = self.tumblr.retrieve_published_posts(blog_identifier, after=after).json()["response"]
40
- live.progress.update(task_id, total=response["blog"]["posts"], completed=completed)
39
+ response = self.tumblr.retrieve_published_posts(blog_identifier, after=after)
40
+ live.progress.update(task_id, total=response.response.blog.posts, completed=completed)
41
41
 
42
- if posts := response["posts"]:
43
- for post in posts:
44
- dump(post, fp)
45
- fp.write("\n")
42
+ if not response.response.posts:
43
+ return
46
44
 
47
- model = Post.model_validate(post)
48
- after = model.timestamp
49
- live.custom_update(model)
45
+ for post in response.response.posts:
46
+ dump(post, fp)
47
+ fp.write("\n")
50
48
 
51
- completed += len(posts)
52
- else:
53
- return
49
+ model = Post.model_validate(post)
50
+ after = model.timestamp
51
+ live.custom_update(model)
52
+
53
+ completed += len(response.response.posts)
@@ -7,7 +7,6 @@ from typing import IO, override
7
7
 
8
8
  import rich
9
9
  from openai import BadRequestError
10
- from rich.prompt import Confirm
11
10
 
12
11
  from tumblrbot.utils.common import FlowClass, PreviewLive
13
12
  from tumblrbot.utils.models import Example, Post
@@ -26,7 +25,7 @@ class ExamplesWriter(FlowClass):
26
25
  fp,
27
26
  )
28
27
 
29
- for post in self.get_filtered_posts():
28
+ for post in self.get_valid_posts():
30
29
  self.write_example(
31
30
  self.config.user_message,
32
31
  post.get_content_text(),
@@ -49,43 +48,38 @@ class ExamplesWriter(FlowClass):
49
48
  self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
50
49
  self.config.custom_prompts_file.touch(exist_ok=True)
51
50
 
52
- with self.config.custom_prompts_file.open("r", encoding="utf_8") as fp:
51
+ with self.config.custom_prompts_file.open("rb") as fp:
53
52
  for line in fp:
54
53
  data: dict[str, str] = loads(line)
55
54
  yield from data.items()
56
55
 
57
- def get_filtered_posts(self) -> Generator[Post]:
58
- posts = self.get_valid_posts()
56
+ def get_valid_posts(self) -> Generator[Post]:
57
+ for data_path in self.get_data_paths():
58
+ with data_path.open("rb") as fp:
59
+ for line in fp:
60
+ post = Post.model_validate_json(line)
61
+ if post.valid_text_post():
62
+ yield post
59
63
 
60
- if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
64
+ def filter_examples(self) -> None:
65
+ examples = self.config.examples_file.read_text("utf_8").splitlines()
66
+ with self.config.examples_file.open("w", encoding="utf_8") as fp:
61
67
  batch_size = self.get_moderation_batch_size()
62
- posts = list(posts)
63
68
  removed = 0
64
69
 
65
70
  with PreviewLive() as live:
66
71
  for batch in live.progress.track(
67
- batched(posts, batch_size, strict=False),
68
- ceil(len(posts) / batch_size),
72
+ batched(examples, batch_size, strict=False),
73
+ ceil(len(examples) / batch_size),
69
74
  description="Removing flagged posts...",
70
75
  ):
71
- response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
72
- for post, moderation in zip(batch, response.results, strict=True):
76
+ response = self.openai.moderations.create(input=list(batch))
77
+ for example, moderation in zip(batch, response.results, strict=True):
73
78
  if moderation.flagged:
74
79
  removed += 1
75
- live.custom_update(post)
76
80
  else:
77
- yield post
81
+ fp.write(f"{example}\n")
78
82
  rich.print(f"[red]Removed {removed} posts.\n")
79
- else:
80
- yield from posts
81
-
82
- def get_valid_posts(self) -> Generator[Post]:
83
- for data_path in self.get_data_paths():
84
- with data_path.open(encoding="utf_8") as fp:
85
- for line in fp:
86
- post = Post.model_validate_json(line)
87
- if post.valid_text_post():
88
- yield post
89
83
 
90
84
  def get_moderation_batch_size(self) -> int:
91
85
  try:
@@ -98,8 +98,8 @@ class FineTuner(FlowClass):
98
98
  if job.status == "failed" and job.error is not None:
99
99
  raise RuntimeError(job.error.message)
100
100
 
101
- if job.fine_tuned_model:
102
- self.config.fine_tuned_model = job.fine_tuned_model or ""
101
+ if job.fine_tuned_model is not None:
102
+ self.config.fine_tuned_model = job.fine_tuned_model
103
103
 
104
104
  def print_estimates(self) -> None:
105
105
  estimated_tokens = sum(self.count_tokens())
@@ -28,26 +28,27 @@ class DraftGenerator(FlowClass):
28
28
  rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
29
29
 
30
30
  def generate_post(self) -> Post:
31
- content = self.generate_content()
32
- post = Post(content=[content])
33
- if tags := self.generate_tags(content):
34
- post.tags = tags.tags
35
- return post
36
-
37
- def generate_content(self) -> Post.Block:
38
- content = self.openai.responses.create(
31
+ text = self.generate_text()
32
+ if tags := self.generate_tags(text):
33
+ tags = tags.tags
34
+ return Post(
35
+ content=[Post.Block(type="text", text=text)],
36
+ tags=tags or [],
37
+ state="draft",
38
+ )
39
+
40
+ def generate_text(self) -> str:
41
+ return self.openai.responses.create(
39
42
  input=self.config.user_message,
40
43
  instructions=self.config.developer_message,
41
44
  model=self.config.fine_tuned_model,
42
45
  ).output_text
43
46
 
44
- return Post.Block(text=content)
45
-
46
- def generate_tags(self, content: Post.Block) -> Post | None:
47
+ def generate_tags(self, text: str) -> Post | None:
47
48
  if random() < self.config.tags_chance: # noqa: S311
48
49
  return self.openai.responses.parse(
49
50
  text_format=Post,
50
- input=content.text,
51
+ input=text,
51
52
  instructions=self.config.tags_developer_message,
52
53
  model=self.config.base_model,
53
54
  ).output_parsed
tumblrbot/utils/common.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from abc import abstractmethod
2
+ from pathlib import Path
2
3
  from random import choice
3
4
  from typing import ClassVar, Self, override
4
5
 
@@ -10,15 +11,14 @@ from rich.live import Live
10
11
  from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
11
12
  from rich.table import Table
12
13
 
13
- from tumblrbot.utils.config import Config, Path
14
- from tumblrbot.utils.models import FullyValidatedModel
14
+ from tumblrbot.utils.models import Config, FullyValidatedModel
15
15
  from tumblrbot.utils.tumblr import TumblrSession
16
16
 
17
17
 
18
18
  class FlowClass(FullyValidatedModel):
19
19
  model_config = ConfigDict(arbitrary_types_allowed=True)
20
20
 
21
- config: ClassVar = Config() # pyright: ignore[reportCallIssue]
21
+ config: ClassVar = Config.load()
22
22
 
23
23
  openai: OpenAI
24
24
  tumblr: TumblrSession
tumblrbot/utils/models.py CHANGED
@@ -1,15 +1,20 @@
1
+ import tomllib
2
+ from abc import abstractmethod
1
3
  from collections.abc import Generator
4
+ from pathlib import Path
2
5
  from typing import Annotated, Any, ClassVar, Literal, Self, override
3
6
 
4
7
  import rich
8
+ import tomlkit
5
9
  from keyring import get_password, set_password
6
- from openai import BaseModel
10
+ from openai.types import ChatModel
7
11
  from pwinput import pwinput
8
- from pydantic import ConfigDict, PlainSerializer
12
+ from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, PlainSerializer, PositiveFloat, PositiveInt, model_validator
9
13
  from pydantic.json_schema import SkipJsonSchema
10
14
  from requests_oauthlib import OAuth1Session
11
15
  from rich.panel import Panel
12
- from rich.prompt import Confirm
16
+ from rich.prompt import Confirm, Prompt
17
+ from tomlkit import comment, document
13
18
 
14
19
 
15
20
  class FullyValidatedModel(BaseModel):
@@ -22,7 +27,85 @@ class FullyValidatedModel(BaseModel):
22
27
  )
23
28
 
24
29
 
25
- class Tokens(FullyValidatedModel):
30
+ class FileSyncSettings(FullyValidatedModel):
31
+ @classmethod
32
+ @abstractmethod
33
+ def read(cls) -> Self | dict[str, object] | str | None: ...
34
+
35
+ @classmethod
36
+ def load(cls) -> Self:
37
+ data = cls.read() or {}
38
+ return cls.model_validate_json(data) if isinstance(data, str) else cls.model_validate(data)
39
+
40
+ @model_validator(mode="after")
41
+ @abstractmethod
42
+ def write(self) -> Self: ...
43
+
44
+
45
+ class Config(FileSyncSettings):
46
+ toml_file: ClassVar = Path("config.toml")
47
+
48
+ # Downloading Posts & Writing Examples
49
+ download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
50
+ data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
51
+
52
+ # Writing Examples
53
+ max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
54
+ custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
55
+
56
+ # Writing Examples & Fine-Tuning
57
+ examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
58
+
59
+ # Writing Examples & Generating
60
+ developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
61
+ user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
62
+
63
+ # Fine-Tuning
64
+ expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
65
+ token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
66
+ job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
67
+
68
+ # Fine-Tuning & Generating
69
+ base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
70
+ fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
71
+
72
+ # Generating
73
+ upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
74
+ draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
75
+ tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
76
+ tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
77
+
78
+ @classmethod
79
+ @override
80
+ def read(cls) -> dict[str, object] | None:
81
+ return tomllib.loads(cls.toml_file.read_text("utf_8")) if cls.toml_file.exists() else None
82
+
83
+ @model_validator(mode="after")
84
+ @override
85
+ def write(self) -> Self:
86
+ if not self.download_blog_identifiers:
87
+ rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
88
+ self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
89
+
90
+ if not self.upload_blog_identifier:
91
+ rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
92
+ self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
93
+
94
+ toml_table = document()
95
+
96
+ for (name, field), value in zip(self.__class__.model_fields.items(), self.model_dump(mode="json").values(), strict=True):
97
+ if field.description is not None:
98
+ for line in field.description.split(". "):
99
+ toml_table.add(comment(f"{line.removesuffix('.')}."))
100
+
101
+ toml_table[name] = value
102
+
103
+ Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
104
+
105
+ return self
106
+
107
+
108
+ class Tokens(FileSyncSettings):
26
109
  class Tumblr(FullyValidatedModel):
27
110
  client_key: str = ""
28
111
  client_secret: str = ""
@@ -50,15 +133,13 @@ class Tokens(FullyValidatedModel):
50
133
  rich.print()
51
134
 
52
135
  @classmethod
53
- def read_from_keyring(cls) -> Self:
54
- if json_data := get_password(cls.service_name, cls.username):
55
- return cls.model_validate_json(json_data)
56
- return cls()
57
-
58
136
  @override
59
- def model_post_init(self, context: object) -> None:
60
- super().model_post_init(context)
137
+ def read(cls) -> str | None:
138
+ return get_password(cls.service_name, cls.username)
61
139
 
140
+ @model_validator(mode="after")
141
+ @override
142
+ def write(self) -> Self:
62
143
  if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
63
144
  (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
64
145
 
@@ -86,20 +167,33 @@ class Tokens(FullyValidatedModel):
86
167
 
87
168
  set_password(self.service_name, self.username, self.model_dump_json())
88
169
 
170
+ return self
171
+
172
+
173
+ class ResponseModel(FullyValidatedModel):
174
+ class Response(FullyValidatedModel):
175
+ class Blog(FullyValidatedModel):
176
+ posts: int
177
+
178
+ blog: Blog = Blog(posts=0)
179
+ posts: list[Any] = []
180
+
181
+ response: Response
182
+
89
183
 
90
184
  class Post(FullyValidatedModel):
91
185
  class Block(FullyValidatedModel):
92
- type: str = "text"
186
+ type: str
93
187
  text: str = ""
94
- blocks: list[int] = [] # noqa: RUF012
188
+ blocks: list[int] = []
95
189
 
96
190
  timestamp: SkipJsonSchema[int] = 0
97
- tags: Annotated[list[str], PlainSerializer(",".join)] = [] # noqa: RUF012
98
- state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
191
+ tags: Annotated[list[str], PlainSerializer(",".join)]
192
+ state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "published"
99
193
 
100
- content: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
101
- layout: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
102
- trail: SkipJsonSchema[list[Any]] = [] # noqa: RUF012
194
+ content: SkipJsonSchema[list[Block]] = []
195
+ layout: SkipJsonSchema[list[Block]] = []
196
+ trail: SkipJsonSchema[list[Any]] = []
103
197
 
104
198
  is_submission: SkipJsonSchema[bool] = False
105
199
 
tumblrbot/utils/tumblr.py CHANGED
@@ -3,7 +3,7 @@ from typing import Self
3
3
  from requests import HTTPError, Response
4
4
  from requests_oauthlib import OAuth1Session
5
5
 
6
- from tumblrbot.utils.models import Post, Tokens
6
+ from tumblrbot.utils.models import Post, ResponseModel, Tokens
7
7
 
8
8
 
9
9
  class TumblrSession(OAuth1Session):
@@ -19,12 +19,11 @@ class TumblrSession(OAuth1Session):
19
19
  try:
20
20
  response.raise_for_status()
21
21
  except HTTPError as error:
22
- if response.text:
23
- error.add_note(response.text)
22
+ error.add_note(response.text)
24
23
  raise
25
24
 
26
- def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
27
- return self.get(
25
+ def retrieve_published_posts(self, blog_identifier: str, after: int) -> ResponseModel:
26
+ response = self.get(
28
27
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
29
28
  params={
30
29
  "after": after,
@@ -32,9 +31,11 @@ class TumblrSession(OAuth1Session):
32
31
  "npf": True,
33
32
  },
34
33
  )
34
+ return ResponseModel.model_validate_json(response.content)
35
35
 
36
- def create_post(self, blog_identifier: str, post: Post) -> Response:
37
- return self.post(
36
+ def create_post(self, blog_identifier: str, post: Post) -> ResponseModel:
37
+ response = self.post(
38
38
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
39
39
  json=post.model_dump(),
40
40
  )
41
+ return ResponseModel.model_validate_json(response.content)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.4.6
3
+ Version: 1.5.0
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
5
  Requires-Python: >= 3.13
6
6
  Description-Content-Type: text/markdown
@@ -8,13 +8,13 @@ Requires-Dist: keyring
8
8
  Requires-Dist: openai
9
9
  Requires-Dist: pwinput
10
10
  Requires-Dist: pydantic
11
- Requires-Dist: pydantic-settings
12
11
  Requires-Dist: requests
13
12
  Requires-Dist: requests-oauthlib
14
13
  Requires-Dist: rich
15
14
  Requires-Dist: tiktoken
16
15
  Requires-Dist: tomlkit
17
- Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
16
+ Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
17
+ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
18
18
 
19
19
  # tumblrbot
20
20
 
@@ -31,8 +31,8 @@ Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
31
31
  [OpenAI]: https://pypi.org/project/openai
32
32
  [OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
33
33
  [OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
34
+ [OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
34
35
  [Fine-Tuning Portal]: https://platform.openai.com/finetune
35
- [Moderation API]: https://platform.openai.com/docs/api-reference/moderations
36
36
 
37
37
  [Tumblr]: https://tumblr.com
38
38
  [Tumblr Tokens]: https://tumblr.com/oauth/apps
@@ -57,18 +57,16 @@ Features:
57
57
  - An [interactive console][Main] for all steps of generating posts for the blog:
58
58
  1. Asks for [OpenAI] and [Tumblr] tokens.
59
59
  - Stores API tokens using [keyring].
60
- - Prevents API tokens from printing to the console.
61
60
  1. Retrieves [Tumblr] [OAuth] tokens.
62
61
  1. [Downloads posts][Download] from the [configured][config] [Tumblr] blogs.
63
62
  - Skips redownloading already downloaded posts.
64
63
  - Shows progress and previews the current post.
65
64
  1. [Creates examples][Examples] to fine-tune the model from your posts.
66
65
  - Filters out posts that contain more than just text data.
67
- - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
68
- - Shows progress and previews the current post.
69
66
  - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
70
- 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
67
+ 1. Filters out any posts flagged by the [OpenAI Moderation API].
71
68
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
69
+ - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
72
70
  - Resumes monitoring the same fine-tuning process when restarted.
73
71
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
74
72
  - Stores the output model automatically when fine-tuning is completed.
@@ -82,6 +80,7 @@ Features:
82
80
  **To-Do:**
83
81
 
84
82
  - Add code documentation.
83
+ - Add reblog generation logic.
85
84
 
86
85
  **Known Issues:**
87
86
 
@@ -0,0 +1,15 @@
1
+ tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
3
+ tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ tumblrbot/flow/download.py,sha256=PUs7eM-1kGOb2RKijy3lW0zyvfFDwbxzTGhVghrWIhc,2012
5
+ tumblrbot/flow/examples.py,sha256=MlukrVdzpIwk_-37PpRsBGV5eX-lLlNUUYvuozXC_vw,3726
6
+ tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
7
+ tumblrbot/flow/generate.py,sha256=cfIdmLFNuuKjUgk4Jtp0aTf2u86jOAUFuziq71zjDME,2148
8
+ tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
10
+ tumblrbot/utils/models.py,sha256=wAS3ptbaQX3J6IlixAdhBD2wcs4BO64HD6JcCB7W6lg,9903
11
+ tumblrbot/utils/tumblr.py,sha256=AgrczLFyrxES66N4PwIrjxX3QcpGvh8HP-jw0lwtmc0,1427
12
+ tumblrbot-1.5.0.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
13
+ tumblrbot-1.5.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
14
+ tumblrbot-1.5.0.dist-info/METADATA,sha256=-mGGhfyRyVcO1M0PtNAoVakPqqMw0sAevp2XkaYrfgw,10129
15
+ tumblrbot-1.5.0.dist-info/RECORD,,
tumblrbot/utils/config.py DELETED
@@ -1,97 +0,0 @@
1
- from collections.abc import Sequence
2
- from pathlib import Path
3
- from typing import TYPE_CHECKING, Self, override
4
-
5
- import rich
6
- import tomlkit
7
- from openai.types import ChatModel
8
- from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, model_validator
9
- from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
10
- from rich.prompt import Prompt
11
- from tomlkit import comment, document
12
-
13
- if TYPE_CHECKING:
14
- from _typeshed import StrPath
15
-
16
-
17
- class Config(BaseSettings):
18
- model_config = SettingsConfigDict(
19
- extra="ignore",
20
- validate_assignment=True,
21
- validate_return=True,
22
- validate_by_name=True,
23
- cli_parse_args=True,
24
- cli_avoid_json=True,
25
- cli_kebab_case=True,
26
- toml_file="config.toml",
27
- )
28
-
29
- # Downloading Posts & Writing Examples
30
- download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
31
- data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
32
-
33
- # Writing Examples
34
- max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
35
- custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
36
-
37
- # Writing Examples & Fine-Tuning
38
- examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
39
-
40
- # Writing Examples & Generating
41
- developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
42
- user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
43
-
44
- # Fine-Tuning
45
- expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
46
- token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
47
- job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
48
-
49
- # Fine-Tuning & Generating
50
- base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
51
- fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
52
-
53
- # Generating
54
- upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
55
- draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
56
- tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
57
- tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
58
-
59
- @override
60
- @classmethod
61
- def settings_customise_sources(cls, settings_cls: type[BaseSettings], *args: PydanticBaseSettingsSource, **kwargs: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]:
62
- return (TomlConfigSettingsSource(settings_cls),)
63
-
64
- @model_validator(mode="after")
65
- def write_to_file(self) -> Self:
66
- if not self.download_blog_identifiers:
67
- rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
68
- self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
69
-
70
- if not self.upload_blog_identifier:
71
- rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
72
- self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
73
-
74
- toml_files = self.model_config.get("toml_file")
75
- if isinstance(toml_files, (Path, str)):
76
- self.dump_toml(toml_files)
77
- elif isinstance(toml_files, Sequence):
78
- for toml_file in toml_files:
79
- self.dump_toml(toml_file)
80
-
81
- return self
82
-
83
- def dump_toml(self, toml_file: "StrPath") -> None:
84
- toml_table = document()
85
-
86
- dumped_model = self.model_dump(mode="json")
87
- for name, field in self.__class__.model_fields.items():
88
- if field.description:
89
- for line in field.description.split(". "):
90
- toml_table.add(comment(f"{line.removesuffix('.')}."))
91
-
92
- toml_table[name] = dumped_model[name]
93
-
94
- Path(toml_file).write_text(
95
- tomlkit.dumps(toml_table),
96
- encoding="utf_8",
97
- )
@@ -1,16 +0,0 @@
1
- tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- tumblrbot/__main__.py,sha256=fV0lBm5FqTvBoq5g8soG1x0w0qebyR_oL5_339z4GpM,1460
3
- tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- tumblrbot/flow/download.py,sha256=xSfic66FR-LIG0KtJMcM5KVd2hiijCH3cyD7dzFKwLM,2042
5
- tumblrbot/flow/examples.py,sha256=6A6bqHwLIUtEr-dv-FcWbMdVVMFImHhuMmlJLyuSW6U,4005
6
- tumblrbot/flow/fine_tune.py,sha256=X9xR9uPgiK6DKQ-lZ6oqwTPypUx5D5S1MiKrFKSm5ng,5381
7
- tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
8
- tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- tumblrbot/utils/common.py,sha256=tnKBBiEGQPOWByLi0vT31BzpntndbIP_JekGkgjy15U,1841
10
- tumblrbot/utils/config.py,sha256=AlHZLx74-202BNwELNE3MzTg2Ru7R_Q5u8h9S4QGkJk,5460
11
- tumblrbot/utils/models.py,sha256=bLB96qrHBr18_X6-zHatc-bczej0kLUrZxyceITAWqo,4936
12
- tumblrbot/utils/tumblr.py,sha256=9lAbjO-27cpju9Wewv26lyQoogybs8b1y8mvYIuHkqw,1293
13
- tumblrbot-1.4.6.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
14
- tumblrbot-1.4.6.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
15
- tumblrbot-1.4.6.dist-info/METADATA,sha256=IotLVzlM8nw8XwxwBt-TjGozYHEKuJRD1n1hdbevs1Q,10183
16
- tumblrbot-1.4.6.dist-info/RECORD,,