tumblrbot 1.4.5__py3-none-any.whl → 1.4.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tumblrbot/__main__.py CHANGED
@@ -1,4 +1,4 @@
1
- from openai import DefaultHttpxClient, OpenAI
1
+ from openai import OpenAI
2
2
  from rich.prompt import Confirm
3
3
  from rich.traceback import install
4
4
 
@@ -14,11 +14,8 @@ from tumblrbot.utils.tumblr import TumblrSession
14
14
  def main() -> None:
15
15
  install()
16
16
 
17
- tokens = Tokens.read_from_keyring()
18
- with (
19
- OpenAI(api_key=tokens.openai_api_key.get_secret_value(), http_client=DefaultHttpxClient(http2=True)) as openai,
20
- TumblrSession(tokens=tokens) as tumblr,
21
- ):
17
+ tokens = Tokens.load()
18
+ with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
22
19
  if Confirm.ask("Download latest posts?", default=False):
23
20
  PostDownloader(openai=openai, tumblr=tumblr).main()
24
21
 
@@ -18,7 +18,7 @@ class PostDownloader(FlowClass):
18
18
  completed = 0
19
19
  after = 0
20
20
  if data_path.exists():
21
- lines = data_path.read_text("utf_8").splitlines() if data_path.exists() else []
21
+ lines = data_path.read_bytes().splitlines() if data_path.exists() else []
22
22
  completed = len(lines)
23
23
  if lines:
24
24
  after = Post.model_validate_json(lines[-1]).timestamp
@@ -1,11 +1,11 @@
1
1
  from collections.abc import Generator
2
+ from itertools import batched
2
3
  from json import loads
3
4
  from math import ceil
4
5
  from re import search
5
6
  from typing import IO, override
6
7
 
7
8
  import rich
8
- from more_itertools import chunked
9
9
  from openai import BadRequestError
10
10
  from rich.prompt import Confirm
11
11
 
@@ -49,7 +49,7 @@ class ExamplesWriter(FlowClass):
49
49
  self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
50
50
  self.config.custom_prompts_file.touch(exist_ok=True)
51
51
 
52
- with self.config.custom_prompts_file.open("r", encoding="utf_8") as fp:
52
+ with self.config.custom_prompts_file.open("rb") as fp:
53
53
  for line in fp:
54
54
  data: dict[str, str] = loads(line)
55
55
  yield from data.items()
@@ -58,18 +58,18 @@ class ExamplesWriter(FlowClass):
58
58
  posts = self.get_valid_posts()
59
59
 
60
60
  if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
61
- chunk_size = self.get_moderation_chunk_limit()
61
+ batch_size = self.get_moderation_batch_size()
62
62
  posts = list(posts)
63
63
  removed = 0
64
64
 
65
65
  with PreviewLive() as live:
66
- for chunk in live.progress.track(
67
- chunked(posts, chunk_size),
68
- ceil(len(posts) / chunk_size),
66
+ for batch in live.progress.track(
67
+ batched(posts, batch_size, strict=False),
68
+ ceil(len(posts) / batch_size),
69
69
  description="Removing flagged posts...",
70
70
  ):
71
- response = self.openai.moderations.create(input=list(map(Post.get_content_text, chunk)))
72
- for post, moderation in zip(chunk, response.results, strict=True):
71
+ response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
72
+ for post, moderation in zip(batch, response.results, strict=True):
73
73
  if moderation.flagged:
74
74
  removed += 1
75
75
  live.custom_update(post)
@@ -81,18 +81,17 @@ class ExamplesWriter(FlowClass):
81
81
 
82
82
  def get_valid_posts(self) -> Generator[Post]:
83
83
  for data_path in self.get_data_paths():
84
- with data_path.open(encoding="utf_8") as fp:
84
+ with data_path.open("rb") as fp:
85
85
  for line in fp:
86
86
  post = Post.model_validate_json(line)
87
- if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
87
+ if post.valid_text_post():
88
88
  yield post
89
89
 
90
- def get_moderation_chunk_limit(self) -> int:
91
- test_n = 1000
90
+ def get_moderation_batch_size(self) -> int:
92
91
  try:
93
- self.openai.moderations.create(input=[""] * test_n)
92
+ self.openai.moderations.create(input=[""] * self.config.max_moderation_batch_size)
94
93
  except BadRequestError as error:
95
94
  message = error.response.json()["error"]["message"]
96
95
  if match := search(r"(\d+)\.", message):
97
96
  return int(match.group(1))
98
- return test_n
97
+ return self.config.max_moderation_batch_size
@@ -98,8 +98,8 @@ class FineTuner(FlowClass):
98
98
  if job.status == "failed" and job.error is not None:
99
99
  raise RuntimeError(job.error.message)
100
100
 
101
- if job.fine_tuned_model:
102
- self.config.fine_tuned_model = job.fine_tuned_model or ""
101
+ if job.fine_tuned_model is not None:
102
+ self.config.fine_tuned_model = job.fine_tuned_model
103
103
 
104
104
  def print_estimates(self) -> None:
105
105
  estimated_tokens = sum(self.count_tokens())
tumblrbot/utils/common.py CHANGED
@@ -1,4 +1,5 @@
1
1
  from abc import abstractmethod
2
+ from pathlib import Path
2
3
  from random import choice
3
4
  from typing import ClassVar, Self, override
4
5
 
@@ -10,15 +11,14 @@ from rich.live import Live
10
11
  from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
11
12
  from rich.table import Table
12
13
 
13
- from tumblrbot.utils.config import Config, Path
14
- from tumblrbot.utils.models import FullyValidatedModel
14
+ from tumblrbot.utils.models import Config, FullyValidatedModel
15
15
  from tumblrbot.utils.tumblr import TumblrSession
16
16
 
17
17
 
18
18
  class FlowClass(FullyValidatedModel):
19
19
  model_config = ConfigDict(arbitrary_types_allowed=True)
20
20
 
21
- config: ClassVar = Config() # pyright: ignore[reportCallIssue]
21
+ config: ClassVar = Config.load()
22
22
 
23
23
  openai: OpenAI
24
24
  tumblr: TumblrSession
tumblrbot/utils/models.py CHANGED
@@ -1,24 +1,20 @@
1
+ import tomllib
2
+ from abc import abstractmethod
1
3
  from collections.abc import Generator
4
+ from pathlib import Path
2
5
  from typing import Annotated, Any, ClassVar, Literal, Self, override
3
6
 
4
7
  import rich
8
+ import tomlkit
5
9
  from keyring import get_password, set_password
6
- from niquests import Session
7
- from openai import BaseModel
10
+ from openai.types import ChatModel
8
11
  from pwinput import pwinput
9
- from pydantic import ConfigDict, PlainSerializer, SecretStr
12
+ from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, PlainSerializer, PositiveFloat, PositiveInt, model_validator
10
13
  from pydantic.json_schema import SkipJsonSchema
11
14
  from requests_oauthlib import OAuth1Session
12
15
  from rich.panel import Panel
13
- from rich.prompt import Confirm
14
-
15
- type SerializableSecretStr = Annotated[
16
- SecretStr,
17
- PlainSerializer(
18
- SecretStr.get_secret_value,
19
- when_used="json-unless-none",
20
- ),
21
- ]
16
+ from rich.prompt import Confirm, Prompt
17
+ from tomlkit import comment, document
22
18
 
23
19
 
24
20
  class FullyValidatedModel(BaseModel):
@@ -31,84 +27,162 @@ class FullyValidatedModel(BaseModel):
31
27
  )
32
28
 
33
29
 
34
- class Tokens(FullyValidatedModel):
30
+ class FileSyncSettings(FullyValidatedModel):
31
+ @classmethod
32
+ @abstractmethod
33
+ def read(cls) -> Self | dict[str, object] | str | None: ...
34
+
35
+ @classmethod
36
+ def load(cls) -> Self:
37
+ data = cls.read() or {}
38
+ return cls.model_validate_json(data) if isinstance(data, str) else cls.model_validate(data)
39
+
40
+ @model_validator(mode="after")
41
+ @abstractmethod
42
+ def write(self) -> Self: ...
43
+
44
+
45
+ class Config(FileSyncSettings):
46
+ toml_file: ClassVar = Path("config.toml")
47
+
48
+ # Downloading Posts & Writing Examples
49
+ download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
50
+ data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
51
+
52
+ # Writing Examples
53
+ max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
54
+ custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
55
+
56
+ # Writing Examples & Fine-Tuning
57
+ examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
58
+
59
+ # Writing Examples & Generating
60
+ developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
61
+ user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
62
+
63
+ # Fine-Tuning
64
+ expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
65
+ token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
66
+ job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
67
+
68
+ # Fine-Tuning & Generating
69
+ base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
70
+ fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
71
+
72
+ # Generating
73
+ upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
74
+ draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
75
+ tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
76
+ tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
77
+
78
+ @classmethod
79
+ @override
80
+ def read(cls) -> dict[str, object] | None:
81
+ return tomllib.loads(cls.toml_file.read_text("utf_8")) if cls.toml_file.exists() else None
82
+
83
+ @model_validator(mode="after")
84
+ @override
85
+ def write(self) -> Self:
86
+ if not self.download_blog_identifiers:
87
+ rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
88
+ self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
89
+
90
+ if not self.upload_blog_identifier:
91
+ rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
92
+ self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
93
+
94
+ toml_table = document()
95
+
96
+ for (name, field), value in zip(self.__class__.model_fields.items(), self.model_dump(mode="json").values(), strict=True):
97
+ if field.description is not None:
98
+ for line in field.description.split(". "):
99
+ toml_table.add(comment(f"{line.removesuffix('.')}."))
100
+
101
+ toml_table[name] = value
102
+
103
+ Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
104
+
105
+ return self
106
+
107
+
108
+ class Tokens(FileSyncSettings):
35
109
  class Tumblr(FullyValidatedModel):
36
- client_key: SerializableSecretStr = SecretStr("")
37
- client_secret: SerializableSecretStr = SecretStr("")
38
- resource_owner_key: SerializableSecretStr = SecretStr("")
39
- resource_owner_secret: SerializableSecretStr = SecretStr("")
110
+ client_key: str = ""
111
+ client_secret: str = ""
112
+ resource_owner_key: str = ""
113
+ resource_owner_secret: str = ""
40
114
 
41
115
  service_name: ClassVar = "tumblrbot"
42
116
  username: ClassVar = "tokens"
43
117
 
44
- openai_api_key: SerializableSecretStr = SecretStr("")
118
+ openai_api_key: str = ""
45
119
  tumblr: Tumblr = Tumblr()
46
120
 
47
121
  @staticmethod
48
- def online_token_prompt(url: str, *tokens: str) -> Generator[SecretStr]:
122
+ def get_oauth_tokens(token: dict[str, str]) -> tuple[str, str]:
123
+ return token["oauth_token"], token["oauth_token_secret"]
124
+
125
+ @staticmethod
126
+ def online_token_prompt(url: str, *tokens: str) -> Generator[str]:
49
127
  formatted_token_string = " and ".join(f"[cyan]{token}[/]" for token in tokens)
50
128
 
51
129
  rich.print(f"Retrieve your {formatted_token_string} from: {url}")
52
130
  for token in tokens:
53
- yield SecretStr(pwinput(f"Enter your {token} (masked): ").strip())
131
+ yield pwinput(f"Enter your {token} (masked): ").strip()
54
132
 
55
133
  rich.print()
56
134
 
57
135
  @classmethod
58
- def read_from_keyring(cls) -> Self:
59
- if json_data := get_password(cls.service_name, cls.username):
60
- return cls.model_validate_json(json_data)
61
- return cls()
62
-
63
136
  @override
64
- def model_post_init(self, context: object) -> None:
65
- super().model_post_init(context)
137
+ def read(cls) -> str | None:
138
+ return get_password(cls.service_name, cls.username)
66
139
 
67
- if not self.openai_api_key.get_secret_value() or Confirm.ask("Reset OpenAI API key?", default=False):
140
+ @model_validator(mode="after")
141
+ @override
142
+ def write(self) -> Self:
143
+ if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
68
144
  (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
69
145
 
70
- if not all(self.tumblr.model_dump(mode="json").values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
146
+ if not all(self.tumblr.model_dump().values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
71
147
  self.tumblr.client_key, self.tumblr.client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
72
148
 
73
- OAuth1Session.__bases__ = (Session,)
74
-
75
149
  with OAuth1Session(
76
- self.tumblr.client_key.get_secret_value(),
77
- self.tumblr.client_secret.get_secret_value(),
150
+ self.tumblr.client_key,
151
+ self.tumblr.client_secret,
78
152
  ) as oauth_session:
79
153
  fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
80
154
  full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
81
155
  (redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
82
- oauth_response = oauth_session.parse_authorization_response(redirect_response.get_secret_value())
156
+ oauth_response = oauth_session.parse_authorization_response(redirect_response)
83
157
 
84
158
  with OAuth1Session(
85
- self.tumblr.client_key.get_secret_value(),
86
- self.tumblr.client_secret.get_secret_value(),
87
- fetch_response["oauth_token"],
88
- fetch_response["oauth_token_secret"],
159
+ self.tumblr.client_key,
160
+ self.tumblr.client_secret,
161
+ *self.get_oauth_tokens(fetch_response),
89
162
  verifier=oauth_response["oauth_verifier"],
90
163
  ) as oauth_session:
91
164
  oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
92
165
 
93
- self.tumblr.resource_owner_key = oauth_tokens["oauth_token"]
94
- self.tumblr.resource_owner_secret = oauth_tokens["oauth_token_secret"]
166
+ self.tumblr.resource_owner_key, self.tumblr.resource_owner_secret = self.get_oauth_tokens(oauth_tokens)
95
167
 
96
168
  set_password(self.service_name, self.username, self.model_dump_json())
97
169
 
170
+ return self
171
+
98
172
 
99
173
  class Post(FullyValidatedModel):
100
174
  class Block(FullyValidatedModel):
101
175
  type: str = "text"
102
176
  text: str = ""
103
- blocks: list[int] = [] # noqa: RUF012
177
+ blocks: list[int] = []
104
178
 
105
179
  timestamp: SkipJsonSchema[int] = 0
106
- tags: Annotated[list[str], PlainSerializer(",".join)] = [] # noqa: RUF012
180
+ tags: Annotated[list[str], PlainSerializer(",".join)] = []
107
181
  state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
108
182
 
109
- content: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
110
- layout: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
111
- trail: SkipJsonSchema[list[Any]] = [] # noqa: RUF012
183
+ content: SkipJsonSchema[list[Block]] = []
184
+ layout: SkipJsonSchema[list[Block]] = []
185
+ trail: SkipJsonSchema[list[Any]] = []
112
186
 
113
187
  is_submission: SkipJsonSchema[bool] = False
114
188
 
@@ -120,8 +194,8 @@ class Post(FullyValidatedModel):
120
194
  subtitle_align="left",
121
195
  )
122
196
 
123
- def only_text_blocks(self) -> bool:
124
- return all(block.type == "text" for block in self.content) and not any(block.type == "ask" for block in self.layout)
197
+ def valid_text_post(self) -> bool:
198
+ return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or self.trail or any(block.type == "ask" for block in self.layout))
125
199
 
126
200
  def get_content_text(self) -> str:
127
201
  return "\n\n".join(block.text for block in self.content)
tumblrbot/utils/tumblr.py CHANGED
@@ -1,47 +1,39 @@
1
- from dataclasses import dataclass
2
1
  from typing import Self
3
2
 
4
- from niquests import HTTPError, PreparedRequest, Response, Session
5
- from requests_oauthlib import OAuth1
3
+ from requests import HTTPError, Response
4
+ from requests_oauthlib import OAuth1Session
6
5
 
7
6
  from tumblrbot.utils.models import Post, Tokens
8
7
 
9
8
 
10
- @dataclass
11
- class TumblrSession(Session):
12
- tokens: Tokens
13
-
14
- def __post_init__(self) -> None:
15
- super().__init__(multiplexed=True, happy_eyeballs=True)
16
-
17
- self.auth = OAuth1(**self.tokens.tumblr.model_dump(mode="json"))
9
+ class TumblrSession(OAuth1Session):
10
+ def __init__(self, tokens: Tokens) -> None:
11
+ super().__init__(**tokens.tumblr.model_dump())
18
12
  self.hooks["response"].append(self.response_hook)
19
13
 
20
14
  def __enter__(self) -> Self:
21
15
  super().__enter__()
22
16
  return self
23
17
 
24
- def response_hook(self, response: PreparedRequest | Response) -> None:
25
- if isinstance(response, Response):
26
- try:
27
- response.raise_for_status()
28
- except HTTPError as error:
29
- if response.text:
30
- error.add_note(response.text)
31
- raise
18
+ def response_hook(self, response: Response, *_args: object, **_kwargs: object) -> None:
19
+ try:
20
+ response.raise_for_status()
21
+ except HTTPError as error:
22
+ error.add_note(response.text)
23
+ raise
32
24
 
33
25
  def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
34
26
  return self.get(
35
27
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
36
28
  params={
37
- "after": str(after),
29
+ "after": after,
38
30
  "sort": "asc",
39
- "npf": str(True),
31
+ "npf": True,
40
32
  },
41
33
  )
42
34
 
43
35
  def create_post(self, blog_identifier: str, post: Post) -> Response:
44
36
  return self.post(
45
37
  f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
46
- json=post.model_dump(mode="json"),
38
+ json=post.model_dump(),
47
39
  )
@@ -1,22 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.4.5
3
+ Version: 1.4.7
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
5
  Requires-Python: >= 3.13
6
6
  Description-Content-Type: text/markdown
7
- Requires-Dist: httpx[http2]
8
7
  Requires-Dist: keyring
9
- Requires-Dist: more-itertools
10
- Requires-Dist: niquests[speedups, http3]
11
8
  Requires-Dist: openai
12
9
  Requires-Dist: pwinput
13
10
  Requires-Dist: pydantic
14
- Requires-Dist: pydantic-settings
11
+ Requires-Dist: requests
15
12
  Requires-Dist: requests-oauthlib
16
13
  Requires-Dist: rich
17
14
  Requires-Dist: tiktoken
18
15
  Requires-Dist: tomlkit
19
- Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
16
+ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
20
17
 
21
18
  # tumblrbot
22
19
 
@@ -59,7 +56,6 @@ Features:
59
56
  - An [interactive console][Main] for all steps of generating posts for the blog:
60
57
  1. Asks for [OpenAI] and [Tumblr] tokens.
61
58
  - Stores API tokens using [keyring].
62
- - Prevents API tokens from printing to the console.
63
59
  1. Retrieves [Tumblr] [OAuth] tokens.
64
60
  1. [Downloads posts][Download] from the [configured][config] [Tumblr] blogs.
65
61
  - Skips redownloading already downloaded posts.
@@ -69,8 +65,8 @@ Features:
69
65
  - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
70
66
  - Shows progress and previews the current post.
71
67
  - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
72
- 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
73
68
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
69
+ - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
74
70
  - Resumes monitoring the same fine-tuning process when restarted.
75
71
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
76
72
  - Stores the output model automatically when fine-tuning is completed.
@@ -0,0 +1,15 @@
1
+ tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tumblrbot/__main__.py,sha256=BenjVNlVZDy-ZlSWukEIguGLa6qXvZjhYSSWMqa8-0Q,1447
3
+ tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ tumblrbot/flow/download.py,sha256=wdXmtCnnArn8Zw7D2Hoa_KhH-k61j9w3cbYztgBkUlY,2036
5
+ tumblrbot/flow/examples.py,sha256=Th6vgiu3D2VloOx7otZlk164h3ifkJEwDk21YHMEYP0,3976
6
+ tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
7
+ tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
8
+ tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
10
+ tumblrbot/utils/models.py,sha256=Z0k16qJsZEO8tfmPp7X3edz-RgGCDLRSm7HrSDLGh1Y,9663
11
+ tumblrbot/utils/tumblr.py,sha256=6V9AjT-dyR2vuUkfqgqs52Ua5irhQJzhgQhV54xKyGM,1258
12
+ tumblrbot-1.4.7.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
13
+ tumblrbot-1.4.7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
14
+ tumblrbot-1.4.7.dist-info/METADATA,sha256=q_PH1oU-d0PRdXDuL5Bjh-Y8gccZJVvLCCho2E_CLvc,10104
15
+ tumblrbot-1.4.7.dist-info/RECORD,,
tumblrbot/utils/config.py DELETED
@@ -1,97 +0,0 @@
1
- from collections.abc import Sequence
2
- from pathlib import Path
3
- from typing import TYPE_CHECKING, Self, override
4
-
5
- import rich
6
- import tomlkit
7
- from openai.types import ChatModel
8
- from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, Secret, model_validator
9
- from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
10
- from rich.prompt import Prompt
11
- from tomlkit import comment, document
12
-
13
- if TYPE_CHECKING:
14
- from _typeshed import StrPath
15
-
16
-
17
- class Config(BaseSettings):
18
- model_config = SettingsConfigDict(
19
- extra="ignore",
20
- validate_assignment=True,
21
- validate_return=True,
22
- validate_by_name=True,
23
- cli_parse_args=True,
24
- cli_avoid_json=True,
25
- cli_kebab_case=True,
26
- toml_file="config.toml",
27
- )
28
-
29
- # Downloading Posts & Writing Examples
30
- download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
31
- data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
32
-
33
- # Writing Examples
34
- custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
35
-
36
- # Writing Examples & Fine-Tuning
37
- examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
38
-
39
- # Writing Examples & Generating
40
- developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
41
- user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
42
-
43
- # Fine-Tuning
44
- expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
45
- token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
46
- job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
47
-
48
- # Fine-Tuning & Generating
49
- base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
50
- fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
51
-
52
- # Generating
53
- upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
54
- draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
55
- tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
56
- tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
57
-
58
- @override
59
- @classmethod
60
- def settings_customise_sources(cls, settings_cls: type[BaseSettings], *args: PydanticBaseSettingsSource, **kwargs: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]:
61
- return (TomlConfigSettingsSource(settings_cls),)
62
-
63
- @model_validator(mode="after")
64
- def write_to_file(self) -> Self:
65
- if not self.download_blog_identifiers:
66
- rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
67
- self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
68
-
69
- if not self.upload_blog_identifier:
70
- rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
71
- self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
72
-
73
- toml_files = self.model_config.get("toml_file")
74
- if isinstance(toml_files, (Path, str)):
75
- self.dump_toml(toml_files)
76
- elif isinstance(toml_files, Sequence):
77
- for toml_file in toml_files:
78
- self.dump_toml(toml_file)
79
-
80
- return self
81
-
82
- def dump_toml(self, toml_file: "StrPath") -> None:
83
- toml_table = document()
84
-
85
- dumped_model = self.model_dump(mode="json")
86
- for name, field in self.__class__.model_fields.items():
87
- if field.description:
88
- for line in field.description.split(". "):
89
- toml_table.add(comment(f"{line.removesuffix('.')}."))
90
-
91
- value = getattr(self, name)
92
- toml_table[name] = value.get_secret_value() if isinstance(value, Secret) else dumped_model[name]
93
-
94
- Path(toml_file).write_text(
95
- tomlkit.dumps(toml_table),
96
- encoding="utf_8",
97
- )
@@ -1,16 +0,0 @@
1
- tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- tumblrbot/__main__.py,sha256=SbMPe0vwfPXdYQWy9UKkn0FLngU_QtznJTWybVnGFoA,1578
3
- tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- tumblrbot/flow/download.py,sha256=xSfic66FR-LIG0KtJMcM5KVd2hiijCH3cyD7dzFKwLM,2042
5
- tumblrbot/flow/examples.py,sha256=5k14Eyqi9MRFF_k7Vp7WhjP9khq7YDDgxz54Shs1nU8,4031
6
- tumblrbot/flow/fine_tune.py,sha256=X9xR9uPgiK6DKQ-lZ6oqwTPypUx5D5S1MiKrFKSm5ng,5381
7
- tumblrbot/flow/generate.py,sha256=Q6nUtmoj28-rGUCs4V0fuovJshvFMlmipyu9GGqnmzM,2147
8
- tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- tumblrbot/utils/common.py,sha256=tnKBBiEGQPOWByLi0vT31BzpntndbIP_JekGkgjy15U,1841
10
- tumblrbot/utils/config.py,sha256=WL7LW8mKJy27DxcKK8JepO09pc4zz6Lwii8haV9vXLE,5399
11
- tumblrbot/utils/models.py,sha256=R2TPA35b8Lp4FalQrdzS0mhUClgBGx3FtdiGAZMU3tg,5355
12
- tumblrbot/utils/tumblr.py,sha256=bWTE1AAcOY1vf0NLoJCQCra0o0y7mJao4KvojgARJzI,1521
13
- tumblrbot-1.4.5.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
14
- tumblrbot-1.4.5.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
15
- tumblrbot-1.4.5.dist-info/METADATA,sha256=a2Et1vIW4HXA_fl_nPNP23g1XSVdTWTeq343ZPNXcoM,10258
16
- tumblrbot-1.4.5.dist-info/RECORD,,