tumblrbot 1.4.5__tar.gz → 1.4.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,4 @@
1
1
  # Custom
2
- .vscode
3
2
  data
4
3
  *.toml
5
4
  *.jsonl
@@ -192,7 +191,7 @@ cython_debug/
192
191
  # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
193
192
  # and can be added to the global gitignore or merged into this file. However, if you prefer,
194
193
  # you could uncomment the following to ignore the entire vscode folder
195
- # .vscode/
194
+ .vscode/
196
195
 
197
196
  # Ruff stuff:
198
197
  .ruff_cache/
@@ -200,13 +199,6 @@ cython_debug/
200
199
  # PyPI configuration file
201
200
  .pypirc
202
201
 
203
- # Cursor
204
- # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
205
- # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
206
- # refer to https://docs.cursor.com/context/ignore-files
207
- .cursorignore
208
- .cursorindexingignore
209
-
210
202
  # Marimo
211
203
  marimo/_static/
212
204
  marimo/_lsp/
@@ -1,22 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.4.5
3
+ Version: 1.4.7
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
5
  Requires-Python: >= 3.13
6
6
  Description-Content-Type: text/markdown
7
- Requires-Dist: httpx[http2]
8
7
  Requires-Dist: keyring
9
- Requires-Dist: more-itertools
10
- Requires-Dist: niquests[speedups, http3]
11
8
  Requires-Dist: openai
12
9
  Requires-Dist: pwinput
13
10
  Requires-Dist: pydantic
14
- Requires-Dist: pydantic-settings
11
+ Requires-Dist: requests
15
12
  Requires-Dist: requests-oauthlib
16
13
  Requires-Dist: rich
17
14
  Requires-Dist: tiktoken
18
15
  Requires-Dist: tomlkit
19
- Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
16
+ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
20
17
 
21
18
  # tumblrbot
22
19
 
@@ -59,7 +56,6 @@ Features:
59
56
  - An [interactive console][Main] for all steps of generating posts for the blog:
60
57
  1. Asks for [OpenAI] and [Tumblr] tokens.
61
58
  - Stores API tokens using [keyring].
62
- - Prevents API tokens from printing to the console.
63
59
  1. Retrieves [Tumblr] [OAuth] tokens.
64
60
  1. [Downloads posts][Download] from the [configured][config] [Tumblr] blogs.
65
61
  - Skips redownloading already downloaded posts.
@@ -69,8 +65,8 @@ Features:
69
65
  - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
70
66
  - Shows progress and previews the current post.
71
67
  - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
72
- 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
73
68
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
69
+ - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
74
70
  - Resumes monitoring the same fine-tuning process when restarted.
75
71
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
76
72
  - Stores the output model automatically when fine-tuning is completed.
@@ -39,7 +39,6 @@ Features:
39
39
  - An [interactive console][Main] for all steps of generating posts for the blog:
40
40
  1. Asks for [OpenAI] and [Tumblr] tokens.
41
41
  - Stores API tokens using [keyring].
42
- - Prevents API tokens from printing to the console.
43
42
  1. Retrieves [Tumblr] [OAuth] tokens.
44
43
  1. [Downloads posts][Download] from the [configured][config] [Tumblr] blogs.
45
44
  - Skips redownloading already downloaded posts.
@@ -49,8 +48,8 @@ Features:
49
48
  - Filters out any posts flagged by the [OpenAI] [Moderation API] (optional).
50
49
  - Shows progress and previews the current post.
51
50
  - Adds custom user messages and assistant responses to the dataset from the [configured][config] file.
52
- 1. Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
53
51
  1. [Uploads examples][Fine-Tune] to [OpenAI] and begins the fine-tuning process.
52
+ - Provides cost estimates if the currently saved examples are used to fine-tune the [configured][config] model.
54
53
  - Resumes monitoring the same fine-tuning process when restarted.
55
54
  - Deletes the uploaded examples file if fine-tuning does not succeed (optional).
56
55
  - Stores the output model automatically when fine-tuning is completed.
@@ -1,18 +1,15 @@
1
1
  [project]
2
2
  name = "tumblrbot"
3
- version = "1.4.5"
3
+ version = "1.4.7"
4
4
  description = "An updated bot that posts to Tumblr, based on your very own blog!"
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.13"
7
7
  dependencies = [
8
- "httpx[http2]",
9
8
  "keyring",
10
- "more-itertools",
11
- "niquests[speedups,http3]",
12
9
  "openai",
13
10
  "pwinput",
14
11
  "pydantic",
15
- "pydantic-settings",
12
+ "requests",
16
13
  "requests-oauthlib",
17
14
  "rich",
18
15
  "tiktoken",
@@ -20,11 +17,11 @@ dependencies = [
20
17
  ]
21
18
 
22
19
  [project.urls]
23
- Source = "https://github.com/MaidThatPrograms/tumblrbot"
20
+ Source = "https://github.com/MaidScientistIzutsumiMarin/tumblrbot"
24
21
 
25
22
  [project.scripts]
26
23
  tumblrbot = "tumblrbot.__main__:main"
27
24
 
28
25
  [build-system]
29
- requires = ["hatchling"]
30
- build-backend = "hatchling.build"
26
+ requires = ["flit_core"]
27
+ build-backend = "flit_core.buildapi"
@@ -1,4 +1,4 @@
1
- from openai import DefaultHttpxClient, OpenAI
1
+ from openai import OpenAI
2
2
  from rich.prompt import Confirm
3
3
  from rich.traceback import install
4
4
 
@@ -14,11 +14,8 @@ from tumblrbot.utils.tumblr import TumblrSession
14
14
  def main() -> None:
15
15
  install()
16
16
 
17
- tokens = Tokens.read_from_keyring()
18
- with (
19
- OpenAI(api_key=tokens.openai_api_key.get_secret_value(), http_client=DefaultHttpxClient(http2=True)) as openai,
20
- TumblrSession(tokens=tokens) as tumblr,
21
- ):
17
+ tokens = Tokens.load()
18
+ with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
22
19
  if Confirm.ask("Download latest posts?", default=False):
23
20
  PostDownloader(openai=openai, tumblr=tumblr).main()
24
21
 
@@ -18,7 +18,7 @@ class PostDownloader(FlowClass):
18
18
  completed = 0
19
19
  after = 0
20
20
  if data_path.exists():
21
- lines = data_path.read_text("utf_8").splitlines() if data_path.exists() else []
21
+ lines = data_path.read_bytes().splitlines() if data_path.exists() else []
22
22
  completed = len(lines)
23
23
  if lines:
24
24
  after = Post.model_validate_json(lines[-1]).timestamp
@@ -1,11 +1,11 @@
1
1
  from collections.abc import Generator
2
+ from itertools import batched
2
3
  from json import loads
3
4
  from math import ceil
4
5
  from re import search
5
6
  from typing import IO, override
6
7
 
7
8
  import rich
8
- from more_itertools import chunked
9
9
  from openai import BadRequestError
10
10
  from rich.prompt import Confirm
11
11
 
@@ -49,7 +49,7 @@ class ExamplesWriter(FlowClass):
49
49
  self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
50
50
  self.config.custom_prompts_file.touch(exist_ok=True)
51
51
 
52
- with self.config.custom_prompts_file.open("r", encoding="utf_8") as fp:
52
+ with self.config.custom_prompts_file.open("rb") as fp:
53
53
  for line in fp:
54
54
  data: dict[str, str] = loads(line)
55
55
  yield from data.items()
@@ -58,18 +58,18 @@ class ExamplesWriter(FlowClass):
58
58
  posts = self.get_valid_posts()
59
59
 
60
60
  if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
61
- chunk_size = self.get_moderation_chunk_limit()
61
+ batch_size = self.get_moderation_batch_size()
62
62
  posts = list(posts)
63
63
  removed = 0
64
64
 
65
65
  with PreviewLive() as live:
66
- for chunk in live.progress.track(
67
- chunked(posts, chunk_size),
68
- ceil(len(posts) / chunk_size),
66
+ for batch in live.progress.track(
67
+ batched(posts, batch_size, strict=False),
68
+ ceil(len(posts) / batch_size),
69
69
  description="Removing flagged posts...",
70
70
  ):
71
- response = self.openai.moderations.create(input=list(map(Post.get_content_text, chunk)))
72
- for post, moderation in zip(chunk, response.results, strict=True):
71
+ response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
72
+ for post, moderation in zip(batch, response.results, strict=True):
73
73
  if moderation.flagged:
74
74
  removed += 1
75
75
  live.custom_update(post)
@@ -81,18 +81,17 @@ class ExamplesWriter(FlowClass):
81
81
 
82
82
  def get_valid_posts(self) -> Generator[Post]:
83
83
  for data_path in self.get_data_paths():
84
- with data_path.open(encoding="utf_8") as fp:
84
+ with data_path.open("rb") as fp:
85
85
  for line in fp:
86
86
  post = Post.model_validate_json(line)
87
- if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
87
+ if post.valid_text_post():
88
88
  yield post
89
89
 
90
- def get_moderation_chunk_limit(self) -> int:
91
- test_n = 1000
90
+ def get_moderation_batch_size(self) -> int:
92
91
  try:
93
- self.openai.moderations.create(input=[""] * test_n)
92
+ self.openai.moderations.create(input=[""] * self.config.max_moderation_batch_size)
94
93
  except BadRequestError as error:
95
94
  message = error.response.json()["error"]["message"]
96
95
  if match := search(r"(\d+)\.", message):
97
96
  return int(match.group(1))
98
- return test_n
97
+ return self.config.max_moderation_batch_size
@@ -98,8 +98,8 @@ class FineTuner(FlowClass):
98
98
  if job.status == "failed" and job.error is not None:
99
99
  raise RuntimeError(job.error.message)
100
100
 
101
- if job.fine_tuned_model:
102
- self.config.fine_tuned_model = job.fine_tuned_model or ""
101
+ if job.fine_tuned_model is not None:
102
+ self.config.fine_tuned_model = job.fine_tuned_model
103
103
 
104
104
  def print_estimates(self) -> None:
105
105
  estimated_tokens = sum(self.count_tokens())
@@ -1,4 +1,5 @@
1
1
  from abc import abstractmethod
2
+ from pathlib import Path
2
3
  from random import choice
3
4
  from typing import ClassVar, Self, override
4
5
 
@@ -10,15 +11,14 @@ from rich.live import Live
10
11
  from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
11
12
  from rich.table import Table
12
13
 
13
- from tumblrbot.utils.config import Config, Path
14
- from tumblrbot.utils.models import FullyValidatedModel
14
+ from tumblrbot.utils.models import Config, FullyValidatedModel
15
15
  from tumblrbot.utils.tumblr import TumblrSession
16
16
 
17
17
 
18
18
  class FlowClass(FullyValidatedModel):
19
19
  model_config = ConfigDict(arbitrary_types_allowed=True)
20
20
 
21
- config: ClassVar = Config() # pyright: ignore[reportCallIssue]
21
+ config: ClassVar = Config.load()
22
22
 
23
23
  openai: OpenAI
24
24
  tumblr: TumblrSession
@@ -0,0 +1,209 @@
1
+ import tomllib
2
+ from abc import abstractmethod
3
+ from collections.abc import Generator
4
+ from pathlib import Path
5
+ from typing import Annotated, Any, ClassVar, Literal, Self, override
6
+
7
+ import rich
8
+ import tomlkit
9
+ from keyring import get_password, set_password
10
+ from openai.types import ChatModel
11
+ from pwinput import pwinput
12
+ from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, PlainSerializer, PositiveFloat, PositiveInt, model_validator
13
+ from pydantic.json_schema import SkipJsonSchema
14
+ from requests_oauthlib import OAuth1Session
15
+ from rich.panel import Panel
16
+ from rich.prompt import Confirm, Prompt
17
+ from tomlkit import comment, document
18
+
19
+
20
+ class FullyValidatedModel(BaseModel):
21
+ model_config = ConfigDict(
22
+ extra="ignore",
23
+ validate_assignment=True,
24
+ validate_default=True,
25
+ validate_return=True,
26
+ validate_by_name=True,
27
+ )
28
+
29
+
30
+ class FileSyncSettings(FullyValidatedModel):
31
+ @classmethod
32
+ @abstractmethod
33
+ def read(cls) -> Self | dict[str, object] | str | None: ...
34
+
35
+ @classmethod
36
+ def load(cls) -> Self:
37
+ data = cls.read() or {}
38
+ return cls.model_validate_json(data) if isinstance(data, str) else cls.model_validate(data)
39
+
40
+ @model_validator(mode="after")
41
+ @abstractmethod
42
+ def write(self) -> Self: ...
43
+
44
+
45
+ class Config(FileSyncSettings):
46
+ toml_file: ClassVar = Path("config.toml")
47
+
48
+ # Downloading Posts & Writing Examples
49
+ download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
50
+ data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
51
+
52
+ # Writing Examples
53
+ max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
54
+ custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
55
+
56
+ # Writing Examples & Fine-Tuning
57
+ examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
58
+
59
+ # Writing Examples & Generating
60
+ developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
61
+ user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
62
+
63
+ # Fine-Tuning
64
+ expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
65
+ token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
66
+ job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
67
+
68
+ # Fine-Tuning & Generating
69
+ base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
70
+ fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
71
+
72
+ # Generating
73
+ upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
74
+ draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
75
+ tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
76
+ tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
77
+
78
+ @classmethod
79
+ @override
80
+ def read(cls) -> dict[str, object] | None:
81
+ return tomllib.loads(cls.toml_file.read_text("utf_8")) if cls.toml_file.exists() else None
82
+
83
+ @model_validator(mode="after")
84
+ @override
85
+ def write(self) -> Self:
86
+ if not self.download_blog_identifiers:
87
+ rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
88
+ self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
89
+
90
+ if not self.upload_blog_identifier:
91
+ rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
92
+ self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
93
+
94
+ toml_table = document()
95
+
96
+ for (name, field), value in zip(self.__class__.model_fields.items(), self.model_dump(mode="json").values(), strict=True):
97
+ if field.description is not None:
98
+ for line in field.description.split(". "):
99
+ toml_table.add(comment(f"{line.removesuffix('.')}."))
100
+
101
+ toml_table[name] = value
102
+
103
+ Path(self.toml_file).write_text(tomlkit.dumps(toml_table), encoding="utf_8")
104
+
105
+ return self
106
+
107
+
108
+ class Tokens(FileSyncSettings):
109
+ class Tumblr(FullyValidatedModel):
110
+ client_key: str = ""
111
+ client_secret: str = ""
112
+ resource_owner_key: str = ""
113
+ resource_owner_secret: str = ""
114
+
115
+ service_name: ClassVar = "tumblrbot"
116
+ username: ClassVar = "tokens"
117
+
118
+ openai_api_key: str = ""
119
+ tumblr: Tumblr = Tumblr()
120
+
121
+ @staticmethod
122
+ def get_oauth_tokens(token: dict[str, str]) -> tuple[str, str]:
123
+ return token["oauth_token"], token["oauth_token_secret"]
124
+
125
+ @staticmethod
126
+ def online_token_prompt(url: str, *tokens: str) -> Generator[str]:
127
+ formatted_token_string = " and ".join(f"[cyan]{token}[/]" for token in tokens)
128
+
129
+ rich.print(f"Retrieve your {formatted_token_string} from: {url}")
130
+ for token in tokens:
131
+ yield pwinput(f"Enter your {token} (masked): ").strip()
132
+
133
+ rich.print()
134
+
135
+ @classmethod
136
+ @override
137
+ def read(cls) -> str | None:
138
+ return get_password(cls.service_name, cls.username)
139
+
140
+ @model_validator(mode="after")
141
+ @override
142
+ def write(self) -> Self:
143
+ if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
144
+ (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
145
+
146
+ if not all(self.tumblr.model_dump().values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
147
+ self.tumblr.client_key, self.tumblr.client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
148
+
149
+ with OAuth1Session(
150
+ self.tumblr.client_key,
151
+ self.tumblr.client_secret,
152
+ ) as oauth_session:
153
+ fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
154
+ full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
155
+ (redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
156
+ oauth_response = oauth_session.parse_authorization_response(redirect_response)
157
+
158
+ with OAuth1Session(
159
+ self.tumblr.client_key,
160
+ self.tumblr.client_secret,
161
+ *self.get_oauth_tokens(fetch_response),
162
+ verifier=oauth_response["oauth_verifier"],
163
+ ) as oauth_session:
164
+ oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
165
+
166
+ self.tumblr.resource_owner_key, self.tumblr.resource_owner_secret = self.get_oauth_tokens(oauth_tokens)
167
+
168
+ set_password(self.service_name, self.username, self.model_dump_json())
169
+
170
+ return self
171
+
172
+
173
+ class Post(FullyValidatedModel):
174
+ class Block(FullyValidatedModel):
175
+ type: str = "text"
176
+ text: str = ""
177
+ blocks: list[int] = []
178
+
179
+ timestamp: SkipJsonSchema[int] = 0
180
+ tags: Annotated[list[str], PlainSerializer(",".join)] = []
181
+ state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
182
+
183
+ content: SkipJsonSchema[list[Block]] = []
184
+ layout: SkipJsonSchema[list[Block]] = []
185
+ trail: SkipJsonSchema[list[Any]] = []
186
+
187
+ is_submission: SkipJsonSchema[bool] = False
188
+
189
+ def __rich__(self) -> Panel:
190
+ return Panel(
191
+ self.get_content_text(),
192
+ title="Preview",
193
+ subtitle=" ".join(f"#{tag}" for tag in self.tags),
194
+ subtitle_align="left",
195
+ )
196
+
197
+ def valid_text_post(self) -> bool:
198
+ return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or self.trail or any(block.type == "ask" for block in self.layout))
199
+
200
+ def get_content_text(self) -> str:
201
+ return "\n\n".join(block.text for block in self.content)
202
+
203
+
204
+ class Example(FullyValidatedModel):
205
+ class Message(FullyValidatedModel):
206
+ role: Literal["developer", "user", "assistant"]
207
+ content: str
208
+
209
+ messages: list[Message]
@@ -0,0 +1,39 @@
1
+ from typing import Self
2
+
3
+ from requests import HTTPError, Response
4
+ from requests_oauthlib import OAuth1Session
5
+
6
+ from tumblrbot.utils.models import Post, Tokens
7
+
8
+
9
+ class TumblrSession(OAuth1Session):
10
+ def __init__(self, tokens: Tokens) -> None:
11
+ super().__init__(**tokens.tumblr.model_dump())
12
+ self.hooks["response"].append(self.response_hook)
13
+
14
+ def __enter__(self) -> Self:
15
+ super().__enter__()
16
+ return self
17
+
18
+ def response_hook(self, response: Response, *_args: object, **_kwargs: object) -> None:
19
+ try:
20
+ response.raise_for_status()
21
+ except HTTPError as error:
22
+ error.add_note(response.text)
23
+ raise
24
+
25
+ def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
26
+ return self.get(
27
+ f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
28
+ params={
29
+ "after": after,
30
+ "sort": "asc",
31
+ "npf": True,
32
+ },
33
+ )
34
+
35
+ def create_post(self, blog_identifier: str, post: Post) -> Response:
36
+ return self.post(
37
+ f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
38
+ json=post.model_dump(),
39
+ )
@@ -1,97 +0,0 @@
1
- from collections.abc import Sequence
2
- from pathlib import Path
3
- from typing import TYPE_CHECKING, Self, override
4
-
5
- import rich
6
- import tomlkit
7
- from openai.types import ChatModel
8
- from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, Secret, model_validator
9
- from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
10
- from rich.prompt import Prompt
11
- from tomlkit import comment, document
12
-
13
- if TYPE_CHECKING:
14
- from _typeshed import StrPath
15
-
16
-
17
- class Config(BaseSettings):
18
- model_config = SettingsConfigDict(
19
- extra="ignore",
20
- validate_assignment=True,
21
- validate_return=True,
22
- validate_by_name=True,
23
- cli_parse_args=True,
24
- cli_avoid_json=True,
25
- cli_kebab_case=True,
26
- toml_file="config.toml",
27
- )
28
-
29
- # Downloading Posts & Writing Examples
30
- download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
31
- data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
32
-
33
- # Writing Examples
34
- custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
35
-
36
- # Writing Examples & Fine-Tuning
37
- examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
38
-
39
- # Writing Examples & Generating
40
- developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
41
- user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
42
-
43
- # Fine-Tuning
44
- expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
45
- token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
46
- job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
47
-
48
- # Fine-Tuning & Generating
49
- base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
50
- fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
51
-
52
- # Generating
53
- upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
54
- draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
55
- tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
56
- tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
57
-
58
- @override
59
- @classmethod
60
- def settings_customise_sources(cls, settings_cls: type[BaseSettings], *args: PydanticBaseSettingsSource, **kwargs: PydanticBaseSettingsSource) -> tuple[PydanticBaseSettingsSource, ...]:
61
- return (TomlConfigSettingsSource(settings_cls),)
62
-
63
- @model_validator(mode="after")
64
- def write_to_file(self) -> Self:
65
- if not self.download_blog_identifiers:
66
- rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
67
- self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
68
-
69
- if not self.upload_blog_identifier:
70
- rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
71
- self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
72
-
73
- toml_files = self.model_config.get("toml_file")
74
- if isinstance(toml_files, (Path, str)):
75
- self.dump_toml(toml_files)
76
- elif isinstance(toml_files, Sequence):
77
- for toml_file in toml_files:
78
- self.dump_toml(toml_file)
79
-
80
- return self
81
-
82
- def dump_toml(self, toml_file: "StrPath") -> None:
83
- toml_table = document()
84
-
85
- dumped_model = self.model_dump(mode="json")
86
- for name, field in self.__class__.model_fields.items():
87
- if field.description:
88
- for line in field.description.split(". "):
89
- toml_table.add(comment(f"{line.removesuffix('.')}."))
90
-
91
- value = getattr(self, name)
92
- toml_table[name] = value.get_secret_value() if isinstance(value, Secret) else dumped_model[name]
93
-
94
- Path(toml_file).write_text(
95
- tomlkit.dumps(toml_table),
96
- encoding="utf_8",
97
- )
@@ -1,135 +0,0 @@
1
- from collections.abc import Generator
2
- from typing import Annotated, Any, ClassVar, Literal, Self, override
3
-
4
- import rich
5
- from keyring import get_password, set_password
6
- from niquests import Session
7
- from openai import BaseModel
8
- from pwinput import pwinput
9
- from pydantic import ConfigDict, PlainSerializer, SecretStr
10
- from pydantic.json_schema import SkipJsonSchema
11
- from requests_oauthlib import OAuth1Session
12
- from rich.panel import Panel
13
- from rich.prompt import Confirm
14
-
15
- type SerializableSecretStr = Annotated[
16
- SecretStr,
17
- PlainSerializer(
18
- SecretStr.get_secret_value,
19
- when_used="json-unless-none",
20
- ),
21
- ]
22
-
23
-
24
- class FullyValidatedModel(BaseModel):
25
- model_config = ConfigDict(
26
- extra="ignore",
27
- validate_assignment=True,
28
- validate_default=True,
29
- validate_return=True,
30
- validate_by_name=True,
31
- )
32
-
33
-
34
- class Tokens(FullyValidatedModel):
35
- class Tumblr(FullyValidatedModel):
36
- client_key: SerializableSecretStr = SecretStr("")
37
- client_secret: SerializableSecretStr = SecretStr("")
38
- resource_owner_key: SerializableSecretStr = SecretStr("")
39
- resource_owner_secret: SerializableSecretStr = SecretStr("")
40
-
41
- service_name: ClassVar = "tumblrbot"
42
- username: ClassVar = "tokens"
43
-
44
- openai_api_key: SerializableSecretStr = SecretStr("")
45
- tumblr: Tumblr = Tumblr()
46
-
47
- @staticmethod
48
- def online_token_prompt(url: str, *tokens: str) -> Generator[SecretStr]:
49
- formatted_token_string = " and ".join(f"[cyan]{token}[/]" for token in tokens)
50
-
51
- rich.print(f"Retrieve your {formatted_token_string} from: {url}")
52
- for token in tokens:
53
- yield SecretStr(pwinput(f"Enter your {token} (masked): ").strip())
54
-
55
- rich.print()
56
-
57
- @classmethod
58
- def read_from_keyring(cls) -> Self:
59
- if json_data := get_password(cls.service_name, cls.username):
60
- return cls.model_validate_json(json_data)
61
- return cls()
62
-
63
- @override
64
- def model_post_init(self, context: object) -> None:
65
- super().model_post_init(context)
66
-
67
- if not self.openai_api_key.get_secret_value() or Confirm.ask("Reset OpenAI API key?", default=False):
68
- (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
69
-
70
- if not all(self.tumblr.model_dump(mode="json").values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
71
- self.tumblr.client_key, self.tumblr.client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
72
-
73
- OAuth1Session.__bases__ = (Session,)
74
-
75
- with OAuth1Session(
76
- self.tumblr.client_key.get_secret_value(),
77
- self.tumblr.client_secret.get_secret_value(),
78
- ) as oauth_session:
79
- fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
80
- full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
81
- (redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
82
- oauth_response = oauth_session.parse_authorization_response(redirect_response.get_secret_value())
83
-
84
- with OAuth1Session(
85
- self.tumblr.client_key.get_secret_value(),
86
- self.tumblr.client_secret.get_secret_value(),
87
- fetch_response["oauth_token"],
88
- fetch_response["oauth_token_secret"],
89
- verifier=oauth_response["oauth_verifier"],
90
- ) as oauth_session:
91
- oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
92
-
93
- self.tumblr.resource_owner_key = oauth_tokens["oauth_token"]
94
- self.tumblr.resource_owner_secret = oauth_tokens["oauth_token_secret"]
95
-
96
- set_password(self.service_name, self.username, self.model_dump_json())
97
-
98
-
99
- class Post(FullyValidatedModel):
100
- class Block(FullyValidatedModel):
101
- type: str = "text"
102
- text: str = ""
103
- blocks: list[int] = [] # noqa: RUF012
104
-
105
- timestamp: SkipJsonSchema[int] = 0
106
- tags: Annotated[list[str], PlainSerializer(",".join)] = [] # noqa: RUF012
107
- state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
108
-
109
- content: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
110
- layout: SkipJsonSchema[list[Block]] = [] # noqa: RUF012
111
- trail: SkipJsonSchema[list[Any]] = [] # noqa: RUF012
112
-
113
- is_submission: SkipJsonSchema[bool] = False
114
-
115
- def __rich__(self) -> Panel:
116
- return Panel(
117
- self.get_content_text(),
118
- title="Preview",
119
- subtitle=" ".join(f"#{tag}" for tag in self.tags),
120
- subtitle_align="left",
121
- )
122
-
123
- def only_text_blocks(self) -> bool:
124
- return all(block.type == "text" for block in self.content) and not any(block.type == "ask" for block in self.layout)
125
-
126
- def get_content_text(self) -> str:
127
- return "\n\n".join(block.text for block in self.content)
128
-
129
-
130
- class Example(FullyValidatedModel):
131
- class Message(FullyValidatedModel):
132
- role: Literal["developer", "user", "assistant"]
133
- content: str
134
-
135
- messages: list[Message]
@@ -1,47 +0,0 @@
1
- from dataclasses import dataclass
2
- from typing import Self
3
-
4
- from niquests import HTTPError, PreparedRequest, Response, Session
5
- from requests_oauthlib import OAuth1
6
-
7
- from tumblrbot.utils.models import Post, Tokens
8
-
9
-
10
- @dataclass
11
- class TumblrSession(Session):
12
- tokens: Tokens
13
-
14
- def __post_init__(self) -> None:
15
- super().__init__(multiplexed=True, happy_eyeballs=True)
16
-
17
- self.auth = OAuth1(**self.tokens.tumblr.model_dump(mode="json"))
18
- self.hooks["response"].append(self.response_hook)
19
-
20
- def __enter__(self) -> Self:
21
- super().__enter__()
22
- return self
23
-
24
- def response_hook(self, response: PreparedRequest | Response) -> None:
25
- if isinstance(response, Response):
26
- try:
27
- response.raise_for_status()
28
- except HTTPError as error:
29
- if response.text:
30
- error.add_note(response.text)
31
- raise
32
-
33
- def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
34
- return self.get(
35
- f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
36
- params={
37
- "after": str(after),
38
- "sort": "asc",
39
- "npf": str(True),
40
- },
41
- )
42
-
43
- def create_post(self, blog_identifier: str, post: Post) -> Response:
44
- return self.post(
45
- f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
46
- json=post.model_dump(mode="json"),
47
- )
File without changes