tumblrbot-1.3.2-py3-none-any.whl → tumblrbot-1.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tumblrbot/__main__.py CHANGED
@@ -1,4 +1,4 @@
- from openai import OpenAI
+ from openai import DefaultHttpxClient, OpenAI
  from rich.prompt import Confirm
  from rich.traceback import install
  
@@ -14,7 +14,10 @@ def main() -> None:
      install()
  
      tokens = Tokens()
-     with OpenAI(api_key=tokens.openai_api_key.get_secret_value()) as openai, TumblrClient(tokens=tokens) as tumblr:
+     with (
+         OpenAI(api_key=tokens.openai_api_key.get_secret_value(), http_client=DefaultHttpxClient(http2=True)) as openai,
+         TumblrClient(tokens=tokens) as tumblr,
+     ):
          post_downloader = PostDownloader(openai, tumblr)
          if Confirm.ask("Download latest posts?", default=False):
              post_downloader.download()
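
The headline change here is HTTP/2 support for the OpenAI client. A minimal sketch of the new construction, with a placeholder key standing in for the Tokens secret: `DefaultHttpxClient` is the openai SDK's thin wrapper around `httpx.Client`, so `http2=True` is forwarded to httpx, which is why the `httpx[http2]` extra appears in METADATA below.

```python
# Minimal sketch, assuming a placeholder API key; main() above reads the
# real key from the Tokens model instead.
from openai import DefaultHttpxClient, OpenAI

with OpenAI(api_key="sk-placeholder", http_client=DefaultHttpxClient(http2=True)) as openai:
    ...  # requests on this client can now negotiate HTTP/2
```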
tumblrbot/flow/download.py CHANGED
@@ -7,32 +7,6 @@ from tumblrbot.utils.models import Post
  
  
  class PostDownloader(FlowClass):
-     def paginate_posts(self, blog_identifier: str, completed: int, after: int, fp: TextIOBase, live: PreviewLive) -> None:
-         task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
- 
-         while True:
-             response = self.tumblr.retrieve_published_posts(blog_identifier, after=after).json()["response"]
-             live.progress.update(task_id, total=response["blog"]["posts"], completed=completed)
- 
-             if posts := response["posts"]:
-                 for post in posts:
-                     dump(post, fp)
-                     fp.write("\n")
- 
-                     model = Post.model_validate(post)
-                     after = model.timestamp
-                     live.custom_update(model)
- 
-                 completed += len(posts)
-             else:
-                 return
- 
-     def get_data_path(self, blog_identifier: str) -> Path:
-         return (self.config.data_directory / blog_identifier).with_suffix(".jsonl")
- 
-     def get_data_paths(self) -> list[Path]:
-         return list(map(self.get_data_path, self.config.download_blog_identifiers))
- 
      def download(self) -> None:
          self.config.data_directory.mkdir(parents=True, exist_ok=True)
  
@@ -56,3 +30,29 @@ class PostDownloader(FlowClass):
                  fp,
                  live,
              )
+ 
+     def paginate_posts(self, blog_identifier: str, completed: int, after: int, fp: TextIOBase, live: PreviewLive) -> None:
+         task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=completed)
+ 
+         while True:
+             response = self.tumblr.retrieve_published_posts(blog_identifier, after=after).json()["response"]
+             live.progress.update(task_id, total=response["blog"]["posts"], completed=completed)
+ 
+             if posts := response["posts"]:
+                 for post in posts:
+                     dump(post, fp)
+                     fp.write("\n")
+ 
+                     model = Post.model_validate(post)
+                     after = model.timestamp
+                     live.custom_update(model)
+ 
+                 completed += len(posts)
+             else:
+                 return
+ 
+     def get_data_paths(self) -> list[Path]:
+         return list(map(self.get_data_path, self.config.download_blog_identifiers))
+ 
+     def get_data_path(self, blog_identifier: str) -> Path:
+         return (self.config.data_directory / blog_identifier).with_suffix(".jsonl")
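
The download methods are only reordered, not changed: `download()` still writes one JSON-serialized post per line to `data/<blog_identifier>.jsonl`, the path built by `get_data_path`. A hedged sketch of reading such a file back, using the hypothetical blog identifier `staff.tumblr.com` from the README examples:

```python
from json import loads
from pathlib import Path

# get_data_path builds data/<blog_identifier>.jsonl; paginate_posts dumps
# one post per line, so each line parses as a standalone JSON object.
data_path = (Path("data") / "staff.tumblr.com").with_suffix(".jsonl")
with data_path.open(encoding="utf_8") as fp:
    for line in fp:
        post = loads(line)  # one Tumblr post in NPF form
```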
tumblrbot/flow/examples.py CHANGED
@@ -8,7 +8,7 @@ from typing import IO
  
  import rich
  from more_itertools import chunked
- from openai import BadRequestError, OpenAI
+ from openai import BadRequestError
  from rich.console import Console
  from rich.prompt import Confirm
  from tiktoken import encoding_for_model, get_encoding
@@ -19,42 +19,42 @@ from tumblrbot.utils.models import Example, Post
  
  @dataclass
  class ExamplesWriter(FlowClass):
-     openai: OpenAI
      data_paths: list[Path]
  
-     def count_tokens(self) -> Generator[int]:
-         # Based on https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
-         # and https://cookbook.openai.com/examples/chat_finetuning_data_prep
-         try:
-             encoding = encoding_for_model(self.config.base_model)
-         except KeyError as error:
-             encoding = get_encoding("o200k_base")
-             Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
+     def write_examples(self) -> None:
+         self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
  
-         with self.config.examples_file.open(encoding="utf_8") as fp:
-             for line in fp:
-                 example = Example.model_validate_json(line)
-                 yield len(encoding.encode("assistant"))  # every reply is primed with <|start|>assistant<|message|>
-                 for message in example.messages:
-                     yield 4 + len(encoding.encode(message.content))
+         with self.config.examples_file.open("w", encoding="utf_8") as fp:
+             for user_message, assistant_response in self.get_custom_prompts():
+                 self.write_example(
+                     user_message,
+                     assistant_response,
+                     fp,
+                 )
  
-     def get_moderation_chunk_limit(self) -> int:
-         test_n = 1000
-         try:
-             self.openai.moderations.create(input=[""] * test_n)
-         except BadRequestError as error:
-             message = error.response.json()["error"]["message"]
-             if match := search(r"(\d+)\.", message):
-                 return int(match.group(1))
-         return test_n
+             for post in self.get_filtered_posts():
+                 self.write_example(
+                     self.config.user_message,
+                     post.get_content_text(),
+                     fp,
+                 )
  
-     def get_valid_posts(self) -> Generator[Post]:
-         for data_path in self.data_paths:
-             with data_path.open(encoding="utf_8") as fp:
-                 for line in fp:
-                     post = Post.model_validate_json(line)
-                     if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
-                         yield post
+         rich.print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
+ 
+     def write_example(self, user_message: str, assistant_message: str, fp: IO[str]) -> None:
+         example = Example(
+             messages=[
+                 Example.Message(role="developer", content=self.config.developer_message),
+                 Example.Message(role="user", content=user_message),
+                 Example.Message(role="assistant", content=assistant_message),
+             ],
+         )
+         fp.write(f"{example.model_dump_json()}\n")
+ 
+     def get_custom_prompts(self) -> Generator[tuple[str, str]]:
+         if self.config.custom_prompts_file.exists():
+             text = self.config.custom_prompts_file.read_text(encoding="utf_8")
+             yield from loads(text).items()
  
      def get_filtered_posts(self) -> Generator[Post]:
          posts = list(self.get_valid_posts())
@@ -79,37 +79,36 @@ class ExamplesWriter(FlowClass):
          else:
              yield from posts
  
-     def get_custom_prompts(self) -> Generator[tuple[str, str]]:
-         if self.config.custom_prompts_file.exists():
-             text = self.config.custom_prompts_file.read_text(encoding="utf_8")
-             yield from loads(text).items()
- 
-     def write_examples(self) -> None:
-         self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
- 
-         with self.config.examples_file.open("w", encoding="utf_8") as fp:
-             for post in self.get_filtered_posts():
-                 self.write_example(
-                     self.config.user_message,
-                     post.get_content_text(),
-                     fp,
-                 )
+     def get_valid_posts(self) -> Generator[Post]:
+         for data_path in self.data_paths:
+             with data_path.open(encoding="utf_8") as fp:
+                 for line in fp:
+                     post = Post.model_validate_json(line)
+                     if not (post.is_submission or post.trail) and post.only_text_blocks() and post.get_content_text():
+                         yield post
  
-             for user_message, assistant_response in self.get_custom_prompts():
-                 self.write_example(
-                     user_message,
-                     assistant_response,
-                     fp,
-                 )
+     def get_moderation_chunk_limit(self) -> int:
+         test_n = 1000
+         try:
+             self.openai.moderations.create(input=[""] * test_n)
+         except BadRequestError as error:
+             message = error.response.json()["error"]["message"]
+             if match := search(r"(\d+)\.", message):
+                 return int(match.group(1))
+         return test_n
  
-         rich.print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
+     def count_tokens(self) -> Generator[int]:
+         # Based on https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+         # and https://cookbook.openai.com/examples/chat_finetuning_data_prep
+         try:
+             encoding = encoding_for_model(self.config.base_model)
+         except KeyError as error:
+             encoding = get_encoding("o200k_base")
+             Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
  
-     def write_example(self, user_message: str, assistant_message: str, fp: IO[str]) -> None:
-         example = Example(
-             messages=[
-                 Example.Message(role="developer", content=self.config.developer_message),
-                 Example.Message(role="user", content=user_message),
-                 Example.Message(role="assistant", content=assistant_message),
-             ],
-         )
-         fp.write(f"{example.model_dump_json()}\n")
+         with self.config.examples_file.open(encoding="utf_8") as fp:
+             for line in fp:
+                 example = Example.model_validate_json(line)
+                 yield len(encoding.encode("assistant"))  # every reply is primed with <|start|>assistant<|message|>
+                 for message in example.messages:
+                     yield 4 + len(encoding.encode(message.content))
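
The relocated `count_tokens` keeps the OpenAI-cookbook heuristic: four framing tokens per message plus the encoded content, and one `assistant` priming sequence per example. A worked sketch of that arithmetic with the `o200k_base` fallback encoding used above:

```python
from tiktoken import get_encoding

# Worked sketch of the count_tokens heuristic for one example containing
# developer, user, and assistant messages.
encoding = get_encoding("o200k_base")
messages = [
    "You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.",
    "Please write a comical Tumblr post.",
    "a generated post body",
]
total = len(encoding.encode("assistant")) + sum(4 + len(encoding.encode(m)) for m in messages)
print(total)  # approximate training tokens for this single example
```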
tumblrbot/flow/fine_tune.py CHANGED
@@ -4,10 +4,7 @@ from textwrap import dedent
  from time import sleep
  
  import rich
- from openai import BadRequestError
- from openai.types import FileObject
  from openai.types.fine_tuning import FineTuningJob
- from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_fixed, wait_random
  
  from tumblrbot.utils.common import FlowClass, PreviewLive
  
@@ -20,46 +17,33 @@ class FineTuner(FlowClass):
      def dedent_print(text: str) -> None:
          rich.print(dedent(text).lstrip())
  
-     def process_completed_job(self, job: FineTuningJob) -> None:
-         if job.trained_tokens is not None:
-             self.dedent_print(f"""
-                 Trained Tokens: {job.trained_tokens:,}
-                 Cost: {self.get_cost_string(job.trained_tokens)}
-             """)
- 
-         self.config.job_id = ""
+     def fine_tune(self) -> None:
+         with PreviewLive() as live:
+             job = self.create_job(live)
  
-         if job.status == "failed" and job.error is not None:
-             raise RuntimeError(job.error.message)
+             self.dedent_print(f"""
+                 [bold]Fine-tuning is starting...[/]
+                 View it online at: https://platform.openai.com/finetune/{job.id}
+                 Created at: {datetime.fromtimestamp(job.created_at)}
+                 Base Model: {job.model}
  
-         if job.fine_tuned_model:
-             self.config.fine_tuned_model = job.fine_tuned_model or ""
+                 [italic dim]Closing this terminal will not stop the fine-tuning. This will take a while...
+             """)  # noqa: DTZ006
  
-     def poll_job_status(self) -> FineTuningJob:
-         job = self.openai.fine_tuning.jobs.retrieve(self.config.job_id)
+             task_id = live.progress.add_task("", total=None)
  
-         if self.config.expected_epochs != job.hyperparameters.n_epochs and isinstance(job.hyperparameters.n_epochs, int):
-             self.config.expected_epochs = job.hyperparameters.n_epochs
+             while job.status not in {"succeeded", "failed", "cancelled"}:
+                 job = self.poll_job_status()
  
-             self.dedent_print(f"""
-                 The number of epochs has been updated to {job.hyperparameters.n_epochs}!
-                 [cyan]Updated the config.
-             """)
-             self.print_estimates()
+                 live.progress.update(
+                     task_id,
+                     total=job.estimated_finish,
+                     description=f"Fine-tuning: [italic]{job.status.replace('_', ' ').title()}[/]...",
+                 )
  
-         return job
+                 sleep(1)
  
-     @retry(
-         stop=stop_after_attempt(5),
-         wait=wait_fixed(1.5) + wait_random(),
-         retry=retry_if_exception_type(BadRequestError),
-         reraise=True,
-     )
-     def attempt_submit_job(self, file: FileObject) -> FineTuningJob:
-         return self.openai.fine_tuning.jobs.create(
-             model=self.config.base_model,
-             training_file=file.id,
-         )
+             self.process_completed_job(job)
  
      def create_job(self, live: PreviewLive) -> FineTuningJob:
          if self.config.job_id:
@@ -71,41 +55,42 @@ class FineTuner(FlowClass):
              purpose="fine-tune",
          )
  
-         job = self.attempt_submit_job(file)
+         job = self.openai.fine_tuning.jobs.create(
+             model=self.config.base_model,
+             training_file=file.id,
+         )
  
          self.config.job_id = job.id
          return job
  
-     def fine_tune(self) -> None:
-         with PreviewLive() as live:
-             job = self.create_job(live)
- 
-             self.dedent_print(f"""
-                 [bold]Fine-tuning is starting...[/]
-                 View it online at: https://platform.openai.com/finetune/{job.id}
-                 Created at: {datetime.fromtimestamp(job.created_at)}
-                 Base Model: {job.model}
+     def poll_job_status(self) -> FineTuningJob:
+         job = self.openai.fine_tuning.jobs.retrieve(self.config.job_id)
  
-                 [italic dim]Closing this terminal will not stop the fine-tuning. This will take a while...
-             """)  # noqa: DTZ006
+         if self.config.expected_epochs != job.hyperparameters.n_epochs and isinstance(job.hyperparameters.n_epochs, int):
+             self.config.expected_epochs = job.hyperparameters.n_epochs
  
-             task_id = live.progress.add_task("", total=None)
+             self.dedent_print(f"""
+                 The number of epochs has been updated to {job.hyperparameters.n_epochs}!
+                 [cyan]Updated the config.
+             """)
+             self.print_estimates()
  
-             while job.status not in {"succeeded", "failed", "cancelled"}:
-                 job = self.poll_job_status()
+         return job
  
-                 live.progress.update(
-                     task_id,
-                     total=job.estimated_finish,
-                     description=f"Fine-tuning: [italic]{job.status.replace('_', ' ').title()}[/]...",
-                 )
+     def process_completed_job(self, job: FineTuningJob) -> None:
+         if job.trained_tokens is not None:
+             self.dedent_print(f"""
+                 Trained Tokens: {job.trained_tokens:,}
+                 Cost: {self.get_cost_string(job.trained_tokens)}
+             """)
  
-                 sleep(1)
+         self.config.job_id = ""
  
-             self.process_completed_job(job)
+         if job.status == "failed" and job.error is not None:
+             raise RuntimeError(job.error.message)
  
-     def get_cost_string(self, total_tokens: int) -> str:
-         return f"${self.config.token_price / 1000000 * total_tokens:.2f}"
+         if job.fine_tuned_model:
+             self.config.fine_tuned_model = job.fine_tuned_model or ""
  
      def print_estimates(self) -> None:
          total_tokens = self.config.expected_epochs * self.estimated_tokens
@@ -118,3 +103,6 @@ class FineTuner(FlowClass):
              NOTE: Token values are approximate and may not be 100% accurate, please be aware of this when using the data.
              [italic red]Amelia, Mutsumi, and Marin are not responsible for any inaccuracies in the token count or estimated price.[/]
          """)
+ 
+     def get_cost_string(self, total_tokens: int) -> str:
+         return f"${self.config.token_price / 1000000 * total_tokens:.2f}"
tumblrbot/flow/generate.py CHANGED
@@ -7,26 +7,20 @@ from tumblrbot.utils.models import Post
  
  
  class DraftGenerator(FlowClass):
-     def generate_tags(self, content: Post.Block) -> Post | None:
-         if random() < self.config.tags_chance:  # noqa: S311
-             return self.openai.responses.parse(
-                 input=f"Extract the most important subjects from the following text:\n\n{content.text}",
-                 model=self.config.base_model,
-                 text_format=Post,
-                 instructions="You are an advanced text summarization tool. You return the requested data to the user as a list of comma-separated strings.",
-                 temperature=0.5,
-             ).output_parsed
- 
-         return None
+     def create_drafts(self) -> None:
+         message = f"View drafts here: https://tumblr.com/blog/{self.config.upload_blog_identifier}/drafts"
  
-     def generate_content(self) -> Post.Block:
-         content = self.openai.responses.create(
-             input=self.config.user_message,
-             instructions=self.config.developer_message,
-             model=self.config.fine_tuned_model,
-         ).output_text
+         with PreviewLive() as live:
+             for i in live.progress.track(range(self.config.draft_count), description="Generating drafts..."):
+                 try:
+                     post = self.generate_post()
+                     self.tumblr.create_post(self.config.upload_blog_identifier, post)
+                     live.custom_update(post)
+                 except BaseException as exception:
+                     exception.add_note(f"📉 An error occurred! Generated {i} draft(s) before failing. {message}")
+                     raise
  
-         return Post.Block(type="text", text=content)
+         rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
  
      def generate_post(self) -> Post:
          content = self.generate_content()
@@ -38,17 +32,23 @@ class DraftGenerator(FlowClass):
              post.tags = tags.tags
          return post
  
-     def create_drafts(self) -> None:
-         message = f"View drafts here: https://tumblr.com/blog/{self.config.upload_blog_identifier}/drafts"
+     def generate_content(self) -> Post.Block:
+         content = self.openai.responses.create(
+             input=self.config.user_message,
+             instructions=self.config.developer_message,
+             model=self.config.fine_tuned_model,
+         ).output_text
  
-         with PreviewLive() as live:
-             for i in live.progress.track(range(self.config.draft_count), description="Generating drafts..."):
-                 try:
-                     post = self.generate_post()
-                     self.tumblr.create_post(self.config.upload_blog_identifier, post)
-                     live.custom_update(post)
-                 except BaseException as exception:
-                     exception.add_note(f"📉 An error occurred! Generated {i} draft(s) before failing. {message}")
-                     raise
+         return Post.Block(type="text", text=content)
  
-         rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
+     def generate_tags(self, content: Post.Block) -> Post | None:
+         if random() < self.config.tags_chance:  # noqa: S311
+             return self.openai.responses.parse(
+                 text_format=Post,
+                 input=f"Extract the most important subjects from the following text:\n\n{content.text}",
+                 instructions="You are an advanced text summarization tool. You return the requested data to the user as a list of comma-separated strings.",
+                 model=self.config.base_model,
+                 temperature=0.5,
+             ).output_parsed
+ 
+         return None
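
`generate_tags` still gates the extra tag-extraction request on one uniform roll against `tags_chance`. A toy sketch of that gate at the default value of 0.1:

```python
from random import random

# With tags_chance = 0.1, roughly one draft in ten triggers the extra
# responses.parse() call that extracts tags.
tags_chance = 0.1
tagged = sum(random() < tags_chance for _ in range(10_000))
print(f"{tagged} of 10,000 simulated drafts would request tags")
```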
tumblrbot/utils/common.py CHANGED
@@ -13,6 +13,14 @@ from tumblrbot.utils.config import Config
  from tumblrbot.utils.tumblr import TumblrClient
  
  
+ @dataclass
+ class FlowClass:
+     config: ClassVar = Config()  # pyright: ignore[reportCallIssue]
+ 
+     openai: OpenAI
+     tumblr: TumblrClient
+ 
+ 
  class PreviewLive(Live):
      def __init__(self) -> None:
          super().__init__()
@@ -38,11 +46,3 @@ class PreviewLive(Live):
          table.add_row(self.progress)
          table.add_row(*renderables)
          self.update(table)
- 
- 
- @dataclass
- class FlowClass:
-     config: ClassVar = Config()  # pyright: ignore[reportCallIssue]
- 
-     openai: OpenAI
-     tumblr: TumblrClient
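
Moving `FlowClass` above `PreviewLive` puts the shared base first. A toy re-statement of the pattern, with a plain dict standing in for the real `Config`: `config` is a `ClassVar`, so every flow instance reads the same object, while the `openai` and `tumblr` handles are per-instance fields.

```python
from dataclasses import dataclass
from typing import Any, ClassVar

@dataclass
class Flow:
    config: ClassVar[dict[str, Any]] = {"draft_count": 150}  # stand-in for Config()
    openai: object
    tumblr: object

a, b = Flow(None, None), Flow(None, None)
assert a.config is b.config  # one shared config across all flows
```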
tumblrbot/utils/config.py CHANGED
@@ -26,28 +26,33 @@ class Config(BaseSettings):
          toml_file="config.toml",
      )
  
-     fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
-     upload_blog_identifier: str = Field(
-         "",
-         description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.",
-     )
-     draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
-     tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
- 
-     download_blog_identifiers: list[str] = Field(
-         [],
-         description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.",
-     )
+     # Downloading Posts & Writing Examples
+     download_blog_identifiers: list[str] = Field([], description="The identifiers of the blogs which post data will be downloaded from. These must be blogs associated with the same account as the configured Tumblr secret tokens.")
      data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
+ 
+     # Writing Examples
      custom_prompts_file: Path = Field(Path("custom_prompts.json"), description="Where to read in custom prompts from.")
+ 
+     # Writing Examples & Fine-Tuning
      examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
-     job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
+ 
+     # Writing Examples & Generating
+     developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
+     user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
+ 
+     # Fine-Tuning
      expected_epochs: PositiveInt = Field(3, description="The expected number of epochs fine-tuning will be run for. This will be updated during fine-tuning.")
      token_price: PositiveFloat = Field(3, description="The expected price in USD per million tokens during fine-tuning for the current model.")
+     job_id: str = Field("", description="The fine-tuning job ID that will be polled on next run.")
  
+     # Fine-Tuning & Generating
      base_model: ChatModel = Field("gpt-4o-mini-2024-07-18", description="The name of the model that will be fine-tuned by the generated training data.")
-     developer_message: str = Field("You are a Tumblr post bot. Please generate a Tumblr post in accordance with the user's request.", description="The developer message used by the OpenAI API to generate drafts.")
-     user_message: str = Field("Please write a comical Tumblr post.", description="The user input used by the OpenAI API to generate drafts.")
+     fine_tuned_model: str = Field("", description="The name of the OpenAI model that was fine-tuned with your posts.")
+ 
+     # Generating
+     upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
+     draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
+     tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
  
      @override
      @classmethod
@@ -58,11 +63,11 @@ class Config(BaseSettings):
      def write_to_file(self) -> Self:
          if not self.download_blog_identifiers:
              rich.print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
-             self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold]Example: staff.tumblr.com,changes").split(",")))
+             self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
  
          if not self.upload_blog_identifier:
              rich.print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
-             self.upload_blog_identifier = Prompt.ask("[bold]Examples: staff.tumblr.com or changes").strip()
+             self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
  
          toml_files = self.model_config.get("toml_file")
          if isinstance(toml_files, (Path, str)):
@@ -86,6 +91,6 @@ class Config(BaseSettings):
              toml_table[name] = value.get_secret_value() if isinstance(value, Secret) else dumped_model[name]
  
          Path(toml_file).write_text(
-             tomlkit.dumps(toml_table),  # pyright: ignore[reportUnknownMemberType]
+             tomlkit.dumps(toml_table),
              encoding="utf_8",
          )
tumblrbot/utils/models.py CHANGED
@@ -57,8 +57,8 @@ class Tokens(FullyValidatedModel):
              self.tumblr_client_key, self.tumblr_client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
  
          oauth_session = OAuth1Session(*self.get_tumblr_tokens()[:2])
-         fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")  # pyright: ignore[reportUnknownMemberType]
-         full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")  # pyright: ignore[reportUnknownMemberType]
+         fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
+         full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
          (redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
          oauth_response = oauth_session.parse_authorization_response(redirect_response.get_secret_value())
          oauth_session = OAuth1Session(
@@ -67,7 +67,7 @@ class Tokens(FullyValidatedModel):
              fetch_response["oauth_token_secret"],
              verifier=oauth_response["oauth_verifier"],
          )
-         oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")  # pyright: ignore[reportUnknownMemberType]
+         oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
          self.tumblr_resource_owner_key = oauth_tokens["oauth_token"]
          self.tumblr_resource_owner_secret = oauth_tokens["oauth_token_secret"]
  
tumblrbot/utils/tumblr.py CHANGED
@@ -1,34 +1,44 @@
  from dataclasses import dataclass
+ from typing import Self
  
- from requests import HTTPError, Response
- from requests_oauthlib import OAuth1Session
+ from niquests import HTTPError, PreparedRequest, Response, Session
+ from requests_cache import CacheMixin
+ from requests_oauthlib import OAuth1
  
  from tumblrbot.utils.models import Post, Tokens
  
  
  @dataclass
- class TumblrClient(OAuth1Session):
+ class TumblrClient(Session, CacheMixin):  # pyright: ignore[reportIncompatibleMethodOverride, reportIncompatibleVariableOverride]
      tokens: Tokens
  
      def __post_init__(self) -> None:
-         super().__init__(*self.tokens.get_tumblr_tokens())  # pyright: ignore[reportUnknownMemberType]
+         super().__init__(happy_eyeballs=True)
+         CacheMixin.__init__(self, use_cache_dir=True)
  
+         self.auth = OAuth1(*self.tokens.get_tumblr_tokens())
          self.hooks["response"].append(self.response_hook)
  
-     def response_hook(self, response: Response, **_: object) -> None:
-         try:
-             response.raise_for_status()
-         except HTTPError as error:
-             error.add_note(response.text)
-             raise
+     def __enter__(self) -> Self:
+         super().__enter__()
+         return self
+ 
+     def response_hook(self, response: PreparedRequest | Response) -> None:
+         if isinstance(response, Response):
+             try:
+                 response.raise_for_status()
+             except HTTPError as error:
+                 if response.text:
+                     error.add_note(response.text)
+                 raise
  
      def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
          return self.get(
              f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
              params={
-                 "after": after,
+                 "after": str(after),
                  "sort": "asc",
-                 "npf": True,
+                 "npf": str(True),
              },
          )
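
A hedged usage sketch of the migrated client, assuming valid tokens are already stored: it now behaves as a cached niquests `Session` with OAuth1 request signing, and the response hook raises on HTTP errors with the response body attached as a note.

```python
# Hedged sketch; Tokens() will prompt for credentials if none are stored.
from tumblrbot.utils.models import Tokens
from tumblrbot.utils.tumblr import TumblrClient

with TumblrClient(tokens=Tokens()) as tumblr:
    response = tumblr.retrieve_published_posts("staff.tumblr.com", after=0)
    posts = response.json()["response"]["posts"]  # NPF posts, oldest first
```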
 
tumblrbot-1.4.0.dist-info/METADATA CHANGED
@@ -1,18 +1,19 @@
  Metadata-Version: 2.4
  Name: tumblrbot
- Version: 1.3.2
+ Version: 1.4.0
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
  Requires-Python: >= 3.13
  Description-Content-Type: text/markdown
+ Requires-Dist: httpx[http2]
  Requires-Dist: keyring
  Requires-Dist: more-itertools
+ Requires-Dist: niquests[speedups, http3]
  Requires-Dist: openai
  Requires-Dist: pydantic
  Requires-Dist: pydantic-settings
- Requires-Dist: requests
+ Requires-Dist: requests-cache
  Requires-Dist: requests-oauthlib
  Requires-Dist: rich
- Requires-Dist: tenacity
  Requires-Dist: tiktoken
  Requires-Dist: tomlkit
  Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
@@ -71,8 +72,7 @@ Features:
  - Automatically keeps the [config] file up-to-date and recreates it if missing.
  
  **To-Do:**
- - Add documentation.
- - Finish updating [README.md].
+ - Add code documentation.
  
  
  **Please submit an issue or contact us for features you want added/reimplemented.**
@@ -113,5 +113,19 @@ After inputting the [Tumblr] tokens, you will be given a URL that you need to op
  
  ## Configuration
  All config options can be found in `config.toml` after running the program once. This will be kept up-to-date if there are changes to the config's format in a future update. This also means it may be worthwhile to double-check the config file after an update. Any changes to the config should be in the changelog for a given version.
- > WIP: There will be more information about the config options soon.
+ 
+ All file options can include directories that will be created when the program is run.
+ 
+ - **`custom_prompts_file`** - You will have to create this file yourself. It should use the following format:
+   ```json
+   {
+       "user message 1": "assistant response 1",
+       "user message 2": "assistant response 2"
+   }
+   ```
+ - **`developer_message`** - This message is used for fine-tuning the AI as well as generating drafts. If you change it, you will need to run the fine-tuning again with the new value before generating posts.
+ - **`user_message`** - This message is used in the same way as `developer_message` and should be treated the same.
+ - **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal](https://platform.openai.com/finetune) under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
+ - **`token_price`** - The default value here is the default token price for `base_model`. You can find the up-to-date value [here](https://platform.openai.com/docs/pricing#fine-tuning), in the *Training* column.
+ - **`job_id`** - If there is any value here, this program will resume monitoring the corresponding job instead of starting a new one. It is set when fine-tuning starts and cleared when the job completes. You can find job IDs in the [fine-tuning portal](https://platform.openai.com/finetune).
+ - **`base_model`** - This value is used to choose the tokenizer for estimating fine-tuning costs. It is also the base model that will be fine-tuned and the model that is used to generate tags. You can find a list of options in the [fine-tuning portal](https://platform.openai.com/finetune) by pressing *+ Create* and opening the drop-down list for *Base Model*. Be sure to update `token_price` if you change this value.
+ - **`tags_chance`** - This should be between 0 and 1: 0 means tags are never added to a post, and 1 means they always are. Adding tags incurs a very small token cost.
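
For context, a short sketch of how the program consumes `custom_prompts_file` (mirroring `ExamplesWriter.get_custom_prompts` in the diff above): each key becomes a user message and each value the paired assistant response.

```python
from json import loads
from pathlib import Path

# Mirrors get_custom_prompts: the JSON object is iterated as
# (user_message, assistant_response) pairs.
text = Path("custom_prompts.json").read_text(encoding="utf_8")
for user_message, assistant_response in loads(text).items():
    print(user_message, "->", assistant_response)
```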
 
tumblrbot-1.4.0.dist-info/RECORD ADDED
@@ -0,0 +1,16 @@
+ tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tumblrbot/__main__.py,sha256=et7kVLouRgYJwIjKJ7ljWSonB0DCDHDG-H6wWftukpU,1608
+ tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tumblrbot/flow/download.py,sha256=bqcCV5n0dfYJdOlBGf7q5xVLucfSCGG7-j-YnpobXS4,2307
+ tumblrbot/flow/examples.py,sha256=24UijVYrTWh1Qwv9jbcsVBURx9oykkFW0JwhaBHbzdc,4919
+ tumblrbot/flow/fine_tune.py,sha256=JHARrhIaDTp7G-9aFK_tSc9E3ITZofXyuIAuJ9or-h0,4065
+ tumblrbot/flow/generate.py,sha256=dtxxL4chXOOAj4iw7hTbzt4CqEZxgHJ5vytw8u3Iqbw,2214
+ tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ tumblrbot/utils/common.py,sha256=AZMYE0mDgvpjCiMOPXKun57O45ABWuLN8qbIR3f1V1k,1340
+ tumblrbot/utils/config.py,sha256=EFQlgVYsDpPE-orgSnL96iJ4eL7zuXAxHKRWruEg-Zw,5165
+ tumblrbot/utils/models.py,sha256=BBt_DySsX65v_feNI9akpD-wzPRp4TWuD41gAwzcjzA,5143
+ tumblrbot/utils/tumblr.py,sha256=gM5j_Mj6cNXg4Rj2-xIbWAl0lfWdtxc-nZCUQRRCQRY,1685
+ tumblrbot-1.4.0.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
+ tumblrbot-1.4.0.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
+ tumblrbot-1.4.0.dist-info/METADATA,sha256=H7vQOlurpKPubfSPcSkbjqNVSG3Zgndejlei10o6UCk,8338
+ tumblrbot-1.4.0.dist-info/RECORD,,
tumblrbot-1.3.2.dist-info/RECORD DELETED
@@ -1,16 +0,0 @@
- tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tumblrbot/__main__.py,sha256=uth-d5QsXa-BilOHrgh7Dky4431RLIf9-SS44FFvyOA,1516
- tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tumblrbot/flow/download.py,sha256=CzGv3gpVKFqB0mEbFGk0tJmtwRZM8isvJ7pOPqVfIY0,2307
- tumblrbot/flow/examples.py,sha256=QQWrPo8g6KRFmEaydnk91fBjI2a7ey3vlxsgecuF2yk,4947
- tumblrbot/flow/fine_tune.py,sha256=q5J1fL5ICjL7B72ceHRRbArj345DA0mX0_f6DYX5P4Q,4538
- tumblrbot/flow/generate.py,sha256=PeMMfTLD41iLBbVnNhvdM2UzyOtoTK3OS0R4YLxZjMA,2214
- tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tumblrbot/utils/common.py,sha256=GjyTJmU3NEjxr6Bhta-RmKlvboH4Qhi8Si6Yw5N8BGo,1340
- tumblrbot/utils/config.py,sha256=32AiFIYXzlNG1K-I_zjflfiEjHJ9uUhfm3OcYzNgv_I,5030
- tumblrbot/utils/models.py,sha256=KmJIxTLZNcrd5sNwN2ldKVr8JGlmcSksH6v8e9zFWkg,5275
- tumblrbot/utils/tumblr.py,sha256=z4kdtHSRx89AQPlLpQ_Vcdfs3tZToFS89k6QKeBu42s,1243
- tumblrbot-1.3.2.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
- tumblrbot-1.3.2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
- tumblrbot-1.3.2.dist-info/METADATA,sha256=KjZINv3bcH64q5SXIB9VJMEkQR4X0bifUf-2q6mgr6I,6163
- tumblrbot-1.3.2.dist-info/RECORD,,