tumblrbot 1.9.5__py3-none-any.whl → 1.9.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tumblrbot/__main__.py +10 -4
- tumblrbot/flow/examples.py +55 -58
- tumblrbot/flow/fine_tune.py +6 -6
- tumblrbot/flow/generate.py +6 -7
- tumblrbot/utils/common.py +11 -14
- tumblrbot/utils/models.py +66 -61
- tumblrbot/utils/tumblr.py +28 -11
- {tumblrbot-1.9.5.dist-info → tumblrbot-1.9.7.dist-info}/METADATA +28 -22
- tumblrbot-1.9.7.dist-info/RECORD +15 -0
- tumblrbot-1.9.5.dist-info/RECORD +0 -15
- {tumblrbot-1.9.5.dist-info → tumblrbot-1.9.7.dist-info}/WHEEL +0 -0
- {tumblrbot-1.9.5.dist-info → tumblrbot-1.9.7.dist-info}/entry_points.txt +0 -0
tumblrbot/__main__.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from sys import exit as sys_exit
|
|
2
|
+
|
|
1
3
|
from openai import OpenAI
|
|
2
4
|
from rich.prompt import Confirm
|
|
3
5
|
from rich.traceback import install
|
|
@@ -17,16 +19,16 @@ def main() -> None:
|
|
|
17
19
|
tokens = Tokens.load()
|
|
18
20
|
with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
|
|
19
21
|
if Confirm.ask("Download latest posts?", default=False):
|
|
20
|
-
PostDownloader(openai
|
|
22
|
+
PostDownloader(openai, tumblr).main()
|
|
21
23
|
|
|
22
|
-
examples_writer = ExamplesWriter(openai
|
|
24
|
+
examples_writer = ExamplesWriter(openai, tumblr)
|
|
23
25
|
if Confirm.ask("Create training data?", default=False):
|
|
24
26
|
examples_writer.main()
|
|
25
27
|
|
|
26
28
|
if Confirm.ask("Remove training data flagged by the OpenAI moderation? [bold]This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
|
|
27
29
|
examples_writer.filter_examples()
|
|
28
30
|
|
|
29
|
-
fine_tuner = FineTuner(openai
|
|
31
|
+
fine_tuner = FineTuner(openai, tumblr)
|
|
30
32
|
fine_tuner.print_estimates()
|
|
31
33
|
|
|
32
34
|
message = "Resume monitoring the previous fine-tuning process?" if FlowClass.config.job_id else "Upload data to OpenAI for fine-tuning?"
|
|
@@ -34,4 +36,8 @@ def main() -> None:
|
|
|
34
36
|
fine_tuner.main()
|
|
35
37
|
|
|
36
38
|
if Confirm.ask("Generate drafts?", default=False):
|
|
37
|
-
DraftGenerator(openai
|
|
39
|
+
DraftGenerator(openai, tumblr).main()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
if __name__ == "__main__":
|
|
43
|
+
sys_exit(main())
|
tumblrbot/flow/examples.py
CHANGED
|
@@ -1,52 +1,45 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Generator
|
|
2
2
|
from itertools import batched
|
|
3
3
|
from json import loads
|
|
4
4
|
from math import ceil
|
|
5
|
-
from re import
|
|
6
|
-
from
|
|
5
|
+
from re import IGNORECASE
|
|
6
|
+
from re import compile as re_compile
|
|
7
|
+
from typing import TYPE_CHECKING, override
|
|
7
8
|
|
|
8
|
-
import
|
|
9
|
-
from
|
|
9
|
+
from openai import RateLimitError
|
|
10
|
+
from rich import print as rich_print
|
|
11
|
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential
|
|
10
12
|
|
|
11
13
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
12
|
-
from tumblrbot.utils.models import Example, Post
|
|
14
|
+
from tumblrbot.utils.models import Example, Message, Post
|
|
13
15
|
|
|
14
16
|
if TYPE_CHECKING:
|
|
15
|
-
from collections.abc import Generator
|
|
17
|
+
from collections.abc import Generator, Iterable
|
|
16
18
|
from pathlib import Path
|
|
17
19
|
|
|
20
|
+
from openai._types import SequenceNotStr
|
|
21
|
+
from openai.types import ModerationCreateResponse, ModerationMultiModalInputParam
|
|
22
|
+
|
|
18
23
|
|
|
19
24
|
class ExamplesWriter(FlowClass):
|
|
20
25
|
@override
|
|
21
26
|
def main(self) -> None:
|
|
22
27
|
self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
|
|
23
28
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
for post in self.get_valid_posts():
|
|
33
|
-
self.write_example(
|
|
34
|
-
self.config.user_message,
|
|
35
|
-
str(post),
|
|
36
|
-
fp,
|
|
37
|
-
)
|
|
38
|
-
|
|
39
|
-
rich.print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
|
|
40
|
-
|
|
41
|
-
def write_example(self, user_message: str, assistant_message: str, fp: IO[str]) -> None:
|
|
42
|
-
example = Example(
|
|
29
|
+
examples = [self.create_example(*prompt) for prompt in self.get_custom_prompts()]
|
|
30
|
+
examples.extend(self.create_example(self.config.user_message, str(post)) for post in self.get_valid_posts())
|
|
31
|
+
self.write_examples(examples)
|
|
32
|
+
|
|
33
|
+
rich_print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
|
|
34
|
+
|
|
35
|
+
def create_example(self, user_message: str, assistant_message: str) -> Example:
|
|
36
|
+
return Example(
|
|
43
37
|
messages=[
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
38
|
+
Message(role="developer", content=self.config.developer_message),
|
|
39
|
+
Message(role="user", content=user_message),
|
|
40
|
+
Message(role="assistant", content=assistant_message),
|
|
47
41
|
],
|
|
48
42
|
)
|
|
49
|
-
fp.write(f"{example.model_dump_json()}\n")
|
|
50
43
|
|
|
51
44
|
def get_custom_prompts(self) -> Generator[tuple[str, str]]:
|
|
52
45
|
self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -57,13 +50,19 @@ class ExamplesWriter(FlowClass):
|
|
|
57
50
|
data: dict[str, str] = loads(line)
|
|
58
51
|
yield from data.items()
|
|
59
52
|
|
|
53
|
+
# This function mostly exists to make writing examples atomic.
|
|
54
|
+
def write_examples(self, examples: Iterable[Example]) -> None:
|
|
55
|
+
with self.config.examples_file.open("w", encoding="utf_8") as fp:
|
|
56
|
+
for example in examples:
|
|
57
|
+
fp.write(f"{example.model_dump_json()}\n")
|
|
58
|
+
|
|
60
59
|
def get_valid_posts(self) -> Generator[Post]:
|
|
61
60
|
for path in self.get_data_paths():
|
|
62
61
|
posts = list(self.get_valid_posts_from_path(path))
|
|
63
62
|
yield from posts[-self.config.post_limit :]
|
|
64
63
|
|
|
65
64
|
def get_valid_posts_from_path(self, path: Path) -> Generator[Post]:
|
|
66
|
-
pattern =
|
|
65
|
+
pattern = re_compile("|".join(self.config.filtered_words), IGNORECASE)
|
|
67
66
|
with path.open("rb") as fp:
|
|
68
67
|
for line in fp:
|
|
69
68
|
post = Post.model_validate_json(line)
|
|
@@ -71,30 +70,28 @@ class ExamplesWriter(FlowClass):
|
|
|
71
70
|
yield post
|
|
72
71
|
|
|
73
72
|
def filter_examples(self) -> None:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
return int(match.group(1))
|
|
100
|
-
return self.config.max_moderation_batch_size
|
|
73
|
+
raw_examples = self.config.examples_file.read_bytes().splitlines()
|
|
74
|
+
old_examples = map(Example.model_validate_json, raw_examples)
|
|
75
|
+
new_examples: list[Example] = []
|
|
76
|
+
with PreviewLive() as live:
|
|
77
|
+
for batch in live.progress.track(
|
|
78
|
+
batched(old_examples, self.config.moderation_batch_size, strict=False),
|
|
79
|
+
ceil(len(raw_examples) / self.config.moderation_batch_size),
|
|
80
|
+
description="Removing flagged posts...",
|
|
81
|
+
):
|
|
82
|
+
response = self.create_moderation_batch(tuple(map(Example.get_assistant_message, batch)))
|
|
83
|
+
new_examples.extend(example for example, moderation in zip(batch, response.results, strict=True) if not moderation.flagged)
|
|
84
|
+
|
|
85
|
+
self.write_examples(new_examples)
|
|
86
|
+
|
|
87
|
+
rich_print(f"[red]Removed {len(raw_examples) - len(new_examples)} posts.\n")
|
|
88
|
+
|
|
89
|
+
@retry(
|
|
90
|
+
stop=stop_after_attempt(10),
|
|
91
|
+
wait=wait_random_exponential(),
|
|
92
|
+
retry=retry_if_exception_type(RateLimitError),
|
|
93
|
+
before_sleep=lambda state: rich_print(f"[yellow]OpenAI rate limit exceeded. Waiting for {state.idle_for} seconds..."),
|
|
94
|
+
reraise=True,
|
|
95
|
+
)
|
|
96
|
+
def create_moderation_batch(self, api_input: str | SequenceNotStr[str] | Iterable[ModerationMultiModalInputParam]) -> ModerationCreateResponse:
|
|
97
|
+
return self.openai.moderations.create(input=api_input)
|
tumblrbot/flow/fine_tune.py
CHANGED
|
@@ -3,9 +3,9 @@ from textwrap import dedent
|
|
|
3
3
|
from time import sleep
|
|
4
4
|
from typing import TYPE_CHECKING, override
|
|
5
5
|
|
|
6
|
-
import
|
|
7
|
-
from rich import progress
|
|
6
|
+
from rich import print as rich_print
|
|
8
7
|
from rich.console import Console
|
|
8
|
+
from rich.progress import open as progress_open
|
|
9
9
|
from rich.prompt import Confirm
|
|
10
10
|
from tiktoken import encoding_for_model, get_encoding
|
|
11
11
|
|
|
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
|
|
|
21
21
|
class FineTuner(FlowClass):
|
|
22
22
|
@staticmethod
|
|
23
23
|
def dedent_print(text: str) -> None:
|
|
24
|
-
|
|
24
|
+
rich_print(dedent(text).lstrip())
|
|
25
25
|
|
|
26
26
|
@override
|
|
27
27
|
def main(self) -> None:
|
|
@@ -55,12 +55,12 @@ class FineTuner(FlowClass):
|
|
|
55
55
|
if self.config.job_id:
|
|
56
56
|
return self.poll_job_status()
|
|
57
57
|
|
|
58
|
-
with
|
|
58
|
+
with progress_open(self.config.examples_file, "rb", description=f"Uploading [purple]{self.config.examples_file}[/]...") as fp:
|
|
59
59
|
file = self.openai.files.create(
|
|
60
60
|
file=fp,
|
|
61
61
|
purpose="fine-tune",
|
|
62
62
|
)
|
|
63
|
-
|
|
63
|
+
rich_print()
|
|
64
64
|
|
|
65
65
|
job = self.openai.fine_tuning.jobs.create(
|
|
66
66
|
model=self.config.base_model,
|
|
@@ -96,7 +96,7 @@ class FineTuner(FlowClass):
|
|
|
96
96
|
if job.status != "succeeded":
|
|
97
97
|
if Confirm.ask("[gray62]Delete uploaded examples file?", default=False):
|
|
98
98
|
self.openai.files.delete(job.training_file)
|
|
99
|
-
|
|
99
|
+
rich_print()
|
|
100
100
|
|
|
101
101
|
if job.status == "failed" and job.error is not None:
|
|
102
102
|
raise RuntimeError(job.error.message)
|
tumblrbot/flow/generate.py
CHANGED
|
@@ -1,21 +1,20 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
1
2
|
from functools import cache
|
|
2
3
|
from random import choice, random, sample
|
|
3
4
|
from typing import TYPE_CHECKING, override
|
|
4
5
|
|
|
5
|
-
import
|
|
6
|
-
from pydantic import ConfigDict
|
|
6
|
+
from rich import print as rich_print
|
|
7
7
|
from rich.prompt import IntPrompt
|
|
8
8
|
|
|
9
9
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
10
|
-
from tumblrbot.utils.models import Post
|
|
10
|
+
from tumblrbot.utils.models import Block, Post
|
|
11
11
|
|
|
12
12
|
if TYPE_CHECKING:
|
|
13
13
|
from collections.abc import Iterable
|
|
14
14
|
|
|
15
15
|
|
|
16
|
+
@dataclass(frozen=True)
|
|
16
17
|
class DraftGenerator(FlowClass):
|
|
17
|
-
model_config = ConfigDict(frozen=True) # Makes this class hashable.
|
|
18
|
-
|
|
19
18
|
@override
|
|
20
19
|
def main(self) -> None:
|
|
21
20
|
self.config.draft_count = IntPrompt.ask("How many drafts should be generated?", default=self.config.draft_count)
|
|
@@ -32,7 +31,7 @@ class DraftGenerator(FlowClass):
|
|
|
32
31
|
exception.add_note(f"📉 An error occurred! Generated {i} draft(s) before failing. {message}")
|
|
33
32
|
raise
|
|
34
33
|
|
|
35
|
-
|
|
34
|
+
rich_print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
|
|
36
35
|
|
|
37
36
|
def generate_post(self) -> Post:
|
|
38
37
|
if original := self.get_random_post():
|
|
@@ -48,7 +47,7 @@ class DraftGenerator(FlowClass):
|
|
|
48
47
|
tags = tags.tags
|
|
49
48
|
|
|
50
49
|
return Post(
|
|
51
|
-
content=[
|
|
50
|
+
content=[Block(text=text)],
|
|
52
51
|
tags=tags or [],
|
|
53
52
|
parent_tumblelog_uuid=original.blog.uuid,
|
|
54
53
|
parent_post_id=original.id,
|
tumblrbot/utils/common.py
CHANGED
|
@@ -1,23 +1,25 @@
|
|
|
1
1
|
from abc import abstractmethod
|
|
2
|
-
from
|
|
2
|
+
from dataclasses import dataclass
|
|
3
3
|
from random import choice
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
5
5
|
|
|
6
|
-
from openai import OpenAI
|
|
7
|
-
from pydantic import ConfigDict
|
|
6
|
+
from openai import OpenAI # noqa: TC002
|
|
8
7
|
from rich._spinners import SPINNERS
|
|
9
|
-
from rich.console import RenderableType
|
|
10
8
|
from rich.live import Live
|
|
11
9
|
from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
|
|
12
10
|
from rich.table import Table
|
|
13
11
|
|
|
14
|
-
from tumblrbot.utils.models import Config
|
|
15
|
-
from tumblrbot.utils.tumblr import TumblrSession
|
|
12
|
+
from tumblrbot.utils.models import Config
|
|
13
|
+
from tumblrbot.utils.tumblr import TumblrSession # noqa: TC001
|
|
16
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pathlib import Path
|
|
17
17
|
|
|
18
|
-
|
|
19
|
-
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
18
|
+
from rich.console import RenderableType
|
|
20
19
|
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
|
|
22
|
+
class FlowClass:
|
|
21
23
|
config: ClassVar = Config.load()
|
|
22
24
|
|
|
23
25
|
openai: OpenAI
|
|
@@ -48,11 +50,6 @@ class PreviewLive(Live):
|
|
|
48
50
|
|
|
49
51
|
self.custom_update()
|
|
50
52
|
|
|
51
|
-
@override
|
|
52
|
-
def __enter__(self) -> Self:
|
|
53
|
-
super().__enter__()
|
|
54
|
-
return self
|
|
55
|
-
|
|
56
53
|
def custom_update(self, *renderables: RenderableType | None) -> None:
|
|
57
54
|
table = Table.grid()
|
|
58
55
|
table.add_row(self.progress)
|
tumblrbot/utils/models.py
CHANGED
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
from collections.abc import Generator
|
|
2
1
|
from getpass import getpass
|
|
3
2
|
from pathlib import Path
|
|
4
|
-
from
|
|
3
|
+
from tomllib import loads
|
|
4
|
+
from typing import TYPE_CHECKING, Annotated, Any, Literal, Self, override
|
|
5
5
|
|
|
6
|
-
import
|
|
7
|
-
from openai.types import ChatModel
|
|
6
|
+
from openai.types import ChatModel # noqa: TC002
|
|
8
7
|
from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, NonNegativeInt, PlainSerializer, PositiveFloat, PositiveInt, model_validator
|
|
9
|
-
from pydantic.json_schema import SkipJsonSchema
|
|
8
|
+
from pydantic.json_schema import SkipJsonSchema # noqa: TC002
|
|
10
9
|
from requests_oauthlib import OAuth1Session
|
|
10
|
+
from rich import print as rich_print
|
|
11
11
|
from rich.panel import Panel
|
|
12
12
|
from rich.prompt import Prompt
|
|
13
13
|
from tomlkit import comment, document, dumps # pyright: ignore[reportUnknownVariableType]
|
|
14
|
-
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Generator
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
class FullyValidatedModel(BaseModel):
|
|
@@ -58,7 +60,7 @@ class Config(FileSyncSettings):
|
|
|
58
60
|
|
|
59
61
|
# Writing Examples
|
|
60
62
|
post_limit: NonNegativeInt = Field(0, description="The number of the most recent posts from each blog that should be included in the training data.")
|
|
61
|
-
|
|
63
|
+
moderation_batch_size: PositiveInt = Field(25, description="The number of posts at a time to submit to the OpenAI moderation API.")
|
|
62
64
|
custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
|
|
63
65
|
filtered_words: list[str] = Field([], description="A case-insensitive list of disallowed words used to filter out training data. Regular expressions are allowed, but must be escaped.")
|
|
64
66
|
|
|
@@ -80,7 +82,7 @@ class Config(FileSyncSettings):
|
|
|
80
82
|
|
|
81
83
|
# Generating
|
|
82
84
|
upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
|
|
83
|
-
draft_count: PositiveInt = Field(
|
|
85
|
+
draft_count: PositiveInt = Field(100, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
|
|
84
86
|
tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will use more OpenAI tokens.")
|
|
85
87
|
tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
|
|
86
88
|
reblog_blog_identifiers: list[str] = Field([], description="The identifiers of blogs that can be reblogged from when generating drafts.")
|
|
@@ -88,13 +90,15 @@ class Config(FileSyncSettings):
|
|
|
88
90
|
reblog_user_message: str = Field("Please write a comical Tumblr post in response to the following post:\n\n{}", description="The format string for the user message used to reblog posts.")
|
|
89
91
|
|
|
90
92
|
@override
|
|
91
|
-
def model_post_init(self,
|
|
93
|
+
def model_post_init(self, context: object) -> None:
|
|
94
|
+
super().model_post_init(context)
|
|
95
|
+
|
|
92
96
|
if not self.download_blog_identifiers:
|
|
93
|
-
|
|
97
|
+
rich_print("Enter the [cyan]identifiers of your blogs[/] that data should be [bold purple]downloaded[/] from, separated by commas.")
|
|
94
98
|
self.download_blog_identifiers = list(map(str.strip, Prompt.ask("[bold][Example] [dim]staff.tumblr.com,changes").split(",")))
|
|
95
99
|
|
|
96
100
|
if not self.upload_blog_identifier:
|
|
97
|
-
|
|
101
|
+
rich_print("Enter the [cyan]identifier of your blog[/] that drafts should be [bold purple]uploaded[/] to.")
|
|
98
102
|
self.upload_blog_identifier = Prompt.ask("[bold][Example] [dim]staff.tumblr.com or changes").strip()
|
|
99
103
|
|
|
100
104
|
|
|
@@ -108,8 +112,20 @@ class Tokens(FileSyncSettings):
|
|
|
108
112
|
openai_api_key: str = ""
|
|
109
113
|
tumblr: Tumblr = Tumblr()
|
|
110
114
|
|
|
115
|
+
@staticmethod
|
|
116
|
+
def online_token_prompt(url: str, *tokens: str) -> Generator[str]:
|
|
117
|
+
formatted_token_string = " and ".join(f"[cyan]{token}[/]" for token in tokens)
|
|
118
|
+
|
|
119
|
+
rich_print(f"Retrieve your {formatted_token_string} from: {url}")
|
|
120
|
+
for token in tokens:
|
|
121
|
+
yield getpass(f"Enter your {token} (masked): ", echo_char="*").strip()
|
|
122
|
+
|
|
123
|
+
rich_print()
|
|
124
|
+
|
|
111
125
|
@override
|
|
112
|
-
def model_post_init(self,
|
|
126
|
+
def model_post_init(self, context: object) -> None:
|
|
127
|
+
super().model_post_init(context)
|
|
128
|
+
|
|
113
129
|
# Check if any tokens are missing or if the user wants to reset them, then set tokens if necessary.
|
|
114
130
|
if not self.openai_api_key:
|
|
115
131
|
(self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
|
|
@@ -120,38 +136,18 @@ class Tokens(FileSyncSettings):
|
|
|
120
136
|
# This is the whole OAuth 1.0 process.
|
|
121
137
|
# https://requests-oauthlib.readthedocs.io/en/latest/examples/tumblr.html
|
|
122
138
|
# We tried setting up OAuth 2.0, but the token refresh process is far too unreliable for this sort of program.
|
|
123
|
-
with OAuth1Session(
|
|
124
|
-
|
|
125
|
-
self.tumblr.client_secret,
|
|
126
|
-
) as oauth_session:
|
|
127
|
-
fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
|
|
128
|
-
full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
|
|
129
|
-
(redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
|
|
130
|
-
oauth_response = oauth_session.parse_authorization_response(redirect_response)
|
|
131
|
-
|
|
132
|
-
with OAuth1Session(
|
|
133
|
-
self.tumblr.client_key,
|
|
134
|
-
self.tumblr.client_secret,
|
|
135
|
-
*self.get_oauth_tokens(fetch_response),
|
|
136
|
-
verifier=oauth_response["oauth_verifier"],
|
|
137
|
-
) as oauth_session:
|
|
138
|
-
oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
|
|
139
|
-
|
|
140
|
-
self.tumblr.resource_owner_key, self.tumblr.resource_owner_secret = self.get_oauth_tokens(oauth_tokens)
|
|
139
|
+
with OAuth1Session(**self.tumblr.model_dump()) as session:
|
|
140
|
+
session.fetch_request_token("http://tumblr.com/oauth/request_token") # pyright: ignore[reportUnknownMemberType]
|
|
141
141
|
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
142
|
+
rich_print("Open the link below in your browser, and authorize this application.\nAfter authorizing, copy and paste the URL of the page you are redirected to below.")
|
|
143
|
+
authorization_url = session.authorization_url("http://tumblr.com/oauth/authorize") # pyright: ignore[reportUnknownMemberType]
|
|
144
|
+
(authorization_response,) = self.online_token_prompt(authorization_url, "full redirect URL")
|
|
145
|
+
session.parse_authorization_response(authorization_response)
|
|
145
146
|
|
|
146
|
-
|
|
147
|
-
for token in tokens:
|
|
148
|
-
yield getpass(f"Enter your {token} (masked): ", echo_char="*").strip()
|
|
147
|
+
access_token = session.fetch_access_token("http://tumblr.com/oauth/access_token") # pyright: ignore[reportUnknownMemberType]
|
|
149
148
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
@staticmethod
|
|
153
|
-
def get_oauth_tokens(token: dict[str, str]) -> tuple[str, str]:
|
|
154
|
-
return token["oauth_token"], token["oauth_token_secret"]
|
|
149
|
+
self.tumblr.resource_owner_key = access_token["oauth_token"]
|
|
150
|
+
self.tumblr.resource_owner_secret = access_token["oauth_token_secret"]
|
|
155
151
|
|
|
156
152
|
|
|
157
153
|
class Blog(FullyValidatedModel):
|
|
@@ -168,25 +164,26 @@ class ResponseModel(FullyValidatedModel):
|
|
|
168
164
|
response: Response
|
|
169
165
|
|
|
170
166
|
|
|
167
|
+
class Block(FullyValidatedModel):
|
|
168
|
+
type: str = "text"
|
|
169
|
+
text: str = ""
|
|
170
|
+
blocks: list[int] = []
|
|
171
|
+
|
|
172
|
+
|
|
171
173
|
class Post(FullyValidatedModel):
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
parent_tumblelog_uuid: SkipJsonSchema[str] = ""
|
|
180
|
-
parent_post_id: SkipJsonSchema[int] = 0
|
|
181
|
-
reblog_key: SkipJsonSchema[str] = ""
|
|
182
|
-
|
|
183
|
-
timestamp: SkipJsonSchema[int] = 0
|
|
174
|
+
blog: Blog = Blog()
|
|
175
|
+
id: int = 0
|
|
176
|
+
parent_tumblelog_uuid: str = ""
|
|
177
|
+
parent_post_id: int = 0
|
|
178
|
+
reblog_key: str = ""
|
|
179
|
+
|
|
180
|
+
timestamp: int = 0
|
|
184
181
|
tags: Annotated[list[str], PlainSerializer(",".join)] = []
|
|
185
|
-
state:
|
|
182
|
+
state: Literal["published", "queued", "draft", "private", "unapproved"] = "draft"
|
|
186
183
|
|
|
187
|
-
content:
|
|
188
|
-
layout:
|
|
189
|
-
trail:
|
|
184
|
+
content: list[Block] = []
|
|
185
|
+
layout: list[Block] = []
|
|
186
|
+
trail: list[Self] = []
|
|
190
187
|
|
|
191
188
|
is_submission: SkipJsonSchema[bool] = False
|
|
192
189
|
|
|
@@ -212,9 +209,17 @@ class Post(FullyValidatedModel):
|
|
|
212
209
|
return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or any(block.type == "ask" for block in self.layout))
|
|
213
210
|
|
|
214
211
|
|
|
215
|
-
class
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
content: str
|
|
212
|
+
class Message(FullyValidatedModel):
|
|
213
|
+
role: Literal["developer", "user", "assistant"]
|
|
214
|
+
content: str
|
|
219
215
|
|
|
216
|
+
|
|
217
|
+
class Example(FullyValidatedModel):
|
|
220
218
|
messages: list[Message]
|
|
219
|
+
|
|
220
|
+
def get_assistant_message(self) -> str:
|
|
221
|
+
for message in self.messages:
|
|
222
|
+
if message.role == "assistant":
|
|
223
|
+
return message.content
|
|
224
|
+
msg = "Assistant message not found!"
|
|
225
|
+
raise ValueError(msg)
|
tumblrbot/utils/tumblr.py
CHANGED
|
@@ -1,31 +1,46 @@
|
|
|
1
|
-
from
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
from
|
|
1
|
+
from requests import HTTPError, Response, Session
|
|
2
|
+
from requests_oauthlib import OAuth1
|
|
3
|
+
from rich import print as rich_print
|
|
4
|
+
from tenacity import retry, retry_if_exception_message, stop_after_attempt, wait_random_exponential
|
|
5
5
|
|
|
6
6
|
from tumblrbot.utils.models import Post, ResponseModel, Tokens
|
|
7
7
|
|
|
8
|
+
rate_limit_retry = retry(
|
|
9
|
+
stop=stop_after_attempt(10),
|
|
10
|
+
wait=wait_random_exponential(min=60),
|
|
11
|
+
retry=retry_if_exception_message(match="429 Client Error: Limit Exceeded for url: .+"),
|
|
12
|
+
before_sleep=lambda state: rich_print(f"[yellow]Tumblr rate limit exceeded. Waiting for {state.idle_for} seconds..."),
|
|
13
|
+
reraise=True,
|
|
14
|
+
)
|
|
15
|
+
|
|
8
16
|
|
|
9
|
-
class TumblrSession(
|
|
17
|
+
class TumblrSession(Session):
|
|
10
18
|
def __init__(self, tokens: Tokens) -> None:
|
|
11
|
-
super().__init__(
|
|
19
|
+
super().__init__()
|
|
20
|
+
self.auth = OAuth1(**tokens.tumblr.model_dump())
|
|
12
21
|
self.hooks["response"].append(self.response_hook)
|
|
13
22
|
|
|
14
|
-
|
|
15
|
-
super().__enter__()
|
|
16
|
-
return self
|
|
23
|
+
self.api_key = tokens.tumblr.client_key
|
|
17
24
|
|
|
18
25
|
def response_hook(self, response: Response, *_args: object, **_kwargs: object) -> None:
|
|
19
26
|
try:
|
|
20
27
|
response.raise_for_status()
|
|
21
28
|
except HTTPError as error:
|
|
22
|
-
|
|
29
|
+
for error_msg in response.json()["errors"]:
|
|
30
|
+
error.add_note(f"{error_msg['code']}: {error_msg['detail']}")
|
|
23
31
|
raise
|
|
24
32
|
|
|
33
|
+
@rate_limit_retry
|
|
25
34
|
def retrieve_blog_info(self, blog_identifier: str) -> ResponseModel:
|
|
26
|
-
response = self.get(
|
|
35
|
+
response = self.get(
|
|
36
|
+
f"https://api.tumblr.com/v2/blog/{blog_identifier}/info",
|
|
37
|
+
params={
|
|
38
|
+
"api_key": self.api_key,
|
|
39
|
+
},
|
|
40
|
+
)
|
|
27
41
|
return ResponseModel.model_validate_json(response.text)
|
|
28
42
|
|
|
43
|
+
@rate_limit_retry
|
|
29
44
|
def retrieve_published_posts(
|
|
30
45
|
self,
|
|
31
46
|
blog_identifier: str,
|
|
@@ -35,6 +50,7 @@ class TumblrSession(OAuth1Session):
|
|
|
35
50
|
response = self.get(
|
|
36
51
|
f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
|
|
37
52
|
params={
|
|
53
|
+
"api_key": self.api_key,
|
|
38
54
|
"offset": offset,
|
|
39
55
|
"after": after,
|
|
40
56
|
"sort": "asc",
|
|
@@ -43,6 +59,7 @@ class TumblrSession(OAuth1Session):
|
|
|
43
59
|
)
|
|
44
60
|
return ResponseModel.model_validate_json(response.text)
|
|
45
61
|
|
|
62
|
+
@rate_limit_retry
|
|
46
63
|
def create_post(self, blog_identifier: str, post: Post) -> ResponseModel:
|
|
47
64
|
response = self.post(
|
|
48
65
|
f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tumblrbot
|
|
3
|
-
Version: 1.9.
|
|
3
|
+
Version: 1.9.7
|
|
4
4
|
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
5
|
Requires-Python: >= 3.14
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist: click
|
|
8
7
|
Requires-Dist: openai
|
|
9
8
|
Requires-Dist: pydantic
|
|
10
9
|
Requires-Dist: requests
|
|
11
10
|
Requires-Dist: requests-oauthlib
|
|
12
11
|
Requires-Dist: rich
|
|
12
|
+
Requires-Dist: tenacity
|
|
13
13
|
Requires-Dist: tiktoken
|
|
14
14
|
Requires-Dist: tomlkit
|
|
15
|
+
Requires-Dist: pyinstaller ; extra == "dev"
|
|
15
16
|
Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
|
|
16
17
|
Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
18
|
+
Provides-Extra: dev
|
|
17
19
|
|
|
18
20
|
# tumblrbot
|
|
19
21
|
|
|
@@ -38,7 +40,6 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
|
38
40
|
[Tumblr]: https://tumblr.com
|
|
39
41
|
[Tumblr Tokens]: https://tumblr.com/oauth/apps
|
|
40
42
|
[Tumblr API Documentation on Blog Identifiers]: https://tumblr.com/docs/en/api/v2#blog-identifiers
|
|
41
|
-
[Tumblr API Documentation on Rate Limits]: https://tumblr.com/docs/en/api/v2#rate-limits
|
|
42
43
|
|
|
43
44
|
[Format String]: https://docs.python.org/3/library/string.html#format-string-syntax
|
|
44
45
|
|
|
@@ -84,10 +85,6 @@ Features:
|
|
|
84
85
|
- Colorful output, progress bars, and post previews using [rich].
|
|
85
86
|
- Automatically keeps the [config][configurable] file up-to-date and recreates it if missing (without overriding user settings).
|
|
86
87
|
|
|
87
|
-
**To-Do:**
|
|
88
|
-
|
|
89
|
-
- Add retry logic for rate limiting.
|
|
90
|
-
|
|
91
88
|
**Known Issues:**
|
|
92
89
|
|
|
93
90
|
- Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
|
|
@@ -96,31 +93,39 @@ Features:
|
|
|
96
93
|
- You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
|
|
97
94
|
- You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
|
|
98
95
|
- If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
|
|
99
|
-
- Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning]
|
|
100
|
-
-
|
|
101
|
-
-
|
|
102
|
-
- Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
|
|
96
|
+
- Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning]
|
|
97
|
+
- *We are unsure if this is still happening.*
|
|
98
|
+
- Post counts are incorrect when downloading posts. Our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers, so treat them as estimates.
|
|
103
99
|
|
|
104
100
|
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
105
101
|
|
|
106
|
-
## Installation
|
|
102
|
+
## Installation & Usage
|
|
103
|
+
|
|
104
|
+
### Downloadable Binary
|
|
105
|
+
|
|
106
|
+
| Pros | Cons |
|
|
107
|
+
| --- | --- |
|
|
108
|
+
| Easier to install | Harder to update |
|
|
109
|
+
| No risk of dependencies breaking | Dependencies may be older |
|
|
110
|
+
|
|
111
|
+
1. Download the latest release's [tumblrbot.exe].
|
|
112
|
+
1. Launch `tumblrbot.exe` in the install location.
|
|
113
|
+
|
|
114
|
+
### PyPi
|
|
115
|
+
|
|
116
|
+
| Pros | Cons |
|
|
117
|
+
| --- | --- |
|
|
118
|
+
| Easier to update | Harder to install |
|
|
119
|
+
| Dependencies may be newer | Dependencies may break |
|
|
107
120
|
|
|
108
121
|
1. Install the latest version of [Python]:
|
|
109
122
|
- Windows: `winget install python3`
|
|
110
123
|
- Linux (apt): `apt install python-pip`
|
|
111
124
|
- Linux (pacman): `pacman -S python-pip`
|
|
112
125
|
1. Install the [pip] package: `pip install tumblrbot`
|
|
113
|
-
- Alternatively, you can install from this repository: `pip install git+https://github.com/
|
|
126
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidScientistIzutsumiMarin/tumblrbot.git`
|
|
114
127
|
- On Linux, you will have to make a virtual environment or use pip's `--break-system-packages` flag to install packages system-wide.
|
|
115
|
-
|
|
116
|
-
### Alternative Installation for Windows
|
|
117
|
-
|
|
118
|
-
1. Download the latest release's [tumblrbot.exe].
|
|
119
|
-
1. Run the file directly, or add it to your path, and use it as normal.
|
|
120
|
-
|
|
121
|
-
## Usage
|
|
122
|
-
|
|
123
|
-
Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
128
|
+
1. Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
124
129
|
|
|
125
130
|
## Obtaining Tokens
|
|
126
131
|
|
|
@@ -177,6 +182,7 @@ Specific Options:
|
|
|
177
182
|
To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
|
|
178
183
|
|
|
179
184
|
- **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
|
|
185
|
+
- **`moderation_batch_size`** - This controls the batch size when submitting posts to the OpenAI moderation. There is no limit, but higher numbers will cause you to be rate-limited more, which can overall be slower. Low numbers reduce rate-limiting, but can sometimes take longer due to needing more requests. The best value will depend on your computer, internet connection, and any number of factors on OpenAI's side. The default value is just what worked best for our computer.
|
|
180
186
|
- **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
|
|
181
187
|
- **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
|
|
182
188
|
- **`user_message`** - This setting is used and works in the same way as `developer_message`.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
tumblrbot/__main__.py,sha256=DYHfOs5oyOIT4c0cdEd4o0p7UPo39EnTMJHyOrsVWBg,1753
|
|
3
|
+
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
tumblrbot/flow/download.py,sha256=r_4Bc_SNnIDbilT18YypBjJ4xhrg0AJAfvOezUL-p2c,2052
|
|
5
|
+
tumblrbot/flow/examples.py,sha256=30FVJQomjwpUBKeeAJDfdkr8ZcQlykoXZ6uL3tqxgOg,4465
|
|
6
|
+
tumblrbot/flow/fine_tune.py,sha256=KaC37_RNsbfAh1_ZdsiAELvRbYkJrrH8QPJPGuZtzOM,5483
|
|
7
|
+
tumblrbot/flow/generate.py,sha256=WQEG_85_nuRp91Le6lvZ0FeIWnBA1a012MlI3_YMq-8,4302
|
|
8
|
+
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
tumblrbot/utils/common.py,sha256=4n3kcdgTP_HHjCg5Z7GjNyBZzBLUGrTu4fwxaOFYGt4,1676
|
|
10
|
+
tumblrbot/utils/models.py,sha256=s5Hm1AEEtnz4mi_apUlu2x4NuXeZtMAOH8DHSYgbv1E,11050
|
|
11
|
+
tumblrbot/utils/tumblr.py,sha256=HfaoRr0YikP0uJdnwCxdhyK5wUBOuvaC9unNWa1Erk8,2526
|
|
12
|
+
tumblrbot-1.9.7.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
13
|
+
tumblrbot-1.9.7.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
14
|
+
tumblrbot-1.9.7.dist-info/METADATA,sha256=s21DxgfmBLtlK03NSZktVB0UZWc6MUCZ8YHpFGM-MkU,14195
|
|
15
|
+
tumblrbot-1.9.7.dist-info/RECORD,,
|
tumblrbot-1.9.5.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
|
|
3
|
-
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
tumblrbot/flow/download.py,sha256=r_4Bc_SNnIDbilT18YypBjJ4xhrg0AJAfvOezUL-p2c,2052
|
|
5
|
-
tumblrbot/flow/examples.py,sha256=fSgAat4VpTw_ALt_Ytp9TBzl6-RFeMQaK_jcMXF0rcQ,4124
|
|
6
|
-
tumblrbot/flow/fine_tune.py,sha256=LHpaoqHc9JnXNQzbJIjYO36hwy2l8A_e4IuLx7ke2MQ,5436
|
|
7
|
-
tumblrbot/flow/generate.py,sha256=-Q5ZSbfRGk3jQdE_73DjlI-iICIUxbJtIP463eChsHg,4337
|
|
8
|
-
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
|
|
10
|
-
tumblrbot/utils/models.py,sha256=iBCY1NCnUvEgfsSR7Tes6ht7UYY6UIvnWFnZEtkB69I,11018
|
|
11
|
-
tumblrbot/utils/tumblr.py,sha256=vpLRX5cdhqmCaJdQbiGMOo9uOVhiC2CQF67BJ5u4fwU,1769
|
|
12
|
-
tumblrbot-1.9.5.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
13
|
-
tumblrbot-1.9.5.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
14
|
-
tumblrbot-1.9.5.dist-info/METADATA,sha256=OB1Ke61MPRdtsGxbd2Zi6pnwxqpSI61-Lai4vrVssaE,14126
|
|
15
|
-
tumblrbot-1.9.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|