tumblrbot 1.9.4__py3-none-any.whl → 1.9.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tumblrbot/flow/download.py +4 -2
- tumblrbot/flow/examples.py +5 -3
- tumblrbot/flow/fine_tune.py +7 -4
- tumblrbot/flow/generate.py +4 -2
- tumblrbot/utils/models.py +3 -3
- {tumblrbot-1.9.4.dist-info → tumblrbot-1.9.5.dist-info}/METADATA +22 -5
- tumblrbot-1.9.5.dist-info/RECORD +15 -0
- tumblrbot-1.9.4.dist-info/RECORD +0 -15
- {tumblrbot-1.9.4.dist-info → tumblrbot-1.9.5.dist-info}/WHEEL +0 -0
- {tumblrbot-1.9.4.dist-info → tumblrbot-1.9.5.dist-info}/entry_points.txt +0 -0
tumblrbot/flow/download.py
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
from io import TextIOBase
|
|
2
1
|
from json import dump
|
|
3
|
-
from typing import override
|
|
2
|
+
from typing import TYPE_CHECKING, override
|
|
4
3
|
|
|
5
4
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
6
5
|
from tumblrbot.utils.models import Post
|
|
7
6
|
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from io import TextIOBase
|
|
9
|
+
|
|
8
10
|
|
|
9
11
|
class PostDownloader(FlowClass):
|
|
10
12
|
@override
|
tumblrbot/flow/examples.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
import re
|
|
2
|
-
from collections.abc import Generator
|
|
3
2
|
from itertools import batched
|
|
4
3
|
from json import loads
|
|
5
4
|
from math import ceil
|
|
6
|
-
from pathlib import Path
|
|
7
5
|
from re import search
|
|
8
|
-
from typing import IO, override
|
|
6
|
+
from typing import IO, TYPE_CHECKING, override
|
|
9
7
|
|
|
10
8
|
import rich
|
|
11
9
|
from openai import BadRequestError
|
|
@@ -13,6 +11,10 @@ from openai import BadRequestError
|
|
|
13
11
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
14
12
|
from tumblrbot.utils.models import Example, Post
|
|
15
13
|
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from collections.abc import Generator
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
16
18
|
|
|
17
19
|
class ExamplesWriter(FlowClass):
|
|
18
20
|
@override
|
tumblrbot/flow/fine_tune.py
CHANGED
|
@@ -1,11 +1,9 @@
|
|
|
1
|
-
from collections.abc import Generator
|
|
2
1
|
from datetime import datetime
|
|
3
2
|
from textwrap import dedent
|
|
4
3
|
from time import sleep
|
|
5
|
-
from typing import override
|
|
4
|
+
from typing import TYPE_CHECKING, override
|
|
6
5
|
|
|
7
6
|
import rich
|
|
8
|
-
from openai.types.fine_tuning import FineTuningJob
|
|
9
7
|
from rich import progress
|
|
10
8
|
from rich.console import Console
|
|
11
9
|
from rich.prompt import Confirm
|
|
@@ -14,6 +12,11 @@ from tiktoken import encoding_for_model, get_encoding
|
|
|
14
12
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
15
13
|
from tumblrbot.utils.models import Example
|
|
16
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from collections.abc import Generator
|
|
17
|
+
|
|
18
|
+
from openai.types.fine_tuning import FineTuningJob
|
|
19
|
+
|
|
17
20
|
|
|
18
21
|
class FineTuner(FlowClass):
|
|
19
22
|
@staticmethod
|
|
@@ -131,4 +134,4 @@ class FineTuner(FlowClass):
|
|
|
131
134
|
yield 4 + len(encoding.encode(message.content))
|
|
132
135
|
|
|
133
136
|
def get_cost_string(self, total_tokens: int) -> str:
|
|
134
|
-
return f"
|
|
137
|
+
return f"{self.config.token_price / 1000000 * total_tokens:.2f} USD"
|
tumblrbot/flow/generate.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
from collections.abc import Iterable
|
|
2
1
|
from functools import cache
|
|
3
2
|
from random import choice, random, sample
|
|
4
|
-
from typing import override
|
|
3
|
+
from typing import TYPE_CHECKING, override
|
|
5
4
|
|
|
6
5
|
import rich
|
|
7
6
|
from pydantic import ConfigDict
|
|
@@ -10,6 +9,9 @@ from rich.prompt import IntPrompt
|
|
|
10
9
|
from tumblrbot.utils.common import FlowClass, PreviewLive
|
|
11
10
|
from tumblrbot.utils.models import Post
|
|
12
11
|
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from collections.abc import Iterable
|
|
14
|
+
|
|
13
15
|
|
|
14
16
|
class DraftGenerator(FlowClass):
|
|
15
17
|
model_config = ConfigDict(frozen=True) # Makes this class hashable.
|
tumblrbot/utils/models.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
from collections.abc import Generator
|
|
2
|
+
from getpass import getpass
|
|
2
3
|
from pathlib import Path
|
|
3
|
-
from tomllib import loads
|
|
4
4
|
from typing import Annotated, Any, Literal, Self, override
|
|
5
5
|
|
|
6
6
|
import rich
|
|
7
7
|
from openai.types import ChatModel
|
|
8
|
-
from pwinput import pwinput
|
|
9
8
|
from pydantic import BaseModel, ConfigDict, Field, NonNegativeFloat, NonNegativeInt, PlainSerializer, PositiveFloat, PositiveInt, model_validator
|
|
10
9
|
from pydantic.json_schema import SkipJsonSchema
|
|
11
10
|
from requests_oauthlib import OAuth1Session
|
|
12
11
|
from rich.panel import Panel
|
|
13
12
|
from rich.prompt import Prompt
|
|
14
13
|
from tomlkit import comment, document, dumps # pyright: ignore[reportUnknownVariableType]
|
|
14
|
+
from tomllib import loads
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
class FullyValidatedModel(BaseModel):
|
|
@@ -145,7 +145,7 @@ class Tokens(FileSyncSettings):
|
|
|
145
145
|
|
|
146
146
|
rich.print(f"Retrieve your {formatted_token_string} from: {url}")
|
|
147
147
|
for token in tokens:
|
|
148
|
-
yield
|
|
148
|
+
yield getpass(f"Enter your {token} (masked): ", echo_char="*").strip()
|
|
149
149
|
|
|
150
150
|
rich.print()
|
|
151
151
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tumblrbot
|
|
3
|
-
Version: 1.9.4
|
|
3
|
+
Version: 1.9.5
|
|
4
4
|
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
|
-
Requires-Python: >= 3.
|
|
5
|
+
Requires-Python: >= 3.14
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
7
|
Requires-Dist: click
|
|
8
8
|
Requires-Dist: openai
|
|
9
|
-
Requires-Dist: pwinput
|
|
10
9
|
Requires-Dist: pydantic
|
|
11
10
|
Requires-Dist: requests
|
|
12
11
|
Requires-Dist: requests-oauthlib
|
|
@@ -18,6 +17,8 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
|
18
17
|
|
|
19
18
|
# tumblrbot
|
|
20
19
|
|
|
20
|
+
[tumblrbot.exe]: https://github.com/MaidScientistIzutsumiMarin/tumblrbot/releases/latest/download/tumblrbot.exe
|
|
21
|
+
|
|
21
22
|
[OAuth]: https://oauth.net/1
|
|
22
23
|
[Python]: https://python.org/download
|
|
23
24
|
|
|
@@ -31,6 +32,7 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
|
31
32
|
[OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
|
|
32
33
|
[OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
|
|
33
34
|
[OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
|
|
35
|
+
[Flags]: https://platform.openai.com/docs/guides/moderation/over#content-classifications
|
|
34
36
|
[Fine-Tuning Portal]: https://platform.openai.com/finetune
|
|
35
37
|
|
|
36
38
|
[Tumblr]: https://tumblr.com
|
|
@@ -82,11 +84,21 @@ Features:
|
|
|
82
84
|
- Colorful output, progress bars, and post previews using [rich].
|
|
83
85
|
- Automatically keeps the [config][configurable] file up-to-date and recreates it if missing (without overriding user settings).
|
|
84
86
|
|
|
87
|
+
**To-Do:**
|
|
88
|
+
|
|
89
|
+
- Add retry logic for rate limiting.
|
|
90
|
+
|
|
85
91
|
**Known Issues:**
|
|
86
92
|
|
|
93
|
+
- Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
|
|
94
|
+
- You can retry with the same examples file. This has, on rare occasions, worked.
|
|
95
|
+
- You can submit the examples file to the [OpenAI] moderation API with this program's guided prompts. This has worked consistently for our dataset, but others have reported it not being thorough enough.
|
|
96
|
+
- You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
|
|
97
|
+
- You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
|
|
98
|
+
- If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
|
|
87
99
|
- Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
|
|
88
100
|
- Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
|
|
89
|
-
- During post downloading or post generation, you may receive a
|
|
101
|
+
- During post downloading or post generation, you may receive a “Limit Exceeded” error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
|
|
90
102
|
- Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
|
|
91
103
|
|
|
92
104
|
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
@@ -101,6 +113,11 @@ Features:
|
|
|
101
113
|
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
|
|
102
114
|
- On Linux, you will have to make a virtual environment or use the flag to install packages system-wide.
|
|
103
115
|
|
|
116
|
+
### Alternative Installation for Windows
|
|
117
|
+
|
|
118
|
+
1. Download the latest release's [tumblrbot.exe].
|
|
119
|
+
1. Run the file directly, or add it to your path, and use it as normal.
|
|
120
|
+
|
|
104
121
|
## Usage
|
|
105
122
|
|
|
106
123
|
Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
@@ -160,7 +177,7 @@ Specific Options:
|
|
|
160
177
|
To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
|
|
161
178
|
|
|
162
179
|
- **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
|
|
163
|
-
- **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so
|
|
180
|
+
- **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
|
|
164
181
|
- **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
|
|
165
182
|
- **`user_message`** - This setting is used and works in the same way as `developer_message`.
|
|
166
183
|
- **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
|
|
3
|
+
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
tumblrbot/flow/download.py,sha256=r_4Bc_SNnIDbilT18YypBjJ4xhrg0AJAfvOezUL-p2c,2052
|
|
5
|
+
tumblrbot/flow/examples.py,sha256=fSgAat4VpTw_ALt_Ytp9TBzl6-RFeMQaK_jcMXF0rcQ,4124
|
|
6
|
+
tumblrbot/flow/fine_tune.py,sha256=LHpaoqHc9JnXNQzbJIjYO36hwy2l8A_e4IuLx7ke2MQ,5436
|
|
7
|
+
tumblrbot/flow/generate.py,sha256=-Q5ZSbfRGk3jQdE_73DjlI-iICIUxbJtIP463eChsHg,4337
|
|
8
|
+
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
|
|
10
|
+
tumblrbot/utils/models.py,sha256=iBCY1NCnUvEgfsSR7Tes6ht7UYY6UIvnWFnZEtkB69I,11018
|
|
11
|
+
tumblrbot/utils/tumblr.py,sha256=vpLRX5cdhqmCaJdQbiGMOo9uOVhiC2CQF67BJ5u4fwU,1769
|
|
12
|
+
tumblrbot-1.9.5.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
13
|
+
tumblrbot-1.9.5.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
14
|
+
tumblrbot-1.9.5.dist-info/METADATA,sha256=OB1Ke61MPRdtsGxbd2Zi6pnwxqpSI61-Lai4vrVssaE,14126
|
|
15
|
+
tumblrbot-1.9.5.dist-info/RECORD,,
|
tumblrbot-1.9.4.dist-info/RECORD
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
tumblrbot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
tumblrbot/__main__.py,sha256=XWSbOmI_y2MJVU9xpkgA-0zaF3HNwR5uF6_BZqtCQWY,1719
|
|
3
|
-
tumblrbot/flow/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
tumblrbot/flow/download.py,sha256=PUs7eM-1kGOb2RKijy3lW0zyvfFDwbxzTGhVghrWIhc,2012
|
|
5
|
-
tumblrbot/flow/examples.py,sha256=MLj51kEZ3AWvP47V4mR9UKUUciKoNQJEvXhr5jxSgfs,4080
|
|
6
|
-
tumblrbot/flow/fine_tune.py,sha256=YDukEwZNw3GveEAH4ORv6oylka5MQNLK_4iSmuAVPtg,5387
|
|
7
|
-
tumblrbot/flow/generate.py,sha256=b5Yfo_k9LoQWvHEJEAWWARshpxqY1rsInFrAuqJ8Qbw,4297
|
|
8
|
-
tumblrbot/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
tumblrbot/utils/common.py,sha256=RvICPORtBSqsN7VWADgStogJ8w4owzBfR1E2XbCQrfA,1795
|
|
10
|
-
tumblrbot/utils/models.py,sha256=WdLqX-og8h4vZurtuyZ05ph4sssP16eny4GXvFkkijM,11003
|
|
11
|
-
tumblrbot/utils/tumblr.py,sha256=vpLRX5cdhqmCaJdQbiGMOo9uOVhiC2CQF67BJ5u4fwU,1769
|
|
12
|
-
tumblrbot-1.9.4.dist-info/entry_points.txt,sha256=lTiN7PxAbyGY1fpCWApEw6NUIUgobfcOKhvn6cu3IQA,53
|
|
13
|
-
tumblrbot-1.9.4.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
|
14
|
-
tumblrbot-1.9.4.dist-info/METADATA,sha256=MBrQIyt48nWGQL4tAgV7a9hPr_sRw3DiGptTGkX-DHM,12666
|
|
15
|
-
tumblrbot-1.9.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|