tumblrbot 1.4.4.tar.gz → 1.4.6.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/.gitignore +2 -1
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/PKG-INFO +32 -14
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/README.md +30 -10
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/pyproject.toml +4 -6
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/__main__.py +7 -14
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/flow/download.py +3 -8
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/flow/examples.py +25 -42
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/flow/fine_tune.py +27 -10
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/flow/generate.py +10 -9
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/utils/common.py +16 -4
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/utils/config.py +5 -4
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/utils/models.py +25 -34
- tumblrbot-1.4.6/src/tumblrbot/utils/tumblr.py +40 -0
- tumblrbot-1.4.4/src/tumblrbot/utils/tumblr.py +0 -47
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/.github/dependabot.yml +0 -0
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/UNLICENSE +0 -0
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.4.4 → tumblrbot-1.4.6}/src/tumblrbot/utils/__init__.py +0 -0
--- tumblrbot-1.4.4/PKG-INFO
+++ tumblrbot-1.4.6/PKG-INFO
@@ -1,26 +1,29 @@
 Metadata-Version: 2.4
 Name: tumblrbot
-Version: 1.4.4
+Version: 1.4.6
 Summary: An updated bot that posts to Tumblr, based on your very own blog!
 Requires-Python: >= 3.13
 Description-Content-Type: text/markdown
-Requires-Dist: httpx[http2]
 Requires-Dist: keyring
-Requires-Dist: more-itertools
-Requires-Dist: niquests[speedups, http3]
 Requires-Dist: openai
 Requires-Dist: pwinput
 Requires-Dist: pydantic
 Requires-Dist: pydantic-settings
+Requires-Dist: requests
 Requires-Dist: requests-oauthlib
 Requires-Dist: rich
 Requires-Dist: tiktoken
 Requires-Dist: tomlkit
 Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
 
+# tumblrbot
+
 [OAuth]: https://oauth.net/1
 [Python]: https://python.org/download
 
+[JSON Lines]: https://jsonlines.org
+[JSON Lines Validator]: https://jsonlines.org/validator
+
 [pip]: https://pypi.org
 [keyring]: https://pypi.org/project/keyring
 [Rich]: https://pypi.org/project/rich
@@ -42,8 +45,6 @@ Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
 
 [Config]: #configuration
 [Fine-Tuning]: #manual-fine-tuning
-
-# tumblrbot
 [](https://python.org/pypi/tumblrbot)
 
 Description of original project:
@@ -52,6 +53,7 @@ Description of original project:
 This fork is largely a rewrite of the source code with similarities in its structure and process.
 
 Features:
+
 - An [interactive console][Main] for all steps of generating posts for the blog:
   1. Asks for [OpenAI] and [Tumblr] tokens.
      - Stores API tokens using [keyring].
@@ -78,16 +80,18 @@ Features:
 - Automatically keeps the [config] file up-to-date and recreates it if missing.
 
 **To-Do:**
+
 - Add code documentation.
 
 **Known Issues:**
+
 - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
 - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
 
-
 **Please submit an issue or contact us for features you want added/reimplemented.**
 
 ## Installation
+
 1. Install the latest version of [Python]:
    - Windows: `winget install python3`
    - Linux (apt): `apt install python-pip`
@@ -98,17 +102,23 @@ Features:
    - See [keyring] for additional requirements if you are not on Windows.
 
 ## Usage
+
 Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config].
 
 ## Obtaining Tokens
+
 ### OpenAI
-
+
+API token can be created here: [OpenAI Tokens].
+
 1. Leave everything at the defaults and set `Project` to `Default Project`.
 1. Press `Create secret key`.
 1. Press `Copy` to copy the API token to your clipboard.
 
 ### Tumblr
-
+
+API tokens can be created here: [Tumblr Tokens].
+
 1. Press `+ Register Application`.
 1. Enter anything for `Application Name` and `Application Description`.
 1. Enter any URL for `Application Website` and `Default callback URL`, like `https://example.com`.
@@ -123,26 +133,34 @@ When running this program, you will be prompted to enter all of these tokens. **
 After inputting the [Tumblr] tokens, you will be given a URL that you need to open in your browser. Press `Allow`, then copy and paste the URL of the page you are redirected to into the console.
 
 ## Configuration
+
 All config options can be found in `config.toml` after running the program once. This will be kept up-to-date if there are changes to the config's format in a future update. This also means it may be worthwhile to double-check the config file after an update. Any changes to the config should be in the changelog for a given version.
 
 All file options can include directories that will be created when the program is run.
 
-- `custom_prompts_file`
+- `custom_prompts_file` - This file should follow the file format below:
+
 ```json
-{"user message 1": "assistant response 1"
-
+{"user message 1": "assistant response 1"}
+{"user message 1": "assistant response 1"}
+{"user message 2": "assistant response 2", "user message 3": "assistant response 3"}
 ```
+
+To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
+
 - **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
 - **`user_message`** - This message is used in the same way as `developer_message` and should be treated the same.
 - **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
-- **`token_price`** - The default value here is the default token price for `base_model`. You can find the up-to-date value [
+- **`token_price`** - The default value here is the default token price for `base_model`. You can find the up-to-date value in [OpenAI Pricing], in the *Training* column.
 - **`job_id`** - If there is any value here, this program will resume monitoring the corresponding job, instead of starting a new one. This gets set when starting the fine-tuning and is cleared when it is completed. You can read more in [fine-tuning].
 - **`base_model`** - This value is used to choose the tokenizer for estimating fine-tuning costs. It is also the base model that will be fine-tuned and the model that is used to generate tags. You can find a list of options in the [fine-tuning portal] by pressing `+ Create` and opening the drop-down list for `Base Model`. Be sure to update `token_price` if you change this value.
 - **`fine_tuned_model`** - Set automatically after monitoring fine-tuning if the job has succeeded. You can read more in [fine-tuning].
 - **`tags_chance`** - This should be between 0 and 1. Setting it to 0 corresponds to a 0% chance (never) to add tags to a post. 1 corresponds to a 100% chance (always) to add tags to a post. Adding tags incurs a very small token cost.
 
 ## Manual Fine-Tuning
-
+
+You can manually upload the examples file to [OpenAI] and start the fine-tuning here: [fine-tuning portal].
+
 1. Press `+ Create`.
 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config].
 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config].
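The new `custom_prompts_file` documentation above is the user-facing half of a code change further down (`get_custom_prompts` in `examples.py`). A minimal sketch of reading such a JSON Lines file, assuming the default `custom_prompts.jsonl` name from the config; the file and its contents here are illustrative only:

```python
import json
from pathlib import Path

# Hypothetical custom_prompts.jsonl in the working directory; each line is one
# JSON object mapping user messages to the assistant responses to train toward.
prompts_file = Path("custom_prompts.jsonl")

for line in prompts_file.read_text(encoding="utf_8").splitlines():
    if line.strip():
        pairs: dict[str, str] = json.loads(line)  # one dictionary per line, per JSON Lines
        for user_message, assistant_response in pairs.items():
            print(f"{user_message!r} -> {assistant_response!r}")
```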
--- tumblrbot-1.4.4/README.md
+++ tumblrbot-1.4.6/README.md
@@ -1,6 +1,11 @@
+# tumblrbot
+
 [OAuth]: https://oauth.net/1
 [Python]: https://python.org/download
 
+[JSON Lines]: https://jsonlines.org
+[JSON Lines Validator]: https://jsonlines.org/validator
+
 [pip]: https://pypi.org
 [keyring]: https://pypi.org/project/keyring
 [Rich]: https://pypi.org/project/rich
@@ -22,8 +27,6 @@
 
 [Config]: #configuration
 [Fine-Tuning]: #manual-fine-tuning
-
-# tumblrbot
 [](https://python.org/pypi/tumblrbot)
 
 Description of original project:
@@ -32,6 +35,7 @@ Description of original project:
 This fork is largely a rewrite of the source code with similarities in its structure and process.
 
 Features:
+
 - An [interactive console][Main] for all steps of generating posts for the blog:
   1. Asks for [OpenAI] and [Tumblr] tokens.
      - Stores API tokens using [keyring].
@@ -58,16 +62,18 @@ Features:
 - Automatically keeps the [config] file up-to-date and recreates it if missing.
 
 **To-Do:**
+
 - Add code documentation.
 
 **Known Issues:**
+
 - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
 - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
 
-
 **Please submit an issue or contact us for features you want added/reimplemented.**
 
 ## Installation
+
 1. Install the latest version of [Python]:
    - Windows: `winget install python3`
    - Linux (apt): `apt install python-pip`
@@ -78,17 +84,23 @@ Features:
    - See [keyring] for additional requirements if you are not on Windows.
 
 ## Usage
+
 Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config].
 
 ## Obtaining Tokens
+
 ### OpenAI
-
+
+API token can be created here: [OpenAI Tokens].
+
 1. Leave everything at the defaults and set `Project` to `Default Project`.
 1. Press `Create secret key`.
 1. Press `Copy` to copy the API token to your clipboard.
 
 ### Tumblr
-
+
+API tokens can be created here: [Tumblr Tokens].
+
 1. Press `+ Register Application`.
 1. Enter anything for `Application Name` and `Application Description`.
 1. Enter any URL for `Application Website` and `Default callback URL`, like `https://example.com`.
@@ -103,26 +115,34 @@ When running this program, you will be prompted to enter all of these tokens. **
 After inputting the [Tumblr] tokens, you will be given a URL that you need to open in your browser. Press `Allow`, then copy and paste the URL of the page you are redirected to into the console.
 
 ## Configuration
+
 All config options can be found in `config.toml` after running the program once. This will be kept up-to-date if there are changes to the config's format in a future update. This also means it may be worthwhile to double-check the config file after an update. Any changes to the config should be in the changelog for a given version.
 
 All file options can include directories that will be created when the program is run.
 
-- `custom_prompts_file`
+- `custom_prompts_file` - This file should follow the file format below:
+
 ```json
-{"user message 1": "assistant response 1"
-
+{"user message 1": "assistant response 1"}
+{"user message 1": "assistant response 1"}
+{"user message 2": "assistant response 2", "user message 3": "assistant response 3"}
 ```
+
+To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
+
 - **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
 - **`user_message`** - This message is used in the same way as `developer_message` and should be treated the same.
 - **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
-- **`token_price`** - The default value here is the default token price for `base_model`. You can find the up-to-date value [
+- **`token_price`** - The default value here is the default token price for `base_model`. You can find the up-to-date value in [OpenAI Pricing], in the *Training* column.
 - **`job_id`** - If there is any value here, this program will resume monitoring the corresponding job, instead of starting a new one. This gets set when starting the fine-tuning and is cleared when it is completed. You can read more in [fine-tuning].
 - **`base_model`** - This value is used to choose the tokenizer for estimating fine-tuning costs. It is also the base model that will be fine-tuned and the model that is used to generate tags. You can find a list of options in the [fine-tuning portal] by pressing `+ Create` and opening the drop-down list for `Base Model`. Be sure to update `token_price` if you change this value.
 - **`fine_tuned_model`** - Set automatically after monitoring fine-tuning if the job has succeeded. You can read more in [fine-tuning].
 - **`tags_chance`** - This should be between 0 and 1. Setting it to 0 corresponds to a 0% chance (never) to add tags to a post. 1 corresponds to a 100% chance (always) to add tags to a post. Adding tags incurs a very small token cost.
 
 ## Manual Fine-Tuning
-
+
+You can manually upload the examples file to [OpenAI] and start the fine-tuning here: [fine-tuning portal].
+
 1. Press `+ Create`.
 1. Select the desired `Base Model` from the dropdown. This should ideally match the model set in the [config].
 1. Upload the generated examples file to the section under `Training data`. You can find the path for this in the [config].
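The `token_price` and `expected_epochs` options combine into the cost estimate the program prints; the formula is visible in `fine_tune.py` below (`get_cost_string`), with the price quoted per million training tokens. A worked example with assumed numbers:

```python
# Assumed values for illustration; the real ones come from config.toml.
token_price = 3.00          # $ per 1,000,000 training tokens for base_model
estimated_tokens = 250_000  # hypothetical token count of examples.jsonl
expected_epochs = 3

total_tokens = expected_epochs * estimated_tokens
print(f"${token_price / 1_000_000 * total_tokens:.2f}")  # -> $2.25
```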
--- tumblrbot-1.4.4/pyproject.toml
+++ tumblrbot-1.4.6/pyproject.toml
@@ -1,18 +1,16 @@
 [project]
 name = "tumblrbot"
-version = "1.4.4"
+version = "1.4.6"
 description = "An updated bot that posts to Tumblr, based on your very own blog!"
 readme = "README.md"
 requires-python = ">= 3.13"
 dependencies = [
-    "httpx[http2]",
     "keyring",
-    "more-itertools",
-    "niquests[speedups,http3]",
     "openai",
     "pwinput",
     "pydantic",
     "pydantic-settings",
+    "requests",
     "requests-oauthlib",
     "rich",
     "tiktoken",
@@ -26,5 +24,5 @@ Source = "https://github.com/MaidThatPrograms/tumblrbot"
 tumblrbot = "tumblrbot.__main__:main"
 
 [build-system]
-requires = ["
-build-backend = "
+requires = ["flit_core"]
+build-backend = "flit_core.buildapi"
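The dependency swap (niquests, httpx, and more-itertools out; requests in) is the core of this release. One way to confirm what an installed copy actually requires, using only the standard library; this assumes `tumblrbot` is installed in the current environment:

```python
from importlib.metadata import requires, version

print(version("tumblrbot"))  # expect "1.4.6" after upgrading
for requirement in requires("tumblrbot") or []:
    print(requirement)       # should list requests, not niquests or httpx[http2]
```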
--- tumblrbot-1.4.4/src/tumblrbot/__main__.py
+++ tumblrbot-1.4.6/src/tumblrbot/__main__.py
@@ -1,4 +1,4 @@
-from openai import DefaultHttpxClient, OpenAI
+from openai import OpenAI
 from rich.prompt import Confirm
 from rich.traceback import install
 
@@ -15,26 +15,19 @@ def main() -> None:
     install()
 
     tokens = Tokens.read_from_keyring()
-    with (
-        OpenAI(api_key=tokens.openai_api_key.get_secret_value(), http_client=DefaultHttpxClient(http2=True)) as openai,
-        TumblrSession(tokens=tokens) as tumblr,
-    ):
-        post_downloader = PostDownloader(openai, tumblr)
+    with OpenAI(api_key=tokens.openai_api_key) as openai, TumblrSession(tokens) as tumblr:
         if Confirm.ask("Download latest posts?", default=False):
-
-            download_paths = post_downloader.get_data_paths()
+            PostDownloader(openai=openai, tumblr=tumblr).main()
 
-        examples_writer = ExamplesWriter(openai, tumblr, download_paths)
         if Confirm.ask("Create training data?", default=False):
-
-            estimated_tokens = sum(examples_writer.count_tokens())
+            ExamplesWriter(openai=openai, tumblr=tumblr).main()
 
-        fine_tuner = FineTuner(openai, tumblr
+        fine_tuner = FineTuner(openai=openai, tumblr=tumblr)
         fine_tuner.print_estimates()
 
         message = "Resume monitoring the previous fine-tuning process?" if FlowClass.config.job_id else "Upload data to OpenAI for fine-tuning?"
         if Confirm.ask(f"{message} [bold]You must do this to set the model to generate drafts from. Alternatively, manually enter a model into the config", default=False):
-            fine_tuner.
+            fine_tuner.main()
 
         if Confirm.ask("Generate drafts?", default=False):
-            DraftGenerator(openai, tumblr).
+            DraftGenerator(openai=openai, tumblr=tumblr).main()
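After this change, every flow step shares a uniform `main()` entry point, so `__main__` reduces to a series of Confirm-gated calls. A stand-in sketch of that shape (the `Step` class here is hypothetical, not part of the package):

```python
from rich.prompt import Confirm


class Step:
    """Stand-in for PostDownloader, ExamplesWriter, DraftGenerator, etc."""

    def __init__(self, name: str) -> None:
        self.name = name

    def main(self) -> None:
        print(f"running {self.name}")


for step, question in [
    (Step("download"), "Download latest posts?"),
    (Step("examples"), "Create training data?"),
    (Step("generate"), "Generate drafts?"),
]:
    if Confirm.ask(question, default=False):
        step.main()
```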
--- tumblrbot-1.4.4/src/tumblrbot/flow/download.py
+++ tumblrbot-1.4.6/src/tumblrbot/flow/download.py
@@ -1,13 +1,14 @@
 from io import TextIOBase
 from json import dump
-from pathlib import Path
+from typing import override
 
 from tumblrbot.utils.common import FlowClass, PreviewLive
 from tumblrbot.utils.models import Post
 
 
 class PostDownloader(FlowClass):
-
+    @override
+    def main(self) -> None:
         self.config.data_directory.mkdir(parents=True, exist_ok=True)
 
         with PreviewLive() as live:
@@ -50,9 +51,3 @@ class PostDownloader(FlowClass):
                 completed += len(posts)
             else:
                 return
-
-    def get_data_paths(self) -> list[Path]:
-        return list(map(self.get_data_path, self.config.download_blog_identifiers))
-
-    def get_data_path(self, blog_identifier: str) -> Path:
-        return (self.config.data_directory / blog_identifier).with_suffix(".jsonl")
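`get_data_paths`/`get_data_path` did not disappear; they moved to the shared `FlowClass` in `common.py` (see below) so other flow steps can reuse them. The path derivation itself is one `pathlib` expression; note how `with_suffix` treats a dotted blog identifier:

```python
from pathlib import Path

data_directory = Path("data")  # the config default

def get_data_path(blog_identifier: str) -> Path:
    return (data_directory / blog_identifier).with_suffix(".jsonl")

print(get_data_path("example"))             # data/example.jsonl
print(get_data_path("example.tumblr.com"))  # data/example.tumblr.jsonl -
                                            # with_suffix replaces the ".com" suffix
```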
--- tumblrbot-1.4.4/src/tumblrbot/flow/examples.py
+++ tumblrbot-1.4.6/src/tumblrbot/flow/examples.py
@@ -1,27 +1,21 @@
 from collections.abc import Generator
-from dataclasses import dataclass
+from itertools import batched
 from json import loads
 from math import ceil
-from pathlib import Path
 from re import search
-from typing import IO
+from typing import IO, override
 
 import rich
-from more_itertools import chunked
 from openai import BadRequestError
-from rich.console import Console
 from rich.prompt import Confirm
-from tiktoken import encoding_for_model, get_encoding
 
 from tumblrbot.utils.common import FlowClass, PreviewLive
 from tumblrbot.utils.models import Example, Post
 
 
-@dataclass
 class ExamplesWriter(FlowClass):
-
-
-    def write_examples(self) -> None:
+    @override
+    def main(self) -> None:
         self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
 
         with self.config.examples_file.open("w", encoding="utf_8") as fp:
@@ -52,24 +46,30 @@ class ExamplesWriter(FlowClass):
             fp.write(f"{example.model_dump_json()}\n")
 
     def get_custom_prompts(self) -> Generator[tuple[str, str]]:
-
-
-
+        self.config.custom_prompts_file.parent.mkdir(parents=True, exist_ok=True)
+        self.config.custom_prompts_file.touch(exist_ok=True)
+
+        with self.config.custom_prompts_file.open("r", encoding="utf_8") as fp:
+            for line in fp:
+                data: dict[str, str] = loads(line)
+                yield from data.items()
 
     def get_filtered_posts(self) -> Generator[Post]:
-        posts = 
+        posts = self.get_valid_posts()
 
         if Confirm.ask("[gray62]Remove posts flagged by the OpenAI moderation? This can sometimes resolve errors with fine-tuning validation, but is slow.", default=False):
+            batch_size = self.get_moderation_batch_size()
+            posts = list(posts)
            removed = 0
-
+
            with PreviewLive() as live:
-                for 
-
-                    ceil(len(posts) / 
+                for batch in live.progress.track(
+                    batched(posts, batch_size, strict=False),
+                    ceil(len(posts) / batch_size),
                     description="Removing flagged posts...",
                 ):
-                    response = self.openai.moderations.create(input=list(map(Post.get_content_text, 
-                    for post, moderation in zip(
+                    response = self.openai.moderations.create(input=list(map(Post.get_content_text, batch)))
+                    for post, moderation in zip(batch, response.results, strict=True):
                         if moderation.flagged:
                             removed += 1
                             live.custom_update(post)
@@ -80,35 +80,18 @@ class ExamplesWriter(FlowClass):
         yield from posts
 
     def get_valid_posts(self) -> Generator[Post]:
-        for data_path in self.
+        for data_path in self.get_data_paths():
             with data_path.open(encoding="utf_8") as fp:
                 for line in fp:
                     post = Post.model_validate_json(line)
-                    if 
+                    if post.valid_text_post():
                         yield post
 
-    def 
-        test_n = 1000
+    def get_moderation_batch_size(self) -> int:
         try:
-            self.openai.moderations.create(input=[""] * 
+            self.openai.moderations.create(input=[""] * self.config.max_moderation_batch_size)
         except BadRequestError as error:
             message = error.response.json()["error"]["message"]
             if match := search(r"(\d+)\.", message):
                 return int(match.group(1))
-            return 
-
-    def count_tokens(self) -> Generator[int]:
-        # Based on https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
-        # and https://cookbook.openai.com/examples/chat_finetuning_data_prep
-        try:
-            encoding = encoding_for_model(self.config.base_model)
-        except KeyError as error:
-            encoding = get_encoding("o200k_base")
-            Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
-
-        with self.config.examples_file.open(encoding="utf_8") as fp:
-            for line in fp:
-                example = Example.model_validate_json(line)
-                yield len(encoding.encode("assistant"))  # every reply is primed with <|start|>assistant<|message|>
-                for message in example.messages:
-                    yield 4 + len(encoding.encode(message.content))
+        return self.config.max_moderation_batch_size
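Two details worth calling out: `more_itertools.chunked` was replaced by the standard library's `itertools.batched` (available since Python 3.12, and this package requires 3.13), and `get_moderation_batch_size` probes the moderation endpoint's limit by deliberately over-sending and parsing the allowed count out of the error text. Both in isolation; the error message below is made up for illustration:

```python
from itertools import batched
from re import search

# batched() replaces more_itertools.chunked for splitting posts into API batches.
print(list(batched(range(7), 3)))  # [(0, 1, 2), (3, 4, 5), (6,)]

# The probe relies on the API quoting its own limit in the error body.
message = "Too many inputs provided. The maximum allowed is 32."
if match := search(r"(\d+)\.", message):
    print(int(match.group(1)))  # -> 32
```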
--- tumblrbot-1.4.4/src/tumblrbot/flow/fine_tune.py
+++ tumblrbot-1.4.6/src/tumblrbot/flow/fine_tune.py
@@ -1,25 +1,27 @@
-from dataclasses import dataclass
+from collections.abc import Generator
 from datetime import datetime
 from textwrap import dedent
-from time import sleep, time
+from time import sleep
+from typing import override
 
 import rich
 from openai.types.fine_tuning import FineTuningJob
 from rich import progress
+from rich.console import Console
 from rich.prompt import Confirm
+from tiktoken import encoding_for_model, get_encoding
 
 from tumblrbot.utils.common import FlowClass, PreviewLive
+from tumblrbot.utils.models import Example
 
 
-@dataclass
 class FineTuner(FlowClass):
-    estimated_tokens: int
-
     @staticmethod
     def dedent_print(text: str) -> None:
         rich.print(dedent(text).lstrip())
 
-
+    @override
+    def main(self) -> None:
         job = self.create_job()
 
         self.dedent_print(f"""
@@ -39,8 +41,6 @@ class FineTuner(FlowClass):
 
         live.progress.update(
             task_id,
-            total=job.estimated_finish - job.created_at if job.estimated_finish else None,
-            completed=time() - job.created_at,
             description=f"Fine-tuning: [italic]{job.status.replace('_', ' ').title()}[/]...",
         )
 
@@ -102,16 +102,33 @@ class FineTuner(FlowClass):
         self.config.fine_tuned_model = job.fine_tuned_model or ""
 
     def print_estimates(self) -> None:
-
+        estimated_tokens = sum(self.count_tokens())
+        total_tokens = self.config.expected_epochs * estimated_tokens
         cost_string = self.get_cost_string(total_tokens)
 
         self.dedent_print(f"""
-            Tokens {
+            Tokens {estimated_tokens:,}:
             Total tokens for [bold orange1]{self.config.expected_epochs}[/] epoch(s): {total_tokens:,}
             Expected cost when trained with [bold purple]{self.config.base_model}[/]: {cost_string}
             NOTE: Token values are approximate and may not be 100% accurate, please be aware of this when using the data.
            [italic red]Amelia, Mutsumi, and Marin are not responsible for any inaccuracies in the token count or estimated price.[/]
         """)
 
+    def count_tokens(self) -> Generator[int]:
+        # Based on https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken
+        # and https://cookbook.openai.com/examples/chat_finetuning_data_prep
+        try:
+            encoding = encoding_for_model(self.config.base_model)
+        except KeyError as error:
+            encoding = get_encoding("o200k_base")
+            Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
+
+        with self.config.examples_file.open(encoding="utf_8") as fp:
+            for line in fp:
+                example = Example.model_validate_json(line)
+                yield len(encoding.encode("assistant"))  # every reply is primed with <|start|>assistant<|message|>
+                for message in example.messages:
+                    yield 4 + len(encoding.encode(message.content))
+
     def get_cost_string(self, total_tokens: int) -> str:
         return f"${self.config.token_price / 1000000 * total_tokens:.2f}"
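`count_tokens` (moved here from `examples.py`) follows the OpenAI cookbook accounting: roughly 4 tokens of overhead per message plus one primed `assistant` reply per example. A standalone version of the same arithmetic; the model name is a placeholder and the fallback mirrors the code above:

```python
from tiktoken import encoding_for_model, get_encoding

try:
    encoding = encoding_for_model("gpt-4o-mini")  # placeholder for base_model
except KeyError:
    encoding = get_encoding("o200k_base")         # same fallback as the code above

messages = [
    "You are a bot that writes Tumblr posts.",  # developer message
    "Write a post.",                            # user message
    "hello world",                              # assistant example
]
total = len(encoding.encode("assistant")) + sum(4 + len(encoding.encode(text)) for text in messages)
print(total)  # approximate training tokens for this one example
```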
--- tumblrbot-1.4.4/src/tumblrbot/flow/generate.py
+++ tumblrbot-1.4.6/src/tumblrbot/flow/generate.py
@@ -1,13 +1,18 @@
 from random import random
+from typing import override
 
 import rich
+from rich.prompt import IntPrompt
 
 from tumblrbot.utils.common import FlowClass, PreviewLive
 from tumblrbot.utils.models import Post
 
 
 class DraftGenerator(FlowClass):
-
+    @override
+    def main(self) -> None:
+        self.config.draft_count = IntPrompt.ask("How many drafts should be generated?", default=self.config.draft_count)
+
         message = f"View drafts here: https://tumblr.com/blog/{self.config.upload_blog_identifier}/drafts"
 
         with PreviewLive() as live:
@@ -24,10 +29,7 @@ class DraftGenerator(FlowClass):
 
     def generate_post(self) -> Post:
         content = self.generate_content()
-        post = Post(
-            content=[content],
-            state="draft",
-        )
+        post = Post(content=[content])
         if tags := self.generate_tags(content):
             post.tags = tags.tags
         return post
@@ -39,16 +41,15 @@ class DraftGenerator(FlowClass):
             model=self.config.fine_tuned_model,
         ).output_text
 
-        return Post.Block(
+        return Post.Block(text=content)
 
     def generate_tags(self, content: Post.Block) -> Post | None:
         if random() < self.config.tags_chance:  # noqa: S311
             return self.openai.responses.parse(
                 text_format=Post,
-                input=
-                instructions=
+                input=content.text,
+                instructions=self.config.tags_developer_message,
                 model=self.config.base_model,
-                temperature=0.5,
             ).output_parsed
 
         return None
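The tags roll is a single comparison against `tags_chance`; with the default of 0.1, roughly one draft in ten triggers the extra tag-generation call. A quick simulation:

```python
from random import random

tags_chance = 0.1  # the config default
drafts = 10_000
tagged = sum(random() < tags_chance for _ in range(drafts))
print(f"{tagged}/{drafts} drafts would get tags (~10% expected)")
```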
--- tumblrbot-1.4.4/src/tumblrbot/utils/common.py
+++ tumblrbot-1.4.6/src/tumblrbot/utils/common.py
@@ -1,25 +1,37 @@
-from 
+from abc import abstractmethod
 from random import choice
 from typing import ClassVar, Self, override
 
 from openai import OpenAI
+from pydantic import ConfigDict
 from rich._spinners import SPINNERS
 from rich.console import RenderableType
 from rich.live import Live
 from rich.progress import MofNCompleteColumn, Progress, SpinnerColumn, TimeElapsedColumn
 from rich.table import Table
 
-from tumblrbot.utils.config import Config
+from tumblrbot.utils.config import Config, Path
+from tumblrbot.utils.models import FullyValidatedModel
 from tumblrbot.utils.tumblr import TumblrSession
 
 
-
-
+class FlowClass(FullyValidatedModel):
+    model_config = ConfigDict(arbitrary_types_allowed=True)
+
     config: ClassVar = Config()  # pyright: ignore[reportCallIssue]
 
     openai: OpenAI
     tumblr: TumblrSession
 
+    @abstractmethod
+    def main(self) -> None: ...
+
+    def get_data_paths(self) -> list[Path]:
+        return list(map(self.get_data_path, self.config.download_blog_identifiers))
+
+    def get_data_path(self, blog_identifier: str) -> Path:
+        return (self.config.data_directory / blog_identifier).with_suffix(".jsonl")
+
 
 class PreviewLive(Live):
     def __init__(self) -> None:
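`FlowClass` switching from a dataclass to a pydantic model is what makes the `@abstractmethod def main` contract enforceable: pydantic's model metaclass derives from `ABCMeta`, so an un-overridden `main` blocks instantiation. A reduced sketch of the pattern:

```python
from abc import abstractmethod

from pydantic import BaseModel, ConfigDict


class Flow(BaseModel):
    model_config = ConfigDict(arbitrary_types_allowed=True)

    @abstractmethod
    def main(self) -> None: ...


class Download(Flow):
    def main(self) -> None:
        print("downloading")


Download().main()  # works
# Flow() would raise TypeError: can't instantiate abstract class Flow
```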
--- tumblrbot-1.4.4/src/tumblrbot/utils/config.py
+++ tumblrbot-1.4.6/src/tumblrbot/utils/config.py
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Self, override
 import rich
 import tomlkit
 from openai.types import ChatModel
-from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, 
+from pydantic import Field, NonNegativeFloat, PositiveFloat, PositiveInt, model_validator
 from pydantic_settings import BaseSettings, PydanticBaseSettingsSource, SettingsConfigDict, TomlConfigSettingsSource
 from rich.prompt import Prompt
 from tomlkit import comment, document
@@ -31,7 +31,8 @@ class Config(BaseSettings):
     data_directory: Path = Field(Path("data"), description="Where to store downloaded post data.")
 
     # Writing Examples
-
+    max_moderation_batch_size: PositiveInt = Field(100, description="How many posts, at most, to submit to the OpenAI moderation API. This is also capped by the API.")
+    custom_prompts_file: Path = Field(Path("custom_prompts.jsonl"), description="Where to read in custom prompts from.")
 
     # Writing Examples & Fine-Tuning
     examples_file: Path = Field(Path("examples.jsonl"), description="Where to output the examples that will be used to fine-tune the model.")
@@ -53,6 +54,7 @@ class Config(BaseSettings):
     upload_blog_identifier: str = Field("", description="The identifier of the blog which generated drafts will be uploaded to. This must be a blog associated with the same account as the configured Tumblr secret tokens.")
     draft_count: PositiveInt = Field(150, description="The number of drafts to process. This will affect the number of tokens used with OpenAI")
     tags_chance: NonNegativeFloat = Field(0.1, description="The chance to generate tags for any given post. This will incur extra calls to OpenAI.")
+    tags_developer_message: str = Field("You will be provided with a block of text, and your task is to extract a very short list of the most important subjects from it.", description="The developer message used to generate tags.")
 
     @override
     @classmethod
@@ -87,8 +89,7 @@ class Config(BaseSettings):
             for line in field.description.split(". "):
                 toml_table.add(comment(f"{line.removesuffix('.')}."))
 
-
-            toml_table[name] = value.get_secret_value() if isinstance(value, Secret) else dumped_model[name]
+            toml_table[name] = dumped_model[name]
 
         Path(toml_file).write_text(
             tomlkit.dumps(toml_table),
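For context, the TOML-backed settings machinery this file builds on is the standard pydantic-settings pattern; a reduced sketch (field names borrowed from the config above, everything else per the pydantic-settings documentation):

```python
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SettingsConfigDict,
    TomlConfigSettingsSource,
)


class MiniConfig(BaseSettings):
    model_config = SettingsConfigDict(toml_file="config.toml")

    draft_count: int = 150
    tags_chance: float = 0.1

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        # Read values from config.toml only, falling back to field defaults.
        return (TomlConfigSettingsSource(settings_cls),)


print(MiniConfig().draft_count)  # 150 unless config.toml overrides it
```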
--- tumblrbot-1.4.4/src/tumblrbot/utils/models.py
+++ tumblrbot-1.4.6/src/tumblrbot/utils/models.py
@@ -3,23 +3,14 @@ from typing import Annotated, Any, ClassVar, Literal, Self, override
 
 import rich
 from keyring import get_password, set_password
-from niquests import Session
 from openai import BaseModel
 from pwinput import pwinput
-from pydantic import ConfigDict, PlainSerializer, SecretStr
+from pydantic import ConfigDict, PlainSerializer
 from pydantic.json_schema import SkipJsonSchema
 from requests_oauthlib import OAuth1Session
 from rich.panel import Panel
 from rich.prompt import Confirm
 
-type SerializableSecretStr = Annotated[
-    SecretStr,
-    PlainSerializer(
-        SecretStr.get_secret_value,
-        when_used="json-unless-none",
-    ),
-]
-
 
 class FullyValidatedModel(BaseModel):
     model_config = ConfigDict(
@@ -33,24 +24,28 @@ class FullyValidatedModel(BaseModel):
 
 class Tokens(FullyValidatedModel):
     class Tumblr(FullyValidatedModel):
-        client_key: 
-        client_secret: 
-        resource_owner_key: 
-        resource_owner_secret: 
+        client_key: str = ""
+        client_secret: str = ""
+        resource_owner_key: str = ""
+        resource_owner_secret: str = ""
 
     service_name: ClassVar = "tumblrbot"
     username: ClassVar = "tokens"
 
-    openai_api_key: 
+    openai_api_key: str = ""
     tumblr: Tumblr = Tumblr()
 
     @staticmethod
-    def 
+    def get_oauth_tokens(token: dict[str, str]) -> tuple[str, str]:
+        return token["oauth_token"], token["oauth_token_secret"]
+
+    @staticmethod
+    def online_token_prompt(url: str, *tokens: str) -> Generator[str]:
         formatted_token_string = " and ".join(f"[cyan]{token}[/]" for token in tokens)
 
         rich.print(f"Retrieve your {formatted_token_string} from: {url}")
         for token in tokens:
-            yield 
+            yield pwinput(f"Enter your {token} (masked): ").strip()
 
         rich.print()
@@ -64,47 +59,43 @@ class Tokens(FullyValidatedModel):
     def model_post_init(self, context: object) -> None:
         super().model_post_init(context)
 
-        if not self.openai_api_key
+        if not self.openai_api_key or Confirm.ask("Reset OpenAI API key?", default=False):
             (self.openai_api_key,) = self.online_token_prompt("https://platform.openai.com/api-keys", "API key")
 
-        if not all(self.tumblr.model_dump(
+        if not all(self.tumblr.model_dump().values()) or Confirm.ask("Reset Tumblr API tokens?", default=False):
             self.tumblr.client_key, self.tumblr.client_secret = self.online_token_prompt("https://tumblr.com/oauth/apps", "consumer key", "consumer secret")
 
-            OAuth1Session.__bases__ = (Session,)
-
             with OAuth1Session(
-                self.tumblr.client_key
-                self.tumblr.client_secret
+                self.tumblr.client_key,
+                self.tumblr.client_secret,
             ) as oauth_session:
                 fetch_response = oauth_session.fetch_request_token("http://tumblr.com/oauth/request_token")
                 full_authorize_url = oauth_session.authorization_url("http://tumblr.com/oauth/authorize")
                 (redirect_response,) = self.online_token_prompt(full_authorize_url, "full redirect URL")
-                oauth_response = oauth_session.parse_authorization_response(redirect_response
+                oauth_response = oauth_session.parse_authorization_response(redirect_response)
 
             with OAuth1Session(
-                self.tumblr.client_key
-                self.tumblr.client_secret
-                fetch_response
-                fetch_response["oauth_token_secret"],
+                self.tumblr.client_key,
+                self.tumblr.client_secret,
+                *self.get_oauth_tokens(fetch_response),
                 verifier=oauth_response["oauth_verifier"],
             ) as oauth_session:
                 oauth_tokens = oauth_session.fetch_access_token("http://tumblr.com/oauth/access_token")
 
-            self.tumblr.resource_owner_key = oauth_tokens["oauth_token"]
-            self.tumblr.resource_owner_secret = oauth_tokens["oauth_token_secret"]
+            self.tumblr.resource_owner_key, self.tumblr.resource_owner_secret = self.get_oauth_tokens(oauth_tokens)
 
         set_password(self.service_name, self.username, self.model_dump_json())
 
 
 class Post(FullyValidatedModel):
     class Block(FullyValidatedModel):
-        type: str = ""
+        type: str = "text"
         text: str = ""
         blocks: list[int] = []  # noqa: RUF012
 
     timestamp: SkipJsonSchema[int] = 0
     tags: Annotated[list[str], PlainSerializer(",".join)] = []  # noqa: RUF012
-    state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "
+    state: SkipJsonSchema[Literal["published", "queued", "draft", "private", "unapproved"]] = "draft"
 
     content: SkipJsonSchema[list[Block]] = []  # noqa: RUF012
     layout: SkipJsonSchema[list[Block]] = []  # noqa: RUF012
@@ -120,8 +111,8 @@ class Post(FullyValidatedModel):
             subtitle_align="left",
         )
 
-    def 
-        return all(block.type == "text" for block in self.content) and not any(block.type == "ask" for block in self.layout)
+    def valid_text_post(self) -> bool:
+        return bool(self.content) and all(block.type == "text" for block in self.content) and not (self.is_submission or self.trail or any(block.type == "ask" for block in self.layout))
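With `SecretStr` gone, tokens are stored as plain strings and persisted through keyring as one JSON blob under a fixed service/username pair, matching `set_password(self.service_name, self.username, self.model_dump_json())` above. The round trip in isolation; the key value is a placeholder and this requires a working keyring backend:

```python
from keyring import get_password, set_password

set_password("tumblrbot", "tokens", '{"openai_api_key": "sk-placeholder"}')
print(get_password("tumblrbot", "tokens"))  # -> the same JSON string back
```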
--- /dev/null
+++ tumblrbot-1.4.6/src/tumblrbot/utils/tumblr.py
@@ -0,0 +1,40 @@
+from typing import Self
+
+from requests import HTTPError, Response
+from requests_oauthlib import OAuth1Session
+
+from tumblrbot.utils.models import Post, Tokens
+
+
+class TumblrSession(OAuth1Session):
+    def __init__(self, tokens: Tokens) -> None:
+        super().__init__(**tokens.tumblr.model_dump())
+        self.hooks["response"].append(self.response_hook)
+
+    def __enter__(self) -> Self:
+        super().__enter__()
+        return self
+
+    def response_hook(self, response: Response, *_args: object, **_kwargs: object) -> None:
+        try:
+            response.raise_for_status()
+        except HTTPError as error:
+            if response.text:
+                error.add_note(response.text)
+            raise
+
+    def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
+        return self.get(
+            f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
+            params={
+                "after": after,
+                "sort": "asc",
+                "npf": True,
+            },
+        )
+
+    def create_post(self, blog_identifier: str, post: Post) -> Response:
+        return self.post(
+            f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
+            json=post.model_dump(),
+        )
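The new session drops niquests (and the `OAuth1Session.__bases__` monkeypatch the old `models.py` needed) in favor of the hook mechanism that `requests_oauthlib.OAuth1Session` now inherits directly from `requests`. The same error-surfacing pattern on a plain requests `Session`:

```python
from requests import HTTPError, Response, Session

def raise_with_body(response: Response, *_args: object, **_kwargs: object) -> None:
    try:
        response.raise_for_status()
    except HTTPError as error:
        if response.text:
            error.add_note(response.text)  # Python 3.11+; surfaces the API's error body
        raise

session = Session()
session.hooks["response"].append(raise_with_body)
# An unauthenticated call raises HTTPError with the Tumblr error JSON attached:
session.get("https://api.tumblr.com/v2/blog/example.tumblr.com/posts")
```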
--- tumblrbot-1.4.4/src/tumblrbot/utils/tumblr.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from dataclasses import dataclass
-from typing import Self
-
-from niquests import HTTPError, PreparedRequest, Response, Session
-from requests_oauthlib import OAuth1
-
-from tumblrbot.utils.models import Post, Tokens
-
-
-@dataclass
-class TumblrSession(Session):
-    tokens: Tokens
-
-    def __post_init__(self) -> None:
-        super().__init__(multiplexed=True, happy_eyeballs=True)
-
-        self.auth = OAuth1(**self.tokens.tumblr.model_dump(mode="json"))
-        self.hooks["response"].append(self.response_hook)
-
-    def __enter__(self) -> Self:
-        super().__enter__()
-        return self
-
-    def response_hook(self, response: PreparedRequest | Response) -> None:
-        if isinstance(response, Response):
-            try:
-                response.raise_for_status()
-            except HTTPError as error:
-                if response.text:
-                    error.add_note(response.text)
-                raise
-
-    def retrieve_published_posts(self, blog_identifier: str, after: int) -> Response:
-        return self.get(
-            f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
-            params={
-                "after": str(after),
-                "sort": "asc",
-                "npf": str(True),
-            },
-        )
-
-    def create_post(self, blog_identifier: str, post: Post) -> Response:
-        return self.post(
-            f"https://api.tumblr.com/v2/blog/{blog_identifier}/posts",
-            json=post.model_dump(mode="json"),
-        )