tumblrbot 1.9.4__tar.gz → 1.9.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,221 +1,221 @@
1
- # Byte-compiled / optimized / DLL files
2
- __pycache__/
3
- *.py[codz]
4
- *$py.class
5
-
6
- # C extensions
7
- *.so
8
-
9
- # Distribution / packaging
10
- .Python
11
- build/
12
- develop-eggs/
13
- dist/
14
- downloads/
15
- eggs/
16
- .eggs/
17
- lib/
18
- lib64/
19
- parts/
20
- sdist/
21
- var/
22
- wheels/
23
- share/python-wheels/
24
- *.egg-info/
25
- .installed.cfg
26
- *.egg
27
- MANIFEST
28
-
29
- # PyInstaller
30
- # Usually these files are written by a python script from a template
31
- # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
- *.manifest
33
- *.spec
34
-
35
- # Installer logs
36
- pip-log.txt
37
- pip-delete-this-directory.txt
38
-
39
- # Unit test / coverage reports
40
- htmlcov/
41
- .tox/
42
- .nox/
43
- .coverage
44
- .coverage.*
45
- .cache
46
- nosetests.xml
47
- coverage.xml
48
- *.cover
49
- *.py.cover
50
- .hypothesis/
51
- .pytest_cache/
52
- cover/
53
-
54
- # Translations
55
- *.mo
56
- *.pot
57
-
58
- # Django stuff:
59
- *.log
60
- local_settings.py
61
- db.sqlite3
62
- db.sqlite3-journal
63
-
64
- # Flask stuff:
65
- instance/
66
- .webassets-cache
67
-
68
- # Scrapy stuff:
69
- .scrapy
70
-
71
- # Sphinx documentation
72
- docs/_build/
73
-
74
- # PyBuilder
75
- .pybuilder/
76
- target/
77
-
78
- # Jupyter Notebook
79
- .ipynb_checkpoints
80
-
81
- # IPython
82
- profile_default/
83
- ipython_config.py
84
-
85
- # pyenv
86
- # For a library or package, you might want to ignore these files since the code is
87
- # intended to run in multiple environments; otherwise, check them in:
88
- # .python-version
89
-
90
- # pipenv
91
- # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
- # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
- # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
- # install all needed dependencies.
95
- #Pipfile.lock
96
-
97
- # UV
98
- # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
- # This is especially recommended for binary packages to ensure reproducibility, and is more
100
- # commonly ignored for libraries.
101
- #uv.lock
102
-
103
- # poetry
104
- # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
- # This is especially recommended for binary packages to ensure reproducibility, and is more
106
- # commonly ignored for libraries.
107
- # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
- #poetry.lock
109
- #poetry.toml
110
-
111
- # pdm
112
- # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
- # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
- # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
- #pdm.lock
116
- #pdm.toml
117
- .pdm-python
118
- .pdm-build/
119
-
120
- # pixi
121
- # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
- #pixi.lock
123
- # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
- # in the .venv directory. It is recommended not to include this directory in version control.
125
- .pixi
126
-
127
- # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
- __pypackages__/
129
-
130
- # Celery stuff
131
- celerybeat-schedule
132
- celerybeat.pid
133
-
134
- # Redis
135
- *.rdb
136
- *.aof
137
- *.pid
138
-
139
- # RabbitMQ
140
- mnesia/
141
- rabbitmq/
142
- rabbitmq-data/
143
-
144
- # ActiveMQ
145
- activemq-data/
146
-
147
- # SageMath parsed files
148
- *.sage.py
149
-
150
- # Environments
151
- .env
152
- .envrc
153
- .venv
154
- env/
155
- venv/
156
- ENV/
157
- env.bak/
158
- venv.bak/
159
-
160
- # Spyder project settings
161
- .spyderproject
162
- .spyproject
163
-
164
- # Rope project settings
165
- .ropeproject
166
-
167
- # mkdocs documentation
168
- /site
169
-
170
- # mypy
171
- .mypy_cache/
172
- .dmypy.json
173
- dmypy.json
174
-
175
- # Pyre type checker
176
- .pyre/
177
-
178
- # pytype static type analyzer
179
- .pytype/
180
-
181
- # Cython debug symbols
182
- cython_debug/
183
-
184
- # PyCharm
185
- # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
- # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
- # and can be added to the global gitignore or merged into this file. For a more nuclear
188
- # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
- #.idea/
190
-
191
- # Abstra
192
- # Abstra is an AI-powered process automation framework.
193
- # Ignore directories containing user credentials, local state, and settings.
194
- # Learn more at https://abstra.io/docs
195
- .abstra/
196
-
197
- # Visual Studio Code
198
- # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
- # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
- # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
- # you could uncomment the following to ignore the entire vscode folder
202
- .vscode/
203
-
204
- # Ruff stuff:
205
- .ruff_cache/
206
-
207
- # PyPI configuration file
208
- .pypirc
209
-
210
- # Marimo
211
- marimo/_static/
212
- marimo/_lsp/
213
- __marimo__/
214
-
215
- # Streamlit
216
- .streamlit/secrets.toml
217
-
218
- data
219
- *.jsonl
220
- *.toml
221
- tumblrbot.exe.lnk
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ # Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ # poetry.lock
109
+ # poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ # pdm.lock
116
+ # pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ # pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # Redis
135
+ *.rdb
136
+ *.aof
137
+ *.pid
138
+
139
+ # RabbitMQ
140
+ mnesia/
141
+ rabbitmq/
142
+ rabbitmq-data/
143
+
144
+ # ActiveMQ
145
+ activemq-data/
146
+
147
+ # SageMath parsed files
148
+ *.sage.py
149
+
150
+ # Environments
151
+ .env
152
+ .envrc
153
+ .venv
154
+ env/
155
+ venv/
156
+ ENV/
157
+ env.bak/
158
+ venv.bak/
159
+
160
+ # Spyder project settings
161
+ .spyderproject
162
+ .spyproject
163
+
164
+ # Rope project settings
165
+ .ropeproject
166
+
167
+ # mkdocs documentation
168
+ /site
169
+
170
+ # mypy
171
+ .mypy_cache/
172
+ .dmypy.json
173
+ dmypy.json
174
+
175
+ # Pyre type checker
176
+ .pyre/
177
+
178
+ # pytype static type analyzer
179
+ .pytype/
180
+
181
+ # Cython debug symbols
182
+ cython_debug/
183
+
184
+ # PyCharm
185
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
186
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
187
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
188
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
189
+ # .idea/
190
+
191
+ # Abstra
192
+ # Abstra is an AI-powered process automation framework.
193
+ # Ignore directories containing user credentials, local state, and settings.
194
+ # Learn more at https://abstra.io/docs
195
+ .abstra/
196
+
197
+ # Visual Studio Code
198
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
199
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
200
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
201
+ # you could uncomment the following to ignore the entire vscode folder
202
+ # .vscode/
203
+
204
+ # Ruff stuff:
205
+ .ruff_cache/
206
+
207
+ # PyPI configuration file
208
+ .pypirc
209
+
210
+ # Marimo
211
+ marimo/_static/
212
+ marimo/_lsp/
213
+ __marimo__/
214
+
215
+ # Streamlit
216
+ .streamlit/secrets.toml
217
+
218
+ data
219
+ *.toml
220
+ *.jsonl
221
+ tumblrbot.ps1
@@ -1,16 +1,15 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tumblrbot
3
- Version: 1.9.4
3
+ Version: 1.9.6
4
4
  Summary: An updated bot that posts to Tumblr, based on your very own blog!
5
- Requires-Python: >= 3.13
5
+ Requires-Python: >= 3.14
6
6
  Description-Content-Type: text/markdown
7
- Requires-Dist: click
8
7
  Requires-Dist: openai
9
- Requires-Dist: pwinput
10
8
  Requires-Dist: pydantic
11
9
  Requires-Dist: requests
12
10
  Requires-Dist: requests-oauthlib
13
11
  Requires-Dist: rich
12
+ Requires-Dist: tenacity
14
13
  Requires-Dist: tiktoken
15
14
  Requires-Dist: tomlkit
16
15
  Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
@@ -18,6 +17,8 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
18
17
 
19
18
  # tumblrbot
20
19
 
20
+ [tumblrbot.exe]: https://github.com/MaidScientistIzutsumiMarin/tumblrbot/releases/latest/download/tumblrbot.exe
21
+
21
22
  [OAuth]: https://oauth.net/1
22
23
  [Python]: https://python.org/download
23
24
 
@@ -31,6 +32,7 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
31
32
  [OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
32
33
  [OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
33
34
  [OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
35
+ [Flags]: https://platform.openai.com/docs/guides/moderation/over#content-classifications
34
36
  [Fine-Tuning Portal]: https://platform.openai.com/finetune
35
37
 
36
38
  [Tumblr]: https://tumblr.com
@@ -84,26 +86,46 @@ Features:
84
86
 
85
87
  **Known Issues:**
86
88
 
89
+ - Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
90
+ - You can retry with the same examples file. This has, on rare occasions, worked.
91
+ - You can submit the examples file to the [OpenAI] moderation API with this program's guided prompts. This has worked consistently for our dataset, but others have reported it not being thorough enough.
92
+ - You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
93
+ - You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
94
+ - If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
87
95
  - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
88
96
  - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
89
- - During post downloading or post generation, you may receive a "Limit Exceeded" error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
97
+ - During post downloading or post generation, you may receive a Limit Exceeded error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
90
98
  - Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
91
99
 
92
100
  **Please submit an issue or contact us for features you want added/reimplemented.**
93
101
 
94
- ## Installation
102
+ ## Installation & Usage
103
+
104
+ ### Downloadable Binary
105
+
106
+ | Pros | Cons |
107
+ | --- | --- |
108
+ | Easier to install | Harder to update |
109
+ | No risk of dependencies breaking | Dependencies may be older |
110
+
111
+ 1. Download the latest release's [tumblrbot.exe].
112
+ 1. Launch `tumblrbot.exe` in the install location.
113
+
114
+ ### PyPI
115
+
116
+ | Pros | Cons |
117
+ | --- | --- |
118
+ | Easier to update | Harder to install |
119
+ | Dependencies may be newer | Dependencies may break |
95
120
 
96
121
  1. Install the latest version of [Python]:
97
122
  - Windows: `winget install python3`
98
123
  - Linux (apt): `apt install python-pip`
99
124
  - Linux (pacman): `pacman -S python-pip`
100
125
  1. Install the [pip] package: `pip install tumblrbot`
101
- - Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
126
+ - Alternatively, you can install from this repository: `pip install git+https://github.com/MaidScientistIzutsumiMarin/tumblrbot.git`
102
127
  - On Linux, you will have to make a virtual environment or use the flag to install packages system-wide.
103
-
104
- ## Usage
105
-
106
- Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
128
+ 1. Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
107
129
 
108
130
  ## Obtaining Tokens
109
131
 
@@ -160,7 +182,8 @@ Specific Options:
160
182
  To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
161
183
 
162
184
  - **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
163
- - **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so "the" will also filter out posts with "them" or "thematic". This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with "\\\b", i.e. "\\\bthe\\\b". Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a "|" which is then used to search the post content for any matches.
185
+ - **`moderation_batch_size`** - This controls the batch size when submitting posts to the OpenAI moderation API. There is no limit, but higher numbers will cause you to be rate-limited more, which can overall be slower. Low numbers reduce rate-limiting, but can sometimes take longer due to needing more requests. The best value will depend on your computer, internet connection, and any number of factors on OpenAI's side. The default value is just what worked best for our computer.
186
+ - **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
164
187
  - **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
165
188
  - **`user_message`** - This setting is used and works in the same way as `developer_message`.
166
189
  - **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
@@ -1,5 +1,7 @@
1
1
  # tumblrbot
2
2
 
3
+ [tumblrbot.exe]: https://github.com/MaidScientistIzutsumiMarin/tumblrbot/releases/latest/download/tumblrbot.exe
4
+
3
5
  [OAuth]: https://oauth.net/1
4
6
  [Python]: https://python.org/download
5
7
 
@@ -13,6 +15,7 @@
13
15
  [OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
14
16
  [OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
15
17
  [OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
18
+ [Flags]: https://platform.openai.com/docs/guides/moderation/over#content-classifications
16
19
  [Fine-Tuning Portal]: https://platform.openai.com/finetune
17
20
 
18
21
  [Tumblr]: https://tumblr.com
@@ -66,26 +69,46 @@ Features:
66
69
 
67
70
  **Known Issues:**
68
71
 
72
+ - Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
73
+ - You can retry with the same examples file. This has, on rare occasions, worked.
74
+ - You can submit the examples file to the [OpenAI] moderation API with this program's guided prompts. This has worked consistently for our dataset, but others have reported it not being thorough enough.
75
+ - You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
76
+ - You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
77
+ - If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
69
78
  - Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
70
79
  - Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
71
- - During post downloading or post generation, you may receive a "Limit Exceeded" error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
80
+ - During post downloading or post generation, you may receive a Limit Exceeded error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
72
81
  - Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
73
82
 
74
83
  **Please submit an issue or contact us for features you want added/reimplemented.**
75
84
 
76
- ## Installation
85
+ ## Installation & Usage
86
+
87
+ ### Downloadable Binary
88
+
89
+ | Pros | Cons |
90
+ | --- | --- |
91
+ | Easier to install | Harder to update |
92
+ | No risk of dependencies breaking | Dependencies may be older |
93
+
94
+ 1. Download the latest release's [tumblrbot.exe].
95
+ 1. Launch `tumblrbot.exe` in the install location.
96
+
97
+ ### PyPI
98
+
99
+ | Pros | Cons |
100
+ | --- | --- |
101
+ | Easier to update | Harder to install |
102
+ | Dependencies may be newer | Dependencies may break |
77
103
 
78
104
  1. Install the latest version of [Python]:
79
105
  - Windows: `winget install python3`
80
106
  - Linux (apt): `apt install python-pip`
81
107
  - Linux (pacman): `pacman -S python-pip`
82
108
  1. Install the [pip] package: `pip install tumblrbot`
83
- - Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
109
+ - Alternatively, you can install from this repository: `pip install git+https://github.com/MaidScientistIzutsumiMarin/tumblrbot.git`
84
110
  - On Linux, you will have to make a virtual environment or use the flag to install packages system-wide.
85
-
86
- ## Usage
87
-
88
- Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
111
+ 1. Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
89
112
 
90
113
  ## Obtaining Tokens
91
114
 
@@ -142,7 +165,8 @@ Specific Options:
142
165
  To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
143
166
 
144
167
  - **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
145
- - **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so "the" will also filter out posts with "them" or "thematic". This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with "\\\b", i.e. "\\\bthe\\\b". Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a "|" which is then used to search the post content for any matches.
168
+ - **`moderation_batch_size`** - This controls the batch size when submitting posts to the OpenAI moderation API. There is no limit, but higher numbers will cause you to be rate-limited more, which can overall be slower. Low numbers reduce rate-limiting, but can sometimes take longer due to needing more requests. The best value will depend on your computer, internet connection, and any number of factors on OpenAI's side. The default value is just what worked best for our computer.
169
+ - **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
146
170
  - **`developer_message`** - This message is used for fine-tuning the AI as well as generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
147
171
  - **`user_message`** - This setting is used and works in the same way as `developer_message`.
148
172
  - **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
@@ -0,0 +1 @@
1
+ ..\..\Powershell\build.ps1 -ExtraArgs '--collect-all tiktoken_ext'
@@ -1,19 +1,18 @@
1
1
  [project]
2
2
  name = "tumblrbot"
3
- version = "1.9.4"
3
+ version = "1.9.6"
4
4
  description = "An updated bot that posts to Tumblr, based on your very own blog!"
5
5
  readme = "README.md"
6
- requires-python = ">= 3.13"
6
+ requires-python = ">= 3.14"
7
7
  dependencies = [
8
- "click",
9
8
  "openai",
10
- "pwinput",
11
9
  "pydantic",
12
10
  "requests",
13
11
  "requests-oauthlib",
14
12
  "rich",
13
+ "tenacity",
15
14
  "tiktoken",
16
- "tomlkit",
15
+ "tomlkit"
17
16
  ]
18
17
 
19
18
  [project.urls]
@@ -25,4 +24,4 @@ tumblrbot = "tumblrbot.__main__:main"
25
24
 
26
25
  [build-system]
27
26
  requires = ["flit_core"]
28
- build-backend = "flit_core.buildapi"
27
+ build-backend = "flit_core.buildapi"