tumblrbot 1.9.4__tar.gz → 1.9.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/.gitignore +221 -221
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/PKG-INFO +35 -12
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/README.md +32 -8
- tumblrbot-1.9.6/build.ps1 +1 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/pyproject.toml +5 -6
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/sample_custom_prompts.jsonl +0 -26
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/__main__.py +6 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/flow/download.py +4 -2
- tumblrbot-1.9.6/src/tumblrbot/flow/examples.py +97 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/flow/fine_tune.py +13 -10
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/flow/generate.py +8 -6
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/utils/common.py +8 -5
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/utils/models.py +47 -31
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/utils/tumblr.py +14 -1
- tumblrbot-1.9.4/src/tumblrbot/flow/examples.py +0 -98
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/.github/FUNDING.yml +0 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/.github/dependabot.yml +0 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/UNLICENSE +0 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.9.4 → tumblrbot-1.9.6}/src/tumblrbot/utils/__init__.py +0 -0
|
@@ -1,221 +1,221 @@
|
|
|
1
|
-
# Byte-compiled / optimized / DLL files
|
|
2
|
-
__pycache__/
|
|
3
|
-
*.py[codz]
|
|
4
|
-
*$py.class
|
|
5
|
-
|
|
6
|
-
# C extensions
|
|
7
|
-
*.so
|
|
8
|
-
|
|
9
|
-
# Distribution / packaging
|
|
10
|
-
.Python
|
|
11
|
-
build/
|
|
12
|
-
develop-eggs/
|
|
13
|
-
dist/
|
|
14
|
-
downloads/
|
|
15
|
-
eggs/
|
|
16
|
-
.eggs/
|
|
17
|
-
lib/
|
|
18
|
-
lib64/
|
|
19
|
-
parts/
|
|
20
|
-
sdist/
|
|
21
|
-
var/
|
|
22
|
-
wheels/
|
|
23
|
-
share/python-wheels/
|
|
24
|
-
*.egg-info/
|
|
25
|
-
.installed.cfg
|
|
26
|
-
*.egg
|
|
27
|
-
MANIFEST
|
|
28
|
-
|
|
29
|
-
# PyInstaller
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
*.manifest
|
|
33
|
-
*.spec
|
|
34
|
-
|
|
35
|
-
# Installer logs
|
|
36
|
-
pip-log.txt
|
|
37
|
-
pip-delete-this-directory.txt
|
|
38
|
-
|
|
39
|
-
# Unit test / coverage reports
|
|
40
|
-
htmlcov/
|
|
41
|
-
.tox/
|
|
42
|
-
.nox/
|
|
43
|
-
.coverage
|
|
44
|
-
.coverage.*
|
|
45
|
-
.cache
|
|
46
|
-
nosetests.xml
|
|
47
|
-
coverage.xml
|
|
48
|
-
*.cover
|
|
49
|
-
*.py.cover
|
|
50
|
-
.hypothesis/
|
|
51
|
-
.pytest_cache/
|
|
52
|
-
cover/
|
|
53
|
-
|
|
54
|
-
# Translations
|
|
55
|
-
*.mo
|
|
56
|
-
*.pot
|
|
57
|
-
|
|
58
|
-
# Django stuff:
|
|
59
|
-
*.log
|
|
60
|
-
local_settings.py
|
|
61
|
-
db.sqlite3
|
|
62
|
-
db.sqlite3-journal
|
|
63
|
-
|
|
64
|
-
# Flask stuff:
|
|
65
|
-
instance/
|
|
66
|
-
.webassets-cache
|
|
67
|
-
|
|
68
|
-
# Scrapy stuff:
|
|
69
|
-
.scrapy
|
|
70
|
-
|
|
71
|
-
# Sphinx documentation
|
|
72
|
-
docs/_build/
|
|
73
|
-
|
|
74
|
-
# PyBuilder
|
|
75
|
-
.pybuilder/
|
|
76
|
-
target/
|
|
77
|
-
|
|
78
|
-
# Jupyter Notebook
|
|
79
|
-
.ipynb_checkpoints
|
|
80
|
-
|
|
81
|
-
# IPython
|
|
82
|
-
profile_default/
|
|
83
|
-
ipython_config.py
|
|
84
|
-
|
|
85
|
-
# pyenv
|
|
86
|
-
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
-
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
-
# .python-version
|
|
89
|
-
|
|
90
|
-
# pipenv
|
|
91
|
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
-
# install all needed dependencies.
|
|
95
|
-
#Pipfile.lock
|
|
96
|
-
|
|
97
|
-
# UV
|
|
98
|
-
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
-
# commonly ignored for libraries.
|
|
101
|
-
#uv.lock
|
|
102
|
-
|
|
103
|
-
# poetry
|
|
104
|
-
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
-
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
-
# commonly ignored for libraries.
|
|
107
|
-
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
-
#poetry.lock
|
|
109
|
-
#poetry.toml
|
|
110
|
-
|
|
111
|
-
# pdm
|
|
112
|
-
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
-
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
-
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
-
#pdm.lock
|
|
116
|
-
#pdm.toml
|
|
117
|
-
.pdm-python
|
|
118
|
-
.pdm-build/
|
|
119
|
-
|
|
120
|
-
# pixi
|
|
121
|
-
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
-
#pixi.lock
|
|
123
|
-
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
-
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
-
.pixi
|
|
126
|
-
|
|
127
|
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
-
__pypackages__/
|
|
129
|
-
|
|
130
|
-
# Celery stuff
|
|
131
|
-
celerybeat-schedule
|
|
132
|
-
celerybeat.pid
|
|
133
|
-
|
|
134
|
-
# Redis
|
|
135
|
-
*.rdb
|
|
136
|
-
*.aof
|
|
137
|
-
*.pid
|
|
138
|
-
|
|
139
|
-
# RabbitMQ
|
|
140
|
-
mnesia/
|
|
141
|
-
rabbitmq/
|
|
142
|
-
rabbitmq-data/
|
|
143
|
-
|
|
144
|
-
# ActiveMQ
|
|
145
|
-
activemq-data/
|
|
146
|
-
|
|
147
|
-
# SageMath parsed files
|
|
148
|
-
*.sage.py
|
|
149
|
-
|
|
150
|
-
# Environments
|
|
151
|
-
.env
|
|
152
|
-
.envrc
|
|
153
|
-
.venv
|
|
154
|
-
env/
|
|
155
|
-
venv/
|
|
156
|
-
ENV/
|
|
157
|
-
env.bak/
|
|
158
|
-
venv.bak/
|
|
159
|
-
|
|
160
|
-
# Spyder project settings
|
|
161
|
-
.spyderproject
|
|
162
|
-
.spyproject
|
|
163
|
-
|
|
164
|
-
# Rope project settings
|
|
165
|
-
.ropeproject
|
|
166
|
-
|
|
167
|
-
# mkdocs documentation
|
|
168
|
-
/site
|
|
169
|
-
|
|
170
|
-
# mypy
|
|
171
|
-
.mypy_cache/
|
|
172
|
-
.dmypy.json
|
|
173
|
-
dmypy.json
|
|
174
|
-
|
|
175
|
-
# Pyre type checker
|
|
176
|
-
.pyre/
|
|
177
|
-
|
|
178
|
-
# pytype static type analyzer
|
|
179
|
-
.pytype/
|
|
180
|
-
|
|
181
|
-
# Cython debug symbols
|
|
182
|
-
cython_debug/
|
|
183
|
-
|
|
184
|
-
# PyCharm
|
|
185
|
-
#
|
|
186
|
-
#
|
|
187
|
-
#
|
|
188
|
-
#
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
# Abstra
|
|
192
|
-
#
|
|
193
|
-
#
|
|
194
|
-
#
|
|
195
|
-
.abstra/
|
|
196
|
-
|
|
197
|
-
# Visual Studio Code
|
|
198
|
-
#
|
|
199
|
-
#
|
|
200
|
-
#
|
|
201
|
-
#
|
|
202
|
-
.vscode/
|
|
203
|
-
|
|
204
|
-
# Ruff stuff:
|
|
205
|
-
.ruff_cache/
|
|
206
|
-
|
|
207
|
-
# PyPI configuration file
|
|
208
|
-
.pypirc
|
|
209
|
-
|
|
210
|
-
# Marimo
|
|
211
|
-
marimo/_static/
|
|
212
|
-
marimo/_lsp/
|
|
213
|
-
__marimo__/
|
|
214
|
-
|
|
215
|
-
# Streamlit
|
|
216
|
-
.streamlit/secrets.toml
|
|
217
|
-
|
|
218
|
-
data
|
|
219
|
-
*.
|
|
220
|
-
*.
|
|
221
|
-
tumblrbot.
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
# Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
# uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
# poetry.lock
|
|
109
|
+
# poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
# pdm.lock
|
|
116
|
+
# pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
# pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# Redis
|
|
135
|
+
*.rdb
|
|
136
|
+
*.aof
|
|
137
|
+
*.pid
|
|
138
|
+
|
|
139
|
+
# RabbitMQ
|
|
140
|
+
mnesia/
|
|
141
|
+
rabbitmq/
|
|
142
|
+
rabbitmq-data/
|
|
143
|
+
|
|
144
|
+
# ActiveMQ
|
|
145
|
+
activemq-data/
|
|
146
|
+
|
|
147
|
+
# SageMath parsed files
|
|
148
|
+
*.sage.py
|
|
149
|
+
|
|
150
|
+
# Environments
|
|
151
|
+
.env
|
|
152
|
+
.envrc
|
|
153
|
+
.venv
|
|
154
|
+
env/
|
|
155
|
+
venv/
|
|
156
|
+
ENV/
|
|
157
|
+
env.bak/
|
|
158
|
+
venv.bak/
|
|
159
|
+
|
|
160
|
+
# Spyder project settings
|
|
161
|
+
.spyderproject
|
|
162
|
+
.spyproject
|
|
163
|
+
|
|
164
|
+
# Rope project settings
|
|
165
|
+
.ropeproject
|
|
166
|
+
|
|
167
|
+
# mkdocs documentation
|
|
168
|
+
/site
|
|
169
|
+
|
|
170
|
+
# mypy
|
|
171
|
+
.mypy_cache/
|
|
172
|
+
.dmypy.json
|
|
173
|
+
dmypy.json
|
|
174
|
+
|
|
175
|
+
# Pyre type checker
|
|
176
|
+
.pyre/
|
|
177
|
+
|
|
178
|
+
# pytype static type analyzer
|
|
179
|
+
.pytype/
|
|
180
|
+
|
|
181
|
+
# Cython debug symbols
|
|
182
|
+
cython_debug/
|
|
183
|
+
|
|
184
|
+
# PyCharm
|
|
185
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
186
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
188
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
189
|
+
# .idea/
|
|
190
|
+
|
|
191
|
+
# Abstra
|
|
192
|
+
# Abstra is an AI-powered process automation framework.
|
|
193
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
194
|
+
# Learn more at https://abstra.io/docs
|
|
195
|
+
.abstra/
|
|
196
|
+
|
|
197
|
+
# Visual Studio Code
|
|
198
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
199
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
200
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
201
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
202
|
+
# .vscode/
|
|
203
|
+
|
|
204
|
+
# Ruff stuff:
|
|
205
|
+
.ruff_cache/
|
|
206
|
+
|
|
207
|
+
# PyPI configuration file
|
|
208
|
+
.pypirc
|
|
209
|
+
|
|
210
|
+
# Marimo
|
|
211
|
+
marimo/_static/
|
|
212
|
+
marimo/_lsp/
|
|
213
|
+
__marimo__/
|
|
214
|
+
|
|
215
|
+
# Streamlit
|
|
216
|
+
.streamlit/secrets.toml
|
|
217
|
+
|
|
218
|
+
data
|
|
219
|
+
*.toml
|
|
220
|
+
*.jsonl
|
|
221
|
+
tumblrbot.ps1
|
|
@@ -1,16 +1,15 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: tumblrbot
|
|
3
|
-
Version: 1.9.4
|
|
3
|
+
Version: 1.9.6
|
|
4
4
|
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
|
-
Requires-Python: >= 3.
|
|
5
|
+
Requires-Python: >= 3.14
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
7
|
-
Requires-Dist: click
|
|
8
7
|
Requires-Dist: openai
|
|
9
|
-
Requires-Dist: pwinput
|
|
10
8
|
Requires-Dist: pydantic
|
|
11
9
|
Requires-Dist: requests
|
|
12
10
|
Requires-Dist: requests-oauthlib
|
|
13
11
|
Requires-Dist: rich
|
|
12
|
+
Requires-Dist: tenacity
|
|
14
13
|
Requires-Dist: tiktoken
|
|
15
14
|
Requires-Dist: tomlkit
|
|
16
15
|
Project-URL: Funding, https://ko-fi.com/maidscientistizutsumimarin
|
|
@@ -18,6 +17,8 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
|
18
17
|
|
|
19
18
|
# tumblrbot
|
|
20
19
|
|
|
20
|
+
[tumblrbot.exe]: https://github.com/MaidScientistIzutsumiMarin/tumblrbot/releases/latest/download/tumblrbot.exe
|
|
21
|
+
|
|
21
22
|
[OAuth]: https://oauth.net/1
|
|
22
23
|
[Python]: https://python.org/download
|
|
23
24
|
|
|
@@ -31,6 +32,7 @@ Project-URL: Source, https://github.com/MaidScientistIzutsumiMarin/tumblrbot
|
|
|
31
32
|
[OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
|
|
32
33
|
[OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
|
|
33
34
|
[OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
|
|
35
|
+
[Flags]: https://platform.openai.com/docs/guides/moderation/over#content-classifications
|
|
34
36
|
[Fine-Tuning Portal]: https://platform.openai.com/finetune
|
|
35
37
|
|
|
36
38
|
[Tumblr]: https://tumblr.com
|
|
@@ -84,26 +86,46 @@ Features:
|
|
|
84
86
|
|
|
85
87
|
**Known Issues:**
|
|
86
88
|
|
|
89
|
+
- Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
|
|
90
|
+
- You can retry with the same examples file. This has, on rare occasions, worked.
|
|
91
|
+
- You can submit the examples file to the [OpenAI] moderation API with this program's guided prompts. This has worked consistently for our dataset, but others have reported it not being thorough enough.
|
|
92
|
+
- You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
|
|
93
|
+
- You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
|
|
94
|
+
- If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
|
|
87
95
|
- Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
|
|
88
96
|
- Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
|
|
89
|
-
- During post downloading or post generation, you may receive a
|
|
97
|
+
- During post downloading or post generation, you may receive a “Limit Exceeded” error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
|
|
90
98
|
- Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
|
|
91
99
|
|
|
92
100
|
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
93
101
|
|
|
94
|
-
## Installation
|
|
102
|
+
## Installation & Usage
|
|
103
|
+
|
|
104
|
+
### Downloadable Binary
|
|
105
|
+
|
|
106
|
+
| Pros | Cons |
|
|
107
|
+
| --- | --- |
|
|
108
|
+
| Easier to install | Harder to update |
|
|
109
|
+
| No risk of dependencies breaking | Dependencies may be older |
|
|
110
|
+
|
|
111
|
+
1. Download the latest release's [tumblrbot.exe].
|
|
112
|
+
1. Launch `tumblrbot.exe` in the install location.
|
|
113
|
+
|
|
114
|
+
### PyPI
|
|
115
|
+
|
|
116
|
+
| Pros | Cons |
|
|
117
|
+
| --- | --- |
|
|
118
|
+
| Easier to update | Harder to install |
|
|
119
|
+
| Dependencies may be newer | Dependencies may break |
|
|
95
120
|
|
|
96
121
|
1. Install the latest version of [Python]:
|
|
97
122
|
- Windows: `winget install python3`
|
|
98
123
|
- Linux (apt): `apt install python-pip`
|
|
99
124
|
- Linux (pacman): `pacman -S python-pip`
|
|
100
125
|
1. Install the [pip] package: `pip install tumblrbot`
|
|
101
|
-
- Alternatively, you can install from this repository: `pip install git+https://github.com/
|
|
126
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidScientistIzutsumiMarin/tumblrbot.git`
|
|
102
127
|
- On Linux, you will have to make a virtual environment or use the flag to install packages system-wide.
|
|
103
|
-
|
|
104
|
-
## Usage
|
|
105
|
-
|
|
106
|
-
Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
128
|
+
1. Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
107
129
|
|
|
108
130
|
## Obtaining Tokens
|
|
109
131
|
|
|
@@ -160,7 +182,8 @@ Specific Options:
|
|
|
160
182
|
To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
|
|
161
183
|
|
|
162
184
|
- **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
|
|
163
|
-
- **`
|
|
185
|
+
- **`moderation_batch_size`** - This controls the batch size when submitting posts to the OpenAI moderation API. There is no limit, but higher numbers will cause you to be rate-limited more, which can be slower overall. Low numbers reduce rate-limiting, but can sometimes take longer due to needing more requests. The best value will depend on your computer, internet connection, and any number of factors on OpenAI's side. The default value is just what worked best for our computer.
|
|
186
|
+
- **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
|
|
164
187
|
- **`developer_message`** - This message is used for fine-tuning the AI as well as for generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
|
|
165
188
|
- **`user_message`** - This setting is used and works in the same way as `developer_message`.
|
|
166
189
|
- **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# tumblrbot
|
|
2
2
|
|
|
3
|
+
[tumblrbot.exe]: https://github.com/MaidScientistIzutsumiMarin/tumblrbot/releases/latest/download/tumblrbot.exe
|
|
4
|
+
|
|
3
5
|
[OAuth]: https://oauth.net/1
|
|
4
6
|
[Python]: https://python.org/download
|
|
5
7
|
|
|
@@ -13,6 +15,7 @@
|
|
|
13
15
|
[OpenAI Pricing]: https://platform.openai.com/docs/pricing#fine-tuning
|
|
14
16
|
[OpenAI Tokens]: https://platform.openai.com/settings/organization/api-keys
|
|
15
17
|
[OpenAI Moderation API]: https://platform.openai.com/docs/guides/moderation
|
|
18
|
+
[Flags]: https://platform.openai.com/docs/guides/moderation/over#content-classifications
|
|
16
19
|
[Fine-Tuning Portal]: https://platform.openai.com/finetune
|
|
17
20
|
|
|
18
21
|
[Tumblr]: https://tumblr.com
|
|
@@ -66,26 +69,46 @@ Features:
|
|
|
66
69
|
|
|
67
70
|
**Known Issues:**
|
|
68
71
|
|
|
72
|
+
- Fine-tuning can fail after the validation phase due to the examples file not passing [OpenAI] moderation checks. There are a few workarounds for this that can be tried in combination:
|
|
73
|
+
- You can retry with the same examples file. This has, on rare occasions, worked.
|
|
74
|
+
- You can submit the examples file to the [OpenAI] moderation API with this program's guided prompts. This has worked consistently for our dataset, but others have reported it not being thorough enough.
|
|
75
|
+
- You can use regular expressions to filter out training data in the [config][configurable]. This is more of a brute-force solution, but it can work if the other solutions do not.
|
|
76
|
+
- You can try limiting your dataset by specifying fewer blogs to download from or limiting the number of posts taken from each one in the [config][configurable].
|
|
77
|
+
- If all else fails, you can manually remove data from the examples file until it passes. It is unfortunately not a definitive resource, but it can help to read about what the [OpenAI moderation API flags][Flags].
|
|
69
78
|
- Sometimes, you will get an error about the training file not being found when starting fine-tuning. We do not currently have a fix or workaround for this. You should instead use the online portal for fine-tuning if this continues to happen. Read more in [fine-tuning].
|
|
70
79
|
- Post counts are incorrect when downloading posts. We are not certain what the cause of this is, but our tests suggest this is a [Tumblr] API problem that is giving inaccurate numbers.
|
|
71
|
-
- During post downloading or post generation, you may receive a
|
|
80
|
+
- During post downloading or post generation, you may receive a “Limit Exceeded” error message from the [Tumblr] API. This is caused by server-side rate-limiting by [Tumblr]. The only workaround is trying again or waiting for a period of time before retrying. In most cases, you either have to wait for a minute or an hour for the limits to reset. You can read more about the limits in the [Tumblr API documentation on rate limits].
|
|
72
81
|
- Similar to the above issue, you may sometimes get a message saying your IP is blocked. This block is temporary and probably follows the same rules as previously described.
|
|
73
82
|
|
|
74
83
|
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
75
84
|
|
|
76
|
-
## Installation
|
|
85
|
+
## Installation & Usage
|
|
86
|
+
|
|
87
|
+
### Downloadable Binary
|
|
88
|
+
|
|
89
|
+
| Pros | Cons |
|
|
90
|
+
| --- | --- |
|
|
91
|
+
| Easier to install | Harder to update |
|
|
92
|
+
| No risk of dependencies breaking | Dependencies may be older |
|
|
93
|
+
|
|
94
|
+
1. Download the latest release's [tumblrbot.exe].
|
|
95
|
+
1. Launch `tumblrbot.exe` in the install location.
|
|
96
|
+
|
|
97
|
+
### PyPI
|
|
98
|
+
|
|
99
|
+
| Pros | Cons |
|
|
100
|
+
| --- | --- |
|
|
101
|
+
| Easier to update | Harder to install |
|
|
102
|
+
| Dependencies may be newer | Dependencies may break |
|
|
77
103
|
|
|
78
104
|
1. Install the latest version of [Python]:
|
|
79
105
|
- Windows: `winget install python3`
|
|
80
106
|
- Linux (apt): `apt install python-pip`
|
|
81
107
|
- Linux (pacman): `pacman -S python-pip`
|
|
82
108
|
1. Install the [pip] package: `pip install tumblrbot`
|
|
83
|
-
- Alternatively, you can install from this repository: `pip install git+https://github.com/
|
|
109
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidScientistIzutsumiMarin/tumblrbot.git`
|
|
84
110
|
- On Linux, you will have to make a virtual environment or use the flag to install packages system-wide.
|
|
85
|
-
|
|
86
|
-
## Usage
|
|
87
|
-
|
|
88
|
-
Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
111
|
+
1. Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options. Every command-line option corresponds to a value from the [config][configurable].
|
|
89
112
|
|
|
90
113
|
## Obtaining Tokens
|
|
91
114
|
|
|
@@ -142,7 +165,8 @@ Specific Options:
|
|
|
142
165
|
To be specific, it should follow the [JSON Lines] file format with one collection of name/value pairs (a dictionary) per line. You can validate your file using the [JSON Lines Validator].
|
|
143
166
|
|
|
144
167
|
- **`post_limit`** - At most, this many valid posts will be included in the training data. This effectively is a filter to select the `N` most recent valid posts from each blog. `0` will use every available valid post.
|
|
145
|
-
- **`
|
|
168
|
+
- **`moderation_batch_size`** - This controls the batch size when submitting posts to the OpenAI moderation API. There is no limit, but higher numbers will cause you to be rate-limited more, which can be slower overall. Low numbers reduce rate-limiting, but can sometimes take longer due to needing more requests. The best value will depend on your computer, internet connection, and any number of factors on OpenAI's side. The default value is just what worked best for our computer.
|
|
169
|
+
- **`filtered_words`** - During training data generation, any posts with the specified words will be removed. Word boundaries are not checked by default, so “the” will also filter out posts with “them” or “thematic”. This setting supports regular expressions, so you can explicitly look for word boundaries by surrounding an entry with “\\\b”, i.e., “\\\bthe\\\b”. Regular expressions have to be escaped like so due to how JSON data is read in. If you are familiar with regular expressions, it could be useful for you to know that every entry is joined with a “|” which is then used to search the post content for any matches.
|
|
146
170
|
- **`developer_message`** - This message is used for fine-tuning the AI as well as for generating prompts. If you change this, you will need to run the fine-tuning again with the new value before generating posts.
|
|
147
171
|
- **`user_message`** - This setting is used and works in the same way as `developer_message`.
|
|
148
172
|
- **`expected_epochs`** - The default value here is the default number of epochs for `base_model`. You may have to change this value if you change `base_model`. After running fine-tuning once, you will see the number of epochs used in the [fine-tuning portal] under *Hyperparameters*. This value will also be updated automatically if you run fine-tuning through this program.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
..\..\Powershell\build.ps1 -ExtraArgs '--collect-all tiktoken_ext'
|
|
@@ -1,19 +1,18 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "tumblrbot"
|
|
3
|
-
version = "1.9.4"
|
|
3
|
+
version = "1.9.6"
|
|
4
4
|
description = "An updated bot that posts to Tumblr, based on your very own blog!"
|
|
5
5
|
readme = "README.md"
|
|
6
|
-
requires-python = ">= 3.
|
|
6
|
+
requires-python = ">= 3.14"
|
|
7
7
|
dependencies = [
|
|
8
|
-
"click",
|
|
9
8
|
"openai",
|
|
10
|
-
"pwinput",
|
|
11
9
|
"pydantic",
|
|
12
10
|
"requests",
|
|
13
11
|
"requests-oauthlib",
|
|
14
12
|
"rich",
|
|
13
|
+
"tenacity",
|
|
15
14
|
"tiktoken",
|
|
16
|
-
"tomlkit"
|
|
15
|
+
"tomlkit"
|
|
17
16
|
]
|
|
18
17
|
|
|
19
18
|
[project.urls]
|
|
@@ -25,4 +24,4 @@ tumblrbot = "tumblrbot.__main__:main"
|
|
|
25
24
|
|
|
26
25
|
[build-system]
|
|
27
26
|
requires = ["flit_core"]
|
|
28
|
-
build-backend = "flit_core.buildapi"
|
|
27
|
+
build-backend = "flit_core.buildapi"
|