tumblrbot 1.0.0__tar.gz → 1.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tumblrbot-1.1.1/.github/dependabot.yml +11 -0
- tumblrbot-1.1.1/.gitignore +215 -0
- tumblrbot-1.1.1/PKG-INFO +15 -0
- tumblrbot-1.1.1/README.md +90 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/UNLICENSE +24 -24
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/pyproject.toml +4 -8
- tumblrbot-1.1.1/src/tumblrbot/__main__.py +33 -0
- tumblrbot-1.1.1/src/tumblrbot/flow/download.py +48 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/flow/examples.py +4 -4
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/flow/fine_tune.py +13 -17
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/flow/generate.py +10 -7
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/utils/models.py +6 -7
- tumblrbot-1.1.1/src/tumblrbot/utils/settings.py +131 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/utils/tumblr.py +23 -19
- tumblrbot-1.0.0/PKG-INFO +0 -369
- tumblrbot-1.0.0/README.md +0 -348
- tumblrbot-1.0.0/src/tumblrbot/__main__.py +0 -63
- tumblrbot-1.0.0/src/tumblrbot/flow/download.py +0 -53
- tumblrbot-1.0.0/src/tumblrbot/utils/settings.py +0 -118
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/flow/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/utils/__init__.py +0 -0
- {tumblrbot-1.0.0 → tumblrbot-1.1.1}/src/tumblrbot/utils/common.py +0 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# To get started with Dependabot version updates, you'll need to specify which
|
|
2
|
+
# package ecosystems to update and where the package manifests are located.
|
|
3
|
+
# Please see the documentation for all configuration options:
|
|
4
|
+
# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
|
|
5
|
+
|
|
6
|
+
version: 2
|
|
7
|
+
updates:
|
|
8
|
+
- package-ecosystem: "pip" # See documentation for possible values
|
|
9
|
+
directory: "/" # Location of package manifests
|
|
10
|
+
schedule:
|
|
11
|
+
interval: "weekly"
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[codz]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
# Usually these files are written by a python script from a template
|
|
31
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
32
|
+
*.manifest
|
|
33
|
+
*.spec
|
|
34
|
+
|
|
35
|
+
# Installer logs
|
|
36
|
+
pip-log.txt
|
|
37
|
+
pip-delete-this-directory.txt
|
|
38
|
+
|
|
39
|
+
# Unit test / coverage reports
|
|
40
|
+
htmlcov/
|
|
41
|
+
.tox/
|
|
42
|
+
.nox/
|
|
43
|
+
.coverage
|
|
44
|
+
.coverage.*
|
|
45
|
+
.cache
|
|
46
|
+
nosetests.xml
|
|
47
|
+
coverage.xml
|
|
48
|
+
*.cover
|
|
49
|
+
*.py.cover
|
|
50
|
+
.hypothesis/
|
|
51
|
+
.pytest_cache/
|
|
52
|
+
cover/
|
|
53
|
+
|
|
54
|
+
# Translations
|
|
55
|
+
*.mo
|
|
56
|
+
*.pot
|
|
57
|
+
|
|
58
|
+
# Django stuff:
|
|
59
|
+
*.log
|
|
60
|
+
local_settings.py
|
|
61
|
+
db.sqlite3
|
|
62
|
+
db.sqlite3-journal
|
|
63
|
+
|
|
64
|
+
# Flask stuff:
|
|
65
|
+
instance/
|
|
66
|
+
.webassets-cache
|
|
67
|
+
|
|
68
|
+
# Scrapy stuff:
|
|
69
|
+
.scrapy
|
|
70
|
+
|
|
71
|
+
# Sphinx documentation
|
|
72
|
+
docs/_build/
|
|
73
|
+
|
|
74
|
+
# PyBuilder
|
|
75
|
+
.pybuilder/
|
|
76
|
+
target/
|
|
77
|
+
|
|
78
|
+
# Jupyter Notebook
|
|
79
|
+
.ipynb_checkpoints
|
|
80
|
+
|
|
81
|
+
# IPython
|
|
82
|
+
profile_default/
|
|
83
|
+
ipython_config.py
|
|
84
|
+
|
|
85
|
+
# pyenv
|
|
86
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
87
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
88
|
+
# .python-version
|
|
89
|
+
|
|
90
|
+
# pipenv
|
|
91
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
92
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
93
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
94
|
+
# install all needed dependencies.
|
|
95
|
+
#Pipfile.lock
|
|
96
|
+
|
|
97
|
+
# UV
|
|
98
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
99
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
100
|
+
# commonly ignored for libraries.
|
|
101
|
+
#uv.lock
|
|
102
|
+
|
|
103
|
+
# poetry
|
|
104
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
105
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
106
|
+
# commonly ignored for libraries.
|
|
107
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
108
|
+
#poetry.lock
|
|
109
|
+
#poetry.toml
|
|
110
|
+
|
|
111
|
+
# pdm
|
|
112
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
113
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
114
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
115
|
+
#pdm.lock
|
|
116
|
+
#pdm.toml
|
|
117
|
+
.pdm-python
|
|
118
|
+
.pdm-build/
|
|
119
|
+
|
|
120
|
+
# pixi
|
|
121
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
122
|
+
#pixi.lock
|
|
123
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
124
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
125
|
+
.pixi
|
|
126
|
+
|
|
127
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
128
|
+
__pypackages__/
|
|
129
|
+
|
|
130
|
+
# Celery stuff
|
|
131
|
+
celerybeat-schedule
|
|
132
|
+
celerybeat.pid
|
|
133
|
+
|
|
134
|
+
# SageMath parsed files
|
|
135
|
+
*.sage.py
|
|
136
|
+
|
|
137
|
+
# Environments
|
|
138
|
+
.env
|
|
139
|
+
.envrc
|
|
140
|
+
.venv
|
|
141
|
+
env/
|
|
142
|
+
venv/
|
|
143
|
+
ENV/
|
|
144
|
+
env.bak/
|
|
145
|
+
venv.bak/
|
|
146
|
+
|
|
147
|
+
# Spyder project settings
|
|
148
|
+
.spyderproject
|
|
149
|
+
.spyproject
|
|
150
|
+
|
|
151
|
+
# Rope project settings
|
|
152
|
+
.ropeproject
|
|
153
|
+
|
|
154
|
+
# mkdocs documentation
|
|
155
|
+
/site
|
|
156
|
+
|
|
157
|
+
# mypy
|
|
158
|
+
.mypy_cache/
|
|
159
|
+
.dmypy.json
|
|
160
|
+
dmypy.json
|
|
161
|
+
|
|
162
|
+
# Pyre type checker
|
|
163
|
+
.pyre/
|
|
164
|
+
|
|
165
|
+
# pytype static type analyzer
|
|
166
|
+
.pytype/
|
|
167
|
+
|
|
168
|
+
# Cython debug symbols
|
|
169
|
+
cython_debug/
|
|
170
|
+
|
|
171
|
+
# PyCharm
|
|
172
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
173
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
174
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
175
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
176
|
+
#.idea/
|
|
177
|
+
|
|
178
|
+
# Abstra
|
|
179
|
+
# Abstra is an AI-powered process automation framework.
|
|
180
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
181
|
+
# Learn more at https://abstra.io/docs
|
|
182
|
+
.abstra/
|
|
183
|
+
|
|
184
|
+
# Visual Studio Code
|
|
185
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
186
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
187
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
188
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
189
|
+
# .vscode/
|
|
190
|
+
|
|
191
|
+
# Ruff stuff:
|
|
192
|
+
.ruff_cache/
|
|
193
|
+
|
|
194
|
+
# PyPI configuration file
|
|
195
|
+
.pypirc
|
|
196
|
+
|
|
197
|
+
# Cursor
|
|
198
|
+
# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
|
|
199
|
+
# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
|
|
200
|
+
# refer to https://docs.cursor.com/context/ignore-files
|
|
201
|
+
.cursorignore
|
|
202
|
+
.cursorindexingignore
|
|
203
|
+
|
|
204
|
+
# Marimo
|
|
205
|
+
marimo/_static/
|
|
206
|
+
marimo/_lsp/
|
|
207
|
+
__marimo__/
|
|
208
|
+
|
|
209
|
+
# Streamlit
|
|
210
|
+
.streamlit/secrets.toml
|
|
211
|
+
|
|
212
|
+
# Custom
|
|
213
|
+
data
|
|
214
|
+
*.toml
|
|
215
|
+
*.jsonl
|
tumblrbot-1.1.1/PKG-INFO
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tumblrbot
|
|
3
|
+
Version: 1.1.1
|
|
4
|
+
Summary: An updated bot that posts to Tumblr, based on your very own blog!
|
|
5
|
+
Requires-Python: >= 3.13
|
|
6
|
+
Requires-Dist: more-itertools
|
|
7
|
+
Requires-Dist: openai
|
|
8
|
+
Requires-Dist: pydantic
|
|
9
|
+
Requires-Dist: pydantic-settings
|
|
10
|
+
Requires-Dist: requests
|
|
11
|
+
Requires-Dist: requests-oauthlib
|
|
12
|
+
Requires-Dist: rich
|
|
13
|
+
Requires-Dist: tiktoken
|
|
14
|
+
Requires-Dist: tomlkit
|
|
15
|
+
Project-URL: Source, https://github.com/MaidThatPrograms/tumblrbot
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
[OpenAI]: https://pypi.org/project/openai
|
|
2
|
+
[Python]: https://python.org/download
|
|
3
|
+
[Rich]: https://pypi.org/project/rich
|
|
4
|
+
|
|
5
|
+
[gpt-4.1-nano-2025-04-14]: https://platform.openai.com/docs/models/gpt-4.1-nano
|
|
6
|
+
[Moderation API]: https://platform.openai.com/docs/api-reference/moderations
|
|
7
|
+
[New Post Format]: https://tumblr.com/docs/npf
|
|
8
|
+
[OAuth 2.0]: https://www.tumblr.com/docs/en/api/v2#oauth2-authorization
|
|
9
|
+
[pip]: https://pypi.org
|
|
10
|
+
|
|
11
|
+
[Download]: tumblrbot/flow/download.py
|
|
12
|
+
[Examples]: tumblrbot/flow/examples.py
|
|
13
|
+
[Fine-Tune]: tumblrbot/flow/fine_tune.py
|
|
14
|
+
[Generate]: tumblrbot/flow/generate.py
|
|
15
|
+
[Settings]: tumblrbot/utils/settings.py
|
|
16
|
+
[Main]: __main__.py
|
|
17
|
+
[README.md]: README.md
|
|
18
|
+
|
|
19
|
+
# tumblrbot
|
|
20
|
+
[](https://python.org/pypi/tumblrbot)
|
|
21
|
+
|
|
22
|
+
Description of original project:
|
|
23
|
+
> 4tv-tumblrbot was a collaborative project I embarked on with my close friend Dima, who goes by @smoqueen on Tumblr. The aim of this endeavor was straightforward yet silly: to develop a Tumblr bot powered by a machine-learning model. This bot would be specifically trained on the content from a particular Tumblr blog or a selected set of blogs, allowing it to mimic the style, tone, and thematic essence of the original posts.
|
|
24
|
+
|
|
25
|
+
This fork is largely a rewrite of the source code with similarities in its structure and process:
|
|
26
|
+
- Updates:
|
|
27
|
+
- Updated to [OAuth 2.0].
|
|
28
|
+
- Updated to the [New Post Format].
|
|
29
|
+
- Updated to the latest version of [OpenAI].
|
|
30
|
+
- Updated the [base model version][Settings] to [gpt-4.1-nano-2025-04-14].
|
|
31
|
+
- Removed features:
|
|
32
|
+
- [Generation][Generate]:
|
|
33
|
+
- Removed clearing drafts behavior.
|
|
34
|
+
- [Training][Examples]:
|
|
35
|
+
- Removed exports that had HTML or reblogs.
|
|
36
|
+
- Removed special word-replacement behavior.
|
|
37
|
+
- Removed filtering by year.
|
|
38
|
+
- Removed setup and related files.
|
|
39
|
+
- Changed/Added features:
|
|
40
|
+
- [Generation][Generate]:
|
|
41
|
+
- Added a link to the blog's draft page.
|
|
42
|
+
- Added error checking for uploading drafts.
|
|
43
|
+
- [Training][Examples]:
|
|
44
|
+
- Added the option to [Download] the latest posts from the [specified blogs][Settings].
|
|
45
|
+
- Added the option to remove posts flagged by the [Moderation API].
|
|
46
|
+
- Added the option to automatically [Fine-Tune] the examples on the [specified base model][Settings].
|
|
47
|
+
- Changed to now escape examples automatically.
|
|
48
|
+
- Set encoding for reading post data to `UTF-8` to fix decoding errors.
|
|
49
|
+
- Added newlines between paragraphs.
|
|
50
|
+
- Removed "ALT", submission, ask, and poll text from posts.
|
|
51
|
+
- Improved the estimated token counts and costs.
|
|
52
|
+
- Changed to [Rich] for output.
|
|
53
|
+
- Added progress bars.
|
|
54
|
+
- Added post previews.
|
|
55
|
+
- Added color, formatting, and more information to output.
|
|
56
|
+
- Created a [guided utility][Main] for every step of building your bot blog.
|
|
57
|
+
- Maid scripts wait for user input before the console closes.
|
|
58
|
+
- Added command-line options to override [Settings] options.
|
|
59
|
+
- Added behavior to regenerate the default [config.toml][Settings] and [env.toml][Settings] if missing.
|
|
60
|
+
- Renamed several files.
|
|
61
|
+
- Renamed several [Settings] options.
|
|
62
|
+
- Changed the value of several [Settings] options.
|
|
63
|
+
- Added full type-checking coverage (fully importable from third-party scripts).
|
|
64
|
+
|
|
65
|
+
To-Do:
|
|
66
|
+
- Add documentation.
|
|
67
|
+
- Finish updating [README.md].
|
|
68
|
+
- Look into places more-itertools can help.
|
|
69
|
+
- Change the differences list to instead just be a list of features.
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
**Please submit an issue or contact us for features you want added/reimplemented.**
|
|
73
|
+
|
|
74
|
+
## Installation
|
|
75
|
+
1. Install the latest version of [Python]:
|
|
76
|
+
- Windows: `winget install python3`
|
|
77
|
+
- Linux (apt): `apt install python3-pip`
|
|
78
|
+
- Linux (pacman): `pacman -S python-pip`
|
|
79
|
+
1. Install the [pip] package: `pip install tumblrbot`
|
|
80
|
+
- Alternatively, you can install from this repository: `pip install git+https://github.com/MaidThatPrograms/tumblrbot.git`
|
|
81
|
+
- On Linux, you will have to make a virtual environment.
|
|
82
|
+
|
|
83
|
+
## Usage
|
|
84
|
+
Run `tumblrbot` from anywhere. Run `tumblrbot --help` for command-line options.
|
|
85
|
+
|
|
86
|
+
## Obtaining Tokens
|
|
87
|
+
> WIP
|
|
88
|
+
|
|
89
|
+
## Configuration
|
|
90
|
+
> WIP
|
|
@@ -1,24 +1,24 @@
|
|
|
1
|
-
This is free and unencumbered software released into the public domain.
|
|
2
|
-
|
|
3
|
-
Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
4
|
-
distribute this software, either in source code form or as a compiled
|
|
5
|
-
binary, for any purpose, commercial or non-commercial, and by any
|
|
6
|
-
means.
|
|
7
|
-
|
|
8
|
-
In jurisdictions that recognize copyright laws, the author or authors
|
|
9
|
-
of this software dedicate any and all copyright interest in the
|
|
10
|
-
software to the public domain. We make this dedication for the benefit
|
|
11
|
-
of the public at large and to the detriment of our heirs and
|
|
12
|
-
successors. We intend this dedication to be an overt act of
|
|
13
|
-
relinquishment in perpetuity of all present and future rights to this
|
|
14
|
-
software under copyright law.
|
|
15
|
-
|
|
16
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19
|
-
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
20
|
-
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
21
|
-
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
22
|
-
OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
-
|
|
24
|
-
For more information, please refer to <https://unlicense.org
|
|
1
|
+
This is free and unencumbered software released into the public domain.
|
|
2
|
+
|
|
3
|
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
4
|
+
distribute this software, either in source code form or as a compiled
|
|
5
|
+
binary, for any purpose, commercial or non-commercial, and by any
|
|
6
|
+
means.
|
|
7
|
+
|
|
8
|
+
In jurisdictions that recognize copyright laws, the author or authors
|
|
9
|
+
of this software dedicate any and all copyright interest in the
|
|
10
|
+
software to the public domain. We make this dedication for the benefit
|
|
11
|
+
of the public at large and to the detriment of our heirs and
|
|
12
|
+
successors. We intend this dedication to be an overt act of
|
|
13
|
+
relinquishment in perpetuity of all present and future rights to this
|
|
14
|
+
software under copyright law.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19
|
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
20
|
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
21
|
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
For more information, please refer to <https://unlicense.org>
|
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "tumblrbot"
|
|
3
|
-
version = "1.
|
|
3
|
+
version = "1.1.1"
|
|
4
4
|
description = "An updated bot that posts to Tumblr, based on your very own blog!"
|
|
5
|
-
readme = "README.md"
|
|
6
5
|
requires-python = ">= 3.13"
|
|
7
|
-
license = "Unlicense"
|
|
8
|
-
license-files = ["UNLICENSE"]
|
|
9
6
|
dependencies = [
|
|
10
7
|
"more-itertools",
|
|
11
8
|
"openai",
|
|
@@ -19,12 +16,11 @@ dependencies = [
|
|
|
19
16
|
]
|
|
20
17
|
|
|
21
18
|
[project.urls]
|
|
22
|
-
|
|
23
|
-
Repository = "https://github.com/MaidThatPrograms/tumblrbot"
|
|
19
|
+
Source = "https://github.com/MaidThatPrograms/tumblrbot"
|
|
24
20
|
|
|
25
21
|
[project.scripts]
|
|
26
22
|
tumblrbot = "tumblrbot.__main__:main"
|
|
27
23
|
|
|
28
24
|
[build-system]
|
|
29
|
-
requires = ["
|
|
30
|
-
build-backend = "
|
|
25
|
+
requires = ["hatchling"]
|
|
26
|
+
build-backend = "hatchling.build"
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from openai import OpenAI
|
|
2
|
+
from rich.prompt import Confirm
|
|
3
|
+
from rich.traceback import install
|
|
4
|
+
|
|
5
|
+
from tumblrbot.flow.download import PostDownloader
|
|
6
|
+
from tumblrbot.flow.examples import ExamplesWriter
|
|
7
|
+
from tumblrbot.flow.fine_tune import FineTuner
|
|
8
|
+
from tumblrbot.flow.generate import DraftGenerator
|
|
9
|
+
from tumblrbot.utils.common import TumblrClient
|
|
10
|
+
from tumblrbot.utils.settings import Tokens
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def main() -> None:
    """Run the interactive tumblrbot workflow, asking before each step.

    The steps run in a fixed order: download posts, write training examples,
    print fine-tuning estimates, optionally fine-tune, optionally generate
    drafts. Every confirmation prompt defaults to "no".
    """
    install()  # rich.traceback.install

    credentials = Tokens()
    api_key = credentials.openai_api_key.get_secret_value()

    with OpenAI(api_key=api_key) as openai, TumblrClient(credentials) as tumblr:
        downloader = PostDownloader(openai, tumblr)
        if Confirm.ask("Download latest posts?", default=False):
            downloader.download()

        # The data paths are looked up whether or not a download just ran.
        writer = ExamplesWriter(openai, tumblr, downloader.get_data_paths())
        if Confirm.ask("Create training data?", default=False):
            writer.write_examples()

        # Tokens are counted unconditionally, after any rewrite of the examples.
        tuner = FineTuner(openai, tumblr, sum(writer.count_tokens()))
        tuner.print_estimates()
        if Confirm.ask("Upload data to OpenAI for fine-tuning? [bold]You must do this to set the model to generate drafts from. Alternatively, manually enter a model into the config.", default=False):
            tuner.fine_tune()

        if Confirm.ask("Generate drafts?", default=False):
            DraftGenerator(openai, tumblr).create_drafts()
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from io import TextIOBase
|
|
2
|
+
from json import dump
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from tumblrbot.utils.common import PreviewLive, UtilClass
|
|
6
|
+
from tumblrbot.utils.models import Post
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class PostDownloader(UtilClass):
    """Download published posts of the configured blogs into JSONL files.

    Each blog gets one file, named after its identifier, inside
    ``self.config.data_directory``. ``self.config`` and ``self.tumblr`` are
    not set in this class; presumably ``UtilClass`` provides them (confirm in
    ``tumblrbot.utils.common``).
    """

    def paginate_posts(self, blog_identifier: str, offset: int, fp: TextIOBase, live: PreviewLive) -> None:
        """Fetch posts page by page, starting at ``offset``, and append them to ``fp``.

        Args:
            blog_identifier: Blog whose published posts are requested.
            offset: Number of posts to skip; also the initial progress value.
            fp: Open text file that receives one JSON object per line.
            live: Live display used for the progress bar and post previews.
        """
        task_id = live.progress.add_task(f"Downloading posts from '{blog_identifier}'...", total=None, completed=offset)

        while True:
            response = self.tumblr.retrieve_published_posts(blog_identifier, offset).json()["response"]
            # The blog's post count from the response becomes the progress total.
            live.progress.update(task_id, total=response["blog"]["posts"], completed=offset)

            posts = response["posts"]
            if not posts:
                # An empty page ends the pagination.
                break

            for post in posts:
                # The raw post is written before validation.
                dump(post, fp)
                fp.write("\n")

                live.custom_update(Post.model_validate(post))

            offset += len(posts)

    def get_data_path(self, blog_identifier: str) -> Path:
        """Return the ``.jsonl`` path inside the data directory for one blog."""
        return (self.config.data_directory / blog_identifier).with_suffix(".jsonl")

    def get_data_paths(self) -> list[Path]:
        """Return the data path of every blog in ``download_blog_identifiers``."""
        return [self.get_data_path(blog_identifier) for blog_identifier in self.config.download_blog_identifiers]

    @staticmethod
    def _count_saved_posts(data_path: Path) -> int:
        """Return how many lines ``data_path`` holds, or 0 if it does not exist.

        ``paginate_posts`` writes one post per line, so the line count is used
        as the resume offset. Lines are streamed instead of reading the whole
        file into memory.
        """
        if not data_path.exists():
            return 0

        with data_path.open(encoding="utf_8") as fp:
            return sum(1 for _ in fp)

    def download(self) -> None:
        """Download new posts for every configured blog, resuming existing files."""
        self.config.data_directory.mkdir(parents=True, exist_ok=True)

        with PreviewLive() as live:
            for blog_identifier in self.config.download_blog_identifiers:
                data_path = self.get_data_path(blog_identifier)

                # Count before opening for append: open("a") creates the file,
                # which would make the existence check always true.
                offset = self._count_saved_posts(data_path)

                with data_path.open("a", encoding="utf_8") as fp:
                    self.paginate_posts(blog_identifier, offset, fp, live)
|
|
@@ -28,7 +28,7 @@ class ExamplesWriter(UtilClass):
|
|
|
28
28
|
encoding = get_encoding("o200k_base")
|
|
29
29
|
Console(stderr=True, style="logging.level.warning").print(f"[Warning] Using encoding '{encoding.name}': {''.join(error.args)}\n")
|
|
30
30
|
|
|
31
|
-
with self.config.
|
|
31
|
+
with self.config.examples_file.open(encoding="utf_8") as fp:
|
|
32
32
|
for line in fp:
|
|
33
33
|
example = Example.model_validate_json(line)
|
|
34
34
|
yield len(encoding.encode("assistant")) # every reply is primed with <|start|>assistant<|message|>
|
|
@@ -77,8 +77,8 @@ class ExamplesWriter(UtilClass):
|
|
|
77
77
|
yield from posts
|
|
78
78
|
|
|
79
79
|
def write_examples(self) -> None:
|
|
80
|
-
self.config.
|
|
81
|
-
with self.config.
|
|
80
|
+
self.config.examples_file.parent.mkdir(parents=True, exist_ok=True)
|
|
81
|
+
with self.config.examples_file.open("w", encoding="utf_8") as fp:
|
|
82
82
|
for post in self.get_filtered_posts():
|
|
83
83
|
example = Example(
|
|
84
84
|
messages=[
|
|
@@ -89,4 +89,4 @@ class ExamplesWriter(UtilClass):
|
|
|
89
89
|
)
|
|
90
90
|
fp.write(f"{example.model_dump_json()}\n")
|
|
91
91
|
|
|
92
|
-
rich.print(f"[bold]The examples file can be found at: '{self.config.
|
|
92
|
+
rich.print(f"[bold]The examples file can be found at: '{self.config.examples_file}'\n")
|
|
@@ -24,22 +24,19 @@ class FineTuner(UtilClass):
|
|
|
24
24
|
Cost: {self.get_cost_string(job.trained_tokens)}
|
|
25
25
|
""")
|
|
26
26
|
|
|
27
|
-
self.config.
|
|
28
|
-
self.config.model_post_init()
|
|
27
|
+
self.config.job_id = ""
|
|
29
28
|
|
|
30
29
|
if job.status == "failed" and job.error is not None:
|
|
31
30
|
raise RuntimeError(job.error.message)
|
|
32
31
|
|
|
33
32
|
if job.fine_tuned_model:
|
|
34
|
-
self.config.
|
|
35
|
-
self.config.model_post_init()
|
|
33
|
+
self.config.fine_tuned_model = job.fine_tuned_model or ""
|
|
36
34
|
|
|
37
35
|
def poll_job_status(self) -> FineTuningJob:
|
|
38
|
-
job = self.openai.fine_tuning.jobs.retrieve(self.config.
|
|
36
|
+
job = self.openai.fine_tuning.jobs.retrieve(self.config.job_id)
|
|
39
37
|
|
|
40
|
-
if self.config.
|
|
41
|
-
self.config.
|
|
42
|
-
self.config.model_post_init()
|
|
38
|
+
if self.config.expected_epochs != job.hyperparameters.n_epochs and isinstance(job.hyperparameters.n_epochs, int):
|
|
39
|
+
self.config.expected_epochs = job.hyperparameters.n_epochs
|
|
43
40
|
|
|
44
41
|
self.dedent_print(f"""
|
|
45
42
|
The number of epochs has been updated to {job.hyperparameters.n_epochs}!
|
|
@@ -50,11 +47,11 @@ class FineTuner(UtilClass):
|
|
|
50
47
|
return job
|
|
51
48
|
|
|
52
49
|
def create_job(self) -> FineTuningJob:
|
|
53
|
-
if self.config.
|
|
50
|
+
if self.config.job_id:
|
|
54
51
|
return self.poll_job_status()
|
|
55
52
|
|
|
56
53
|
file = self.openai.files.create(
|
|
57
|
-
file=self.config.
|
|
54
|
+
file=self.config.examples_file,
|
|
58
55
|
purpose="fine-tune",
|
|
59
56
|
)
|
|
60
57
|
job = self.openai.fine_tuning.jobs.create(
|
|
@@ -62,8 +59,7 @@ class FineTuner(UtilClass):
|
|
|
62
59
|
training_file=file.id,
|
|
63
60
|
)
|
|
64
61
|
|
|
65
|
-
self.config.
|
|
66
|
-
self.config.model_post_init()
|
|
62
|
+
self.config.job_id = job.id
|
|
67
63
|
return job
|
|
68
64
|
|
|
69
65
|
def fine_tune(self) -> None:
|
|
@@ -86,7 +82,7 @@ class FineTuner(UtilClass):
|
|
|
86
82
|
|
|
87
83
|
live.progress.update(
|
|
88
84
|
task_id,
|
|
89
|
-
description=f"Fine-tuning: {job.status}...",
|
|
85
|
+
description=f"Fine-tuning: [italic]{job.status.replace('_', ' ').title()}[/]...",
|
|
90
86
|
)
|
|
91
87
|
|
|
92
88
|
sleep(1)
|
|
@@ -94,16 +90,16 @@ class FineTuner(UtilClass):
|
|
|
94
90
|
self.process_completed_job(job)
|
|
95
91
|
|
|
96
92
|
def get_cost_string(self, total_tokens: int) -> str:
|
|
97
|
-
return f"${self.config.
|
|
93
|
+
return f"${self.config.token_price / 1000000 * total_tokens:.2f}"
|
|
98
94
|
|
|
99
95
|
def print_estimates(self) -> None:
|
|
100
|
-
total_tokens = self.config.
|
|
96
|
+
total_tokens = self.config.expected_epochs * self.estimated_tokens
|
|
101
97
|
cost_string = self.get_cost_string(total_tokens)
|
|
102
98
|
|
|
103
99
|
self.dedent_print(f"""
|
|
104
100
|
Tokens {self.estimated_tokens:,}:
|
|
105
|
-
Total tokens for [bold orange1]{self.config.
|
|
101
|
+
Total tokens for [bold orange1]{self.config.expected_epochs}[/] epoch(s): {total_tokens:,}
|
|
106
102
|
Expected cost when trained with [bold purple]{self.config.base_model}[/]: {cost_string}
|
|
107
103
|
NOTE: Token values are approximate and may not be 100% accurate, please be aware of this when using the data.
|
|
108
|
-
[italic red]
|
|
104
|
+
[italic red]Amelia, Mutsumi, and Marin are not responsible for any inaccuracies in the token count or estimated price.[/]
|
|
109
105
|
""")
|
|
@@ -8,7 +8,7 @@ from tumblrbot.utils.models import Post
|
|
|
8
8
|
|
|
9
9
|
class DraftGenerator(UtilClass):
|
|
10
10
|
def generate_tags(self, content: Post.Block) -> Post | None:
|
|
11
|
-
if random() < self.config.
|
|
11
|
+
if random() < self.config.tags_chance: # noqa: S311
|
|
12
12
|
return self.openai.responses.parse(
|
|
13
13
|
input=content.text,
|
|
14
14
|
model=self.config.base_model,
|
|
@@ -23,29 +23,32 @@ class DraftGenerator(UtilClass):
|
|
|
23
23
|
content = self.openai.responses.create(
|
|
24
24
|
input=self.config.user_input,
|
|
25
25
|
instructions=self.config.developer_message,
|
|
26
|
-
model=self.config.
|
|
26
|
+
model=self.config.fine_tuned_model,
|
|
27
27
|
).output_text
|
|
28
28
|
|
|
29
29
|
return Post.Block(type="text", text=content)
|
|
30
30
|
|
|
31
31
|
def generate_post(self) -> Post:
|
|
32
32
|
content = self.generate_content()
|
|
33
|
-
post = Post(
|
|
33
|
+
post = Post(
|
|
34
|
+
content=[content],
|
|
35
|
+
state="draft",
|
|
36
|
+
)
|
|
34
37
|
if tags := self.generate_tags(content):
|
|
35
38
|
post.tags = tags.tags
|
|
36
39
|
return post
|
|
37
40
|
|
|
38
41
|
def create_drafts(self) -> None:
|
|
39
|
-
message = f"View drafts here: https://tumblr.com/blog/{self.config.
|
|
42
|
+
message = f"View drafts here: https://tumblr.com/blog/{self.config.upload_blog_identifier}/drafts"
|
|
40
43
|
|
|
41
44
|
with PreviewLive() as live:
|
|
42
|
-
for i in live.progress.track(range(self.config.
|
|
45
|
+
for i in live.progress.track(range(self.config.draft_count), description="Generating drafts..."):
|
|
43
46
|
try:
|
|
44
47
|
post = self.generate_post()
|
|
45
|
-
self.tumblr.
|
|
48
|
+
self.tumblr.create_post(self.config.upload_blog_identifier, post)
|
|
46
49
|
live.custom_update(post)
|
|
47
50
|
except BaseException as exc:
|
|
48
51
|
exc.add_note(f"📉 An error occurred! Generated {i} draft(s) before failing. {message}")
|
|
49
52
|
raise
|
|
50
53
|
|
|
51
|
-
rich.print(f":chart_increasing: [bold green]Generated {self.config.
|
|
54
|
+
rich.print(f":chart_increasing: [bold green]Generated {self.config.draft_count} draft(s).[/] {message}")
|