task-checkpoint 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 1nvisibleCat
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,10 @@
1
+ include README.md
2
+ include LICENSE
3
+ include pyproject.toml
4
+ recursive-include task_pool *.py
5
+ recursive-include tests *.py
6
+ exclude LAB_PROMO_NOTES.md
7
+ exclude TASK_POOL_DESIGN_NOTES.zh.md
8
+ global-exclude .DS_Store
9
+ global-exclude __pycache__
10
+ global-exclude *.py[cod]
@@ -0,0 +1,242 @@
1
+ Metadata-Version: 2.4
2
+ Name: task-checkpoint
3
+ Version: 0.1.0
4
+ Summary: A small Python library for resumable task pools in long-running scripts.
5
+ Author: 1nvisibleCat
6
+ License-Expression: MIT
7
+ Keywords: checkpoint,task-checkpoint,task-pool,resume,batch-processing,job-queue
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: Operating System :: POSIX
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3 :: Only
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Dynamic: license-file
24
+
25
+ # Task Pool
26
+
27
+ Task Pool is a small Python library for long-running scripts that need to process many small jobs without losing progress.
28
+
29
+ Turn a fragile `for` loop into a resumable task pool.
30
+
31
+ When a run stops halfway, Task Pool remembers what is done, what is running, and what still needs work. You can restart the script and continue from the last known point.
32
+
33
+ Use it for crawling, experiments, data analysis, batch API calls, file processing, and other work where unfinished items should be easy to resume.
34
+
35
+ It feels like a tiny job queue, but stays inside your Python script. For local use, no database server, message queue, or service setup is needed.
36
+
37
+ ## Quickstart
38
+
39
+ Install it with:
40
+
41
+ ```bash
42
+ pip install task-checkpoint
43
+ ```
44
+
45
+ Put one URL per line:
46
+
47
+ ```text
48
+ https://example.com/a
49
+ https://example.com/b
50
+ https://example.com/c
51
+ ```
52
+
53
+ Then process the file as a task pool:
54
+
55
+ ```python
56
+ from task_pool import TaskPool
57
+
58
+
59
+ pool = TaskPool("urls.txt")
60
+
61
+ def fetch_one(payload):
62
+ fetch_url(payload["line"])
63
+
64
+
65
+ pool.for_each(fetch_one)
66
+ ```
67
+
68
+ You can also use a regular loop:
69
+
70
+ ```python
71
+ for payload in pool:
72
+ fetch_url(payload["line"])
73
+ ```
74
+
75
+ If you need task metadata, use `iter_tasks()`:
76
+
77
+ ```python
78
+ for task in pool.iter_tasks():
79
+ url = task.payload["line"]
80
+ result = fetch_url(url)
81
+ save_result(task.key, result)
82
+ ```
83
+
84
+ Each completed task is committed. If an exception is raised, the current task is rolled back to `not_start` and the exception is raised again.
85
+
86
+ For very small scripts, a lambda also works:
87
+
88
+ ```python
89
+ pool.for_each(lambda payload: fetch_url(payload["line"]))
90
+ ```
91
+
92
+ You can print a small progress summary:
93
+
94
+ ```python
95
+ print(pool.stats())
96
+ # {"total": 10, "not_start": 3, "pending": 0, "committed": 7}
97
+ ```
98
+
99
+ For structured jobs, use a JSON task pool:
100
+
101
+ ```python
102
+ pool = TaskPool("tasks.json")
103
+
104
+ pool.append({"input_path": "data/a.json", "method": "baseline"})
105
+ pool.append({"input_path": "data/b.json", "method": "baseline"})
106
+ ```
107
+
108
+ ## Manual Control
109
+
110
+ Use `lease()` when you want one task at a time and need explicit control.
111
+
112
+ ```python
113
+ pool = TaskPool("tasks.json")
114
+
115
+ with pool.lease() as task:
116
+ if task is None:
117
+ return
118
+
119
+ do_work(task.payload)
120
+ ```
121
+
122
+ If there is no task to process, the `with` block receives `None` as `task`.
123
+
124
+ Leaving the `with` block normally commits the task. If an exception is raised, the task is rolled back automatically.
125
+
126
+ ## Store Selection
127
+
128
+ You can choose a store by file suffix:
129
+
130
+ ```python
131
+ TaskPool("tasks.json") # JSONFileStore
132
+ TaskPool("tasks.sqlite") # SQLiteStore
133
+ TaskPool("tasks.sqlite3") # SQLiteStore
134
+ TaskPool("tasks.db") # SQLiteStore
135
+ TaskPool("rows.csv") # CSVRowStore with sidecar state
136
+ TaskPool("rows.tsv") # CSVRowStore with sidecar state
137
+ TaskPool("urls.txt") # TextLineStore with sidecar state
138
+ ```
139
+
140
+ You can also pass a store explicitly:
141
+
142
+ ```python
143
+ from task_pool import JSONFileStore, SQLiteStore, TaskPool
144
+
145
+
146
+ pool = TaskPool(JSONFileStore("tasks.json"))
147
+ pool = TaskPool(SQLiteStore("tasks.db"))
148
+ ```
149
+
150
+ ## Source Files
151
+
152
+ Task Pool can also read tasks from source files.
153
+
154
+ ### Text files
155
+
156
+ ```python
157
+ pool = TaskPool("urls.txt")
158
+ ```
159
+
160
+ Example:
161
+
162
+ ```text
163
+ https://example.com/a
164
+ https://example.com/b
165
+ https://example.com/c
166
+ ```
167
+
168
+ Each non-empty line becomes a task whose payload looks like this:
169
+
170
+ ```python
171
+ {
172
+ "line": "https://example.com/a",
173
+ "line_number": 1,
174
+ }
175
+ ```
176
+
177
+ ### CSV and TSV files
178
+
179
+ ```python
180
+ pool = TaskPool("rows.csv")
181
+ pool = TaskPool("rows.tsv")
182
+ ```
183
+
184
+ By default, CSV and TSV files are read with a header row:
185
+
186
+ ```csv
187
+ code,name
188
+ A0001,Alpha
189
+ A0002,Beta
190
+ ```
191
+
192
+ The first data row (the line after the header) becomes this payload:
193
+
194
+ ```python
195
+ {"code": "A0001", "name": "Alpha"}
196
+ ```
197
+
198
+ For files without headers:
199
+
200
+ ```python
201
+ pool = TaskPool.csv("rows.csv", has_header=False)
202
+ pool = TaskPool.tsv("rows.tsv", has_header=False)
203
+ ```
204
+
205
+ Columns are named `col1`, `col2`, and so on, so the first row becomes:
206
+
207
+ ```python
208
+ {"col1": "A0001", "col2": "Alpha"}
209
+ ```
210
+
211
+ Source files are not modified. Progress is stored in a sidecar JSON file, for example:
212
+
213
+ ```text
214
+ rows.csv.task_pool.json
215
+ urls.txt.task_pool.json
216
+ ```
217
+
218
+ ## Task Status
219
+
220
+ Each task has one of these statuses:
221
+
222
+ - `not_start`
223
+ - `pending`
224
+ - `committed`
225
+
226
+ The common flow is:
227
+
228
+ ```text
229
+ not_start -> pending -> committed
230
+ |
231
+ -> not_start
232
+ ```
233
+
234
+ `pending` means a worker has leased the task and is currently processing it.
235
+
236
+ ## Notes
237
+
238
+ - The default store is `JSONFileStore("task_pool.json")`.
239
+ - Payloads are stored as JSON.
240
+ - JSON and source-backed stores use file locks and atomic writes.
241
+ - SQLite is better for larger pools or heavier concurrent use.
242
+ - Source-backed stores keep the source file unchanged and write progress to a sidecar JSON file.
@@ -0,0 +1,218 @@
1
+ # Task Pool
2
+
3
+ Task Pool is a small Python library for long-running scripts that need to process many small jobs without losing progress.
4
+
5
+ Turn a fragile `for` loop into a resumable task pool.
6
+
7
+ When a run stops halfway, Task Pool remembers what is done, what is running, and what still needs work. You can restart the script and continue from the last known point.
8
+
9
+ Use it for crawling, experiments, data analysis, batch API calls, file processing, and other work where unfinished items should be easy to resume.
10
+
11
+ It feels like a tiny job queue, but stays inside your Python script. For local use, no database server, message queue, or service setup is needed.
12
+
13
+ ## Quickstart
14
+
15
+ Install it with:
16
+
17
+ ```bash
18
+ pip install task-checkpoint
19
+ ```
20
+
21
+ Put one URL per line:
22
+
23
+ ```text
24
+ https://example.com/a
25
+ https://example.com/b
26
+ https://example.com/c
27
+ ```
28
+
29
+ Then process the file as a task pool:
30
+
31
+ ```python
32
+ from task_pool import TaskPool
33
+
34
+
35
+ pool = TaskPool("urls.txt")
36
+
37
+ def fetch_one(payload):
38
+ fetch_url(payload["line"])
39
+
40
+
41
+ pool.for_each(fetch_one)
42
+ ```
43
+
44
+ You can also use a regular loop:
45
+
46
+ ```python
47
+ for payload in pool:
48
+ fetch_url(payload["line"])
49
+ ```
50
+
51
+ If you need task metadata, use `iter_tasks()`:
52
+
53
+ ```python
54
+ for task in pool.iter_tasks():
55
+ url = task.payload["line"]
56
+ result = fetch_url(url)
57
+ save_result(task.key, result)
58
+ ```
59
+
60
+ Each completed task is committed. If an exception is raised, the current task is rolled back to `not_start` and the exception is raised again.
61
+
62
+ For very small scripts, a lambda also works:
63
+
64
+ ```python
65
+ pool.for_each(lambda payload: fetch_url(payload["line"]))
66
+ ```
67
+
68
+ You can print a small progress summary:
69
+
70
+ ```python
71
+ print(pool.stats())
72
+ # {"total": 10, "not_start": 3, "pending": 0, "committed": 7}
73
+ ```
74
+
75
+ For structured jobs, use a JSON task pool:
76
+
77
+ ```python
78
+ pool = TaskPool("tasks.json")
79
+
80
+ pool.append({"input_path": "data/a.json", "method": "baseline"})
81
+ pool.append({"input_path": "data/b.json", "method": "baseline"})
82
+ ```
83
+
84
+ ## Manual Control
85
+
86
+ Use `lease()` when you want one task at a time and need explicit control.
87
+
88
+ ```python
89
+ pool = TaskPool("tasks.json")
90
+
91
+ with pool.lease() as task:
92
+ if task is None:
93
+ return
94
+
95
+ do_work(task.payload)
96
+ ```
97
+
98
+ If there is no task to process, the `with` block receives `None` as `task`.
99
+
100
+ Leaving the `with` block normally commits the task. If an exception is raised, the task is rolled back automatically.
101
+
102
+ ## Store Selection
103
+
104
+ You can choose a store by file suffix:
105
+
106
+ ```python
107
+ TaskPool("tasks.json") # JSONFileStore
108
+ TaskPool("tasks.sqlite") # SQLiteStore
109
+ TaskPool("tasks.sqlite3") # SQLiteStore
110
+ TaskPool("tasks.db") # SQLiteStore
111
+ TaskPool("rows.csv") # CSVRowStore with sidecar state
112
+ TaskPool("rows.tsv") # CSVRowStore with sidecar state
113
+ TaskPool("urls.txt") # TextLineStore with sidecar state
114
+ ```
115
+
116
+ You can also pass a store explicitly:
117
+
118
+ ```python
119
+ from task_pool import JSONFileStore, SQLiteStore, TaskPool
120
+
121
+
122
+ pool = TaskPool(JSONFileStore("tasks.json"))
123
+ pool = TaskPool(SQLiteStore("tasks.db"))
124
+ ```
125
+
126
+ ## Source Files
127
+
128
+ Task Pool can also read tasks from source files.
129
+
130
+ ### Text files
131
+
132
+ ```python
133
+ pool = TaskPool("urls.txt")
134
+ ```
135
+
136
+ Example:
137
+
138
+ ```text
139
+ https://example.com/a
140
+ https://example.com/b
141
+ https://example.com/c
142
+ ```
143
+
144
+ Each non-empty line becomes a task whose payload looks like this:
145
+
146
+ ```python
147
+ {
148
+ "line": "https://example.com/a",
149
+ "line_number": 1,
150
+ }
151
+ ```
152
+
153
+ ### CSV and TSV files
154
+
155
+ ```python
156
+ pool = TaskPool("rows.csv")
157
+ pool = TaskPool("rows.tsv")
158
+ ```
159
+
160
+ By default, CSV and TSV files are read with a header row:
161
+
162
+ ```csv
163
+ code,name
164
+ A0001,Alpha
165
+ A0002,Beta
166
+ ```
167
+
168
+ The first data row (the line after the header) becomes this payload:
169
+
170
+ ```python
171
+ {"code": "A0001", "name": "Alpha"}
172
+ ```
173
+
174
+ For files without headers:
175
+
176
+ ```python
177
+ pool = TaskPool.csv("rows.csv", has_header=False)
178
+ pool = TaskPool.tsv("rows.tsv", has_header=False)
179
+ ```
180
+
181
+ Columns are named `col1`, `col2`, and so on, so the first row becomes:
182
+
183
+ ```python
184
+ {"col1": "A0001", "col2": "Alpha"}
185
+ ```
186
+
187
+ Source files are not modified. Progress is stored in a sidecar JSON file, for example:
188
+
189
+ ```text
190
+ rows.csv.task_pool.json
191
+ urls.txt.task_pool.json
192
+ ```
193
+
194
+ ## Task Status
195
+
196
+ Each task has one of these statuses:
197
+
198
+ - `not_start`
199
+ - `pending`
200
+ - `committed`
201
+
202
+ The common flow is:
203
+
204
+ ```text
205
+ not_start -> pending -> committed
206
+ |
207
+ -> not_start
208
+ ```
209
+
210
+ `pending` means a worker has leased the task and is currently processing it.
211
+
212
+ ## Notes
213
+
214
+ - The default store is `JSONFileStore("task_pool.json")`.
215
+ - Payloads are stored as JSON.
216
+ - JSON and source-backed stores use file locks and atomic writes.
217
+ - SQLite is better for larger pools or heavier concurrent use.
218
+ - Source-backed stores keep the source file unchanged and write progress to a sidecar JSON file.
@@ -0,0 +1,46 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "task-checkpoint"
7
+ version = "0.1.0"
8
+ description = "A small Python library for resumable task pools in long-running scripts."
9
+ readme = "README.md"
10
+ requires-python = ">=3.9"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ { name = "1nvisibleCat" }
15
+ ]
16
+ keywords = [
17
+ "checkpoint",
18
+ "task-checkpoint",
19
+ "task-pool",
20
+ "resume",
21
+ "batch-processing",
22
+ "job-queue",
23
+ ]
24
+ classifiers = [
25
+ "Development Status :: 3 - Alpha",
26
+ "Intended Audience :: Developers",
27
+ "Operating System :: POSIX",
28
+ "Programming Language :: Python :: 3",
29
+ "Programming Language :: Python :: 3 :: Only",
30
+ "Programming Language :: Python :: 3.9",
31
+ "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
34
+ "Programming Language :: Python :: 3.13",
35
+ "Topic :: Software Development :: Libraries :: Python Modules",
36
+ "Topic :: Utilities",
37
+ ]
38
+ dependencies = []
39
+
40
+ [tool.setuptools]
41
+ include-package-data = false
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["."]
45
+ include = ["task_pool*"]
46
+ exclude = ["tests*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+