taskmux 0.2.7__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {taskmux-0.2.7 → taskmux-0.3.0}/PKG-INFO +91 -43
- {taskmux-0.2.7 → taskmux-0.3.0}/README.md +90 -42
- {taskmux-0.2.7 → taskmux-0.3.0}/pyproject.toml +1 -1
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/cli.py +85 -15
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/config.py +3 -1
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/daemon.py +2 -54
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/models.py +10 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/tmux_manager.py +110 -9
- {taskmux-0.2.7 → taskmux-0.3.0}/.gitignore +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/LICENSE +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/__init__.py +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/agent.py +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/hooks.py +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/init.py +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/main.py +0 -0
- {taskmux-0.2.7 → taskmux-0.3.0}/taskmux/templates/claude.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: taskmux
|
|
3
|
-
Version: 0.2.7
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Modern tmux-based task manager for LLM development tools
|
|
5
5
|
Project-URL: Homepage, https://github.com/nc9/taskmux
|
|
6
6
|
Project-URL: Repository, https://github.com/nc9/taskmux
|
|
@@ -35,7 +35,7 @@ Description-Content-Type: text/markdown
|
|
|
35
35
|
|
|
36
36
|
# Taskmux
|
|
37
37
|
|
|
38
|
-
A modern tmux session manager for LLM development tools with health monitoring,
|
|
38
|
+
A modern tmux session manager for LLM development tools with health monitoring, restart policies, and WebSocket API.
|
|
39
39
|
|
|
40
40
|
## Why Taskmux?
|
|
41
41
|
|
|
@@ -134,7 +134,8 @@ stop_grace_period = 10
|
|
|
134
134
|
command = "celery -A myapp worker -l info"
|
|
135
135
|
cwd = "apps/api"
|
|
136
136
|
depends_on = ["db"]
|
|
137
|
-
|
|
137
|
+
restart_policy = "always"
|
|
138
|
+
max_restarts = 10
|
|
138
139
|
restart_backoff = 3.0
|
|
139
140
|
|
|
140
141
|
[tasks.web]
|
|
@@ -173,46 +174,49 @@ taskmux start storybook # Start a manual task
|
|
|
173
174
|
## Commands
|
|
174
175
|
|
|
175
176
|
```bash
|
|
176
|
-
# Session
|
|
177
|
-
taskmux start # Start all auto_start tasks
|
|
178
|
-
taskmux start <task>
|
|
179
|
-
taskmux
|
|
180
|
-
taskmux stop
|
|
177
|
+
# Session lifecycle
|
|
178
|
+
taskmux start # Start all auto_start tasks in dependency order
|
|
179
|
+
taskmux start <task> [task2...] # Start specific tasks
|
|
180
|
+
taskmux start -m # Start + stay in foreground monitoring health/restarting
|
|
181
|
+
taskmux stop # Stop all (C-c → SIGTERM → SIGKILL), prevents auto-restart
|
|
182
|
+
taskmux stop <task> [task2...] # Stop specific tasks
|
|
181
183
|
taskmux restart # Restart all tasks
|
|
182
|
-
taskmux restart <task>
|
|
183
|
-
taskmux status # Show session status
|
|
184
|
-
taskmux list # List tasks with health indicators
|
|
184
|
+
taskmux restart <task> [task2...] # Restart specific tasks, re-enables auto-restart
|
|
185
185
|
|
|
186
|
-
#
|
|
187
|
-
taskmux kill <task> # Hard-kill
|
|
188
|
-
taskmux add <task> "<command>" # Add task to
|
|
189
|
-
taskmux remove <task> # Remove task
|
|
190
|
-
taskmux inspect <task> # JSON
|
|
186
|
+
# Task management
|
|
187
|
+
taskmux kill <task> # Hard-kill (SIGKILL + destroy window), prevents auto-restart
|
|
188
|
+
taskmux add <task> "<command>" # Add task to taskmux.toml
|
|
189
|
+
taskmux remove <task> # Remove task (kills if running)
|
|
190
|
+
taskmux inspect <task> # JSON state: pid, health, restart_policy, pane info
|
|
191
|
+
|
|
192
|
+
# Status & health
|
|
193
|
+
taskmux status # Session + task overview (aliases: list, ls)
|
|
194
|
+
taskmux health # Health check table for all tasks
|
|
191
195
|
|
|
192
196
|
# Logs
|
|
193
197
|
taskmux logs # Interleaved logs from all tasks
|
|
194
|
-
taskmux logs <task> #
|
|
195
|
-
taskmux logs -f
|
|
196
|
-
taskmux logs -f <task> # Follow a task's logs live
|
|
198
|
+
taskmux logs <task> # Recent logs for a task
|
|
199
|
+
taskmux logs -f [task] # Follow logs live (colored prefixes)
|
|
197
200
|
taskmux logs -n 200 <task> # Last N lines
|
|
198
|
-
taskmux logs -g "error" #
|
|
199
|
-
taskmux logs <task> -g "
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
#
|
|
203
|
-
taskmux init
|
|
204
|
-
taskmux
|
|
205
|
-
|
|
206
|
-
# Monitoring
|
|
207
|
-
taskmux health # Health check table
|
|
208
|
-
taskmux watch # Watch config for changes, reload on edit
|
|
209
|
-
taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
201
|
+
taskmux logs -g "error" # Grep all tasks
|
|
202
|
+
taskmux logs <task> -g "err" -C 5 # Grep one task with context
|
|
203
|
+
|
|
204
|
+
# Setup & monitoring
|
|
205
|
+
taskmux init # Interactive project setup + agent context injection
|
|
206
|
+
taskmux init --defaults # Non-interactive setup
|
|
207
|
+
taskmux watch # Watch taskmux.toml, reload on change
|
|
208
|
+
taskmux daemon --port 8765 # Daemon mode: WebSocket API + health monitoring
|
|
210
209
|
```
|
|
211
210
|
|
|
212
|
-
### stop vs kill
|
|
211
|
+
### stop vs kill vs restart
|
|
212
|
+
|
|
213
|
+
| Command | Signal | Window | Auto-restart |
|
|
214
|
+
|---------|--------|--------|--------------|
|
|
215
|
+
| `stop` | C-c → SIGTERM → SIGKILL (graceful) | Stays alive | Blocked (manually stopped) |
|
|
216
|
+
| `kill` | SIGKILL (immediate) | Destroyed | Blocked (manually stopped) |
|
|
217
|
+
| `restart` | Full stop + restart | Reused | Re-enabled |
|
|
213
218
|
|
|
214
|
-
|
|
215
|
-
- **`kill`** kills the process group and destroys the window immediately.
|
|
219
|
+
Both `stop` and `kill` mark the task as **manually stopped**, preventing auto-restart even with `restart_policy = "always"`. Use `restart` or `start` to clear this flag and re-enable auto-restart.
|
|
216
220
|
|
|
217
221
|
## Configuration
|
|
218
222
|
|
|
@@ -246,11 +250,13 @@ health_check = "pg_isready -h localhost"
|
|
|
246
250
|
[tasks.worker]
|
|
247
251
|
command = "celery worker -A myapp"
|
|
248
252
|
depends_on = ["db"]
|
|
249
|
-
|
|
253
|
+
restart_policy = "always"
|
|
254
|
+
max_restarts = 10
|
|
250
255
|
|
|
251
256
|
[tasks.tailwind]
|
|
252
257
|
command = "npx tailwindcss -w"
|
|
253
258
|
auto_start = false
|
|
259
|
+
restart_policy = "no"
|
|
254
260
|
```
|
|
255
261
|
|
|
256
262
|
### Fields
|
|
@@ -270,10 +276,11 @@ auto_start = false
|
|
|
270
276
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
271
277
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
272
278
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
273
|
-
| `tasks.<name>.health_retries` | `3` | Consecutive failures before
|
|
279
|
+
| `tasks.<name>.health_retries` | `3` | Consecutive health failures before triggering a restart |
|
|
274
280
|
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
275
|
-
| `tasks.<name>.
|
|
276
|
-
| `tasks.<name>.
|
|
281
|
+
| `tasks.<name>.restart_policy` | `"on-failure"` | When to auto-restart: `"no"`, `"on-failure"`, or `"always"` (see below) |
|
|
282
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts before giving up (resets after 60s healthy) |
|
|
283
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Exponential backoff base for restart delay (1s, 2s, 4s… capped at 60s) |
|
|
277
284
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
278
285
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
279
286
|
|
|
@@ -285,14 +292,42 @@ Circular dependencies and references to nonexistent tasks are rejected at config
|
|
|
285
292
|
|
|
286
293
|
When starting a single task with `taskmux start <task>`, dependencies are not auto-started — you get a warning if they aren't running.
|
|
287
294
|
|
|
295
|
+
### Restart Policies
|
|
296
|
+
|
|
297
|
+
Each task has a `restart_policy` that controls automatic restart behavior. Restart policies are enforced by `taskmux start --monitor` and `taskmux daemon`.
|
|
298
|
+
|
|
299
|
+
| Policy | Behavior |
|
|
300
|
+
|--------|----------|
|
|
301
|
+
| `"no"` | Never auto-restart. Task stays stopped after crash or health failure. |
|
|
302
|
+
| `"on-failure"` | **(default)** Restart on crash (process exits) or after `health_retries` consecutive health check failures. |
|
|
303
|
+
| `"always"` | Restart whenever the task stops, including clean exits. |
|
|
304
|
+
|
|
305
|
+
**Manual stops override all policies.** Running `taskmux stop` or `taskmux kill` marks the task as manually stopped — it will not auto-restart even with `restart_policy = "always"`. Use `taskmux restart` or `taskmux start` to clear this flag.
|
|
306
|
+
|
|
307
|
+
**`restart_policy` vs `auto_start`** — these are orthogonal. `auto_start` controls whether a task launches on `taskmux start`. `restart_policy` controls what happens after a running task exits or fails. A task with `auto_start = false` and `restart_policy = "always"` won't start automatically, but once started manually, it will auto-restart on exit.
|
|
308
|
+
|
|
309
|
+
| `restart_policy` | `auto_start` | Behavior |
|
|
310
|
+
|---|---|---|
|
|
311
|
+
| `"no"` | `true` | Starts with session, never auto-restarts |
|
|
312
|
+
| `"no"` | `false` | Manual start only, never auto-restarts |
|
|
313
|
+
| `"on-failure"` | `true` | Starts with session, restarts on crash/health failure |
|
|
314
|
+
| `"on-failure"` | `false` | Manual start, restarts on crash/health failure once running |
|
|
315
|
+
| `"always"` | `true` | Starts with session, restarts on any exit |
|
|
316
|
+
| `"always"` | `false` | Manual start, restarts on any exit once running |
|
|
317
|
+
|
|
318
|
+
**Backoff & limits:** When a task keeps failing, restart delays increase exponentially: `restart_backoff ^ attempt` seconds (capped at 60s). After `max_restarts` consecutive restarts, the task is left stopped. The restart counter resets after 60 seconds of healthy uptime.
|
|
319
|
+
|
|
288
320
|
### Health Checks
|
|
289
321
|
|
|
290
322
|
If `health_check` is set, taskmux runs it as a shell command. Exit code 0 means healthy. If not set, taskmux falls back to checking if the tmux pane has a running process (not just a shell prompt).
|
|
291
323
|
|
|
324
|
+
A task must fail `health_retries` consecutive health checks (default 3) before being considered unhealthy and triggering a restart. If the task becomes healthy again, the failure counter resets.
|
|
325
|
+
|
|
292
326
|
Health checks are used by:
|
|
293
327
|
- `taskmux health` — shows a table of all task health
|
|
294
328
|
- `taskmux start` — waits for dependencies to be healthy before starting dependents
|
|
295
|
-
- `taskmux
|
|
329
|
+
- `taskmux start --monitor` — continuously monitors and auto-restarts per `restart_policy`
|
|
330
|
+
- `taskmux daemon` — same as `--monitor`, plus WebSocket API and config watching
|
|
296
331
|
|
|
297
332
|
### Hook Cascade
|
|
298
333
|
|
|
@@ -314,7 +349,7 @@ Taskmux ensures processes are fully stopped before restarting and that orphaned
|
|
|
314
349
|
|
|
315
350
|
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
316
351
|
|
|
317
|
-
**Auto-restart
|
|
352
|
+
**Auto-restart** (`start --monitor`, `daemon`): Tasks with `restart_policy = "on-failure"` or `"always"` are automatically restarted. Health checks must fail `health_retries` times before triggering a restart. Restart delays increase exponentially (`restart_backoff` base, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
318
353
|
|
|
319
354
|
### Init & Agent Context
|
|
320
355
|
|
|
@@ -336,6 +371,7 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
336
371
|
"name": "api",
|
|
337
372
|
"command": "python manage.py runserver 0.0.0.0:8000",
|
|
338
373
|
"auto_start": true,
|
|
374
|
+
"restart_policy": "on-failure",
|
|
339
375
|
"cwd": "apps/api",
|
|
340
376
|
"health_check": "curl -sf http://localhost:8000/health",
|
|
341
377
|
"depends_on": ["db"],
|
|
@@ -349,16 +385,28 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
349
385
|
}
|
|
350
386
|
```
|
|
351
387
|
|
|
352
|
-
##
|
|
388
|
+
## Monitoring & Auto-restart
|
|
389
|
+
|
|
390
|
+
### start --monitor (lightweight)
|
|
391
|
+
|
|
392
|
+
Start tasks and stay in the foreground monitoring health:
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
taskmux start --monitor # or: taskmux start -m
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Checks health every 30 seconds and auto-restarts tasks according to their `restart_policy`. No WebSocket API — just monitoring and restart. Press Ctrl+C to stop monitoring (tasks keep running).
|
|
399
|
+
|
|
400
|
+
### Daemon Mode (full)
|
|
353
401
|
|
|
354
|
-
Run as a background daemon with WebSocket API and auto-restart
|
|
402
|
+
Run as a background daemon with WebSocket API, config watching, and auto-restart:
|
|
355
403
|
|
|
356
404
|
```bash
|
|
357
405
|
taskmux daemon # Default port 8765
|
|
358
406
|
taskmux daemon --port 9000 # Custom port
|
|
359
407
|
```
|
|
360
408
|
|
|
361
|
-
The daemon monitors task health every 30 seconds.
|
|
409
|
+
The daemon monitors task health every 30 seconds. Tasks are restarted per their `restart_policy` with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset. Config file changes are detected and applied automatically.
|
|
362
410
|
|
|
363
411
|
WebSocket API:
|
|
364
412
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Taskmux
|
|
2
2
|
|
|
3
|
-
A modern tmux session manager for LLM development tools with health monitoring,
|
|
3
|
+
A modern tmux session manager for LLM development tools with health monitoring, restart policies, and WebSocket API.
|
|
4
4
|
|
|
5
5
|
## Why Taskmux?
|
|
6
6
|
|
|
@@ -99,7 +99,8 @@ stop_grace_period = 10
|
|
|
99
99
|
command = "celery -A myapp worker -l info"
|
|
100
100
|
cwd = "apps/api"
|
|
101
101
|
depends_on = ["db"]
|
|
102
|
-
|
|
102
|
+
restart_policy = "always"
|
|
103
|
+
max_restarts = 10
|
|
103
104
|
restart_backoff = 3.0
|
|
104
105
|
|
|
105
106
|
[tasks.web]
|
|
@@ -138,46 +139,49 @@ taskmux start storybook # Start a manual task
|
|
|
138
139
|
## Commands
|
|
139
140
|
|
|
140
141
|
```bash
|
|
141
|
-
# Session
|
|
142
|
-
taskmux start # Start all auto_start tasks
|
|
143
|
-
taskmux start <task>
|
|
144
|
-
taskmux
|
|
145
|
-
taskmux stop
|
|
142
|
+
# Session lifecycle
|
|
143
|
+
taskmux start # Start all auto_start tasks in dependency order
|
|
144
|
+
taskmux start <task> [task2...] # Start specific tasks
|
|
145
|
+
taskmux start -m # Start + stay in foreground monitoring health/restarting
|
|
146
|
+
taskmux stop # Stop all (C-c → SIGTERM → SIGKILL), prevents auto-restart
|
|
147
|
+
taskmux stop <task> [task2...] # Stop specific tasks
|
|
146
148
|
taskmux restart # Restart all tasks
|
|
147
|
-
taskmux restart <task>
|
|
148
|
-
taskmux status # Show session status
|
|
149
|
-
taskmux list # List tasks with health indicators
|
|
149
|
+
taskmux restart <task> [task2...] # Restart specific tasks, re-enables auto-restart
|
|
150
150
|
|
|
151
|
-
#
|
|
152
|
-
taskmux kill <task> # Hard-kill
|
|
153
|
-
taskmux add <task> "<command>" # Add task to
|
|
154
|
-
taskmux remove <task> # Remove task
|
|
155
|
-
taskmux inspect <task> # JSON
|
|
151
|
+
# Task management
|
|
152
|
+
taskmux kill <task> # Hard-kill (SIGKILL + destroy window), prevents auto-restart
|
|
153
|
+
taskmux add <task> "<command>" # Add task to taskmux.toml
|
|
154
|
+
taskmux remove <task> # Remove task (kills if running)
|
|
155
|
+
taskmux inspect <task> # JSON state: pid, health, restart_policy, pane info
|
|
156
|
+
|
|
157
|
+
# Status & health
|
|
158
|
+
taskmux status # Session + task overview (aliases: list, ls)
|
|
159
|
+
taskmux health # Health check table for all tasks
|
|
156
160
|
|
|
157
161
|
# Logs
|
|
158
162
|
taskmux logs # Interleaved logs from all tasks
|
|
159
|
-
taskmux logs <task> #
|
|
160
|
-
taskmux logs -f
|
|
161
|
-
taskmux logs -f <task> # Follow a task's logs live
|
|
163
|
+
taskmux logs <task> # Recent logs for a task
|
|
164
|
+
taskmux logs -f [task] # Follow logs live (colored prefixes)
|
|
162
165
|
taskmux logs -n 200 <task> # Last N lines
|
|
163
|
-
taskmux logs -g "error" #
|
|
164
|
-
taskmux logs <task> -g "
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
#
|
|
168
|
-
taskmux init
|
|
169
|
-
taskmux
|
|
170
|
-
|
|
171
|
-
# Monitoring
|
|
172
|
-
taskmux health # Health check table
|
|
173
|
-
taskmux watch # Watch config for changes, reload on edit
|
|
174
|
-
taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
166
|
+
taskmux logs -g "error" # Grep all tasks
|
|
167
|
+
taskmux logs <task> -g "err" -C 5 # Grep one task with context
|
|
168
|
+
|
|
169
|
+
# Setup & monitoring
|
|
170
|
+
taskmux init # Interactive project setup + agent context injection
|
|
171
|
+
taskmux init --defaults # Non-interactive setup
|
|
172
|
+
taskmux watch # Watch taskmux.toml, reload on change
|
|
173
|
+
taskmux daemon --port 8765 # Daemon mode: WebSocket API + health monitoring
|
|
175
174
|
```
|
|
176
175
|
|
|
177
|
-
### stop vs kill
|
|
176
|
+
### stop vs kill vs restart
|
|
177
|
+
|
|
178
|
+
| Command | Signal | Window | Auto-restart |
|
|
179
|
+
|---------|--------|--------|--------------|
|
|
180
|
+
| `stop` | C-c → SIGTERM → SIGKILL (graceful) | Stays alive | Blocked (manually stopped) |
|
|
181
|
+
| `kill` | SIGKILL (immediate) | Destroyed | Blocked (manually stopped) |
|
|
182
|
+
| `restart` | Full stop + restart | Reused | Re-enabled |
|
|
178
183
|
|
|
179
|
-
|
|
180
|
-
- **`kill`** kills the process group and destroys the window immediately.
|
|
184
|
+
Both `stop` and `kill` mark the task as **manually stopped**, preventing auto-restart even with `restart_policy = "always"`. Use `restart` or `start` to clear this flag and re-enable auto-restart.
|
|
181
185
|
|
|
182
186
|
## Configuration
|
|
183
187
|
|
|
@@ -211,11 +215,13 @@ health_check = "pg_isready -h localhost"
|
|
|
211
215
|
[tasks.worker]
|
|
212
216
|
command = "celery worker -A myapp"
|
|
213
217
|
depends_on = ["db"]
|
|
214
|
-
|
|
218
|
+
restart_policy = "always"
|
|
219
|
+
max_restarts = 10
|
|
215
220
|
|
|
216
221
|
[tasks.tailwind]
|
|
217
222
|
command = "npx tailwindcss -w"
|
|
218
223
|
auto_start = false
|
|
224
|
+
restart_policy = "no"
|
|
219
225
|
```
|
|
220
226
|
|
|
221
227
|
### Fields
|
|
@@ -235,10 +241,11 @@ auto_start = false
|
|
|
235
241
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
236
242
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
237
243
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
238
|
-
| `tasks.<name>.health_retries` | `3` | Consecutive failures before
|
|
244
|
+
| `tasks.<name>.health_retries` | `3` | Consecutive health failures before triggering a restart |
|
|
239
245
|
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
240
|
-
| `tasks.<name>.
|
|
241
|
-
| `tasks.<name>.
|
|
246
|
+
| `tasks.<name>.restart_policy` | `"on-failure"` | When to auto-restart: `"no"`, `"on-failure"`, or `"always"` (see below) |
|
|
247
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts before giving up (resets after 60s healthy) |
|
|
248
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Exponential backoff base for restart delay (1s, 2s, 4s… capped at 60s) |
|
|
242
249
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
243
250
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
244
251
|
|
|
@@ -250,14 +257,42 @@ Circular dependencies and references to nonexistent tasks are rejected at config
|
|
|
250
257
|
|
|
251
258
|
When starting a single task with `taskmux start <task>`, dependencies are not auto-started — you get a warning if they aren't running.
|
|
252
259
|
|
|
260
|
+
### Restart Policies
|
|
261
|
+
|
|
262
|
+
Each task has a `restart_policy` that controls automatic restart behavior. Restart policies are enforced by `taskmux start --monitor` and `taskmux daemon`.
|
|
263
|
+
|
|
264
|
+
| Policy | Behavior |
|
|
265
|
+
|--------|----------|
|
|
266
|
+
| `"no"` | Never auto-restart. Task stays stopped after crash or health failure. |
|
|
267
|
+
| `"on-failure"` | **(default)** Restart on crash (process exits) or after `health_retries` consecutive health check failures. |
|
|
268
|
+
| `"always"` | Restart whenever the task stops, including clean exits. |
|
|
269
|
+
|
|
270
|
+
**Manual stops override all policies.** Running `taskmux stop` or `taskmux kill` marks the task as manually stopped — it will not auto-restart even with `restart_policy = "always"`. Use `taskmux restart` or `taskmux start` to clear this flag.
|
|
271
|
+
|
|
272
|
+
**`restart_policy` vs `auto_start`** — these are orthogonal. `auto_start` controls whether a task launches on `taskmux start`. `restart_policy` controls what happens after a running task exits or fails. A task with `auto_start = false` and `restart_policy = "always"` won't start automatically, but once started manually, it will auto-restart on exit.
|
|
273
|
+
|
|
274
|
+
| `restart_policy` | `auto_start` | Behavior |
|
|
275
|
+
|---|---|---|
|
|
276
|
+
| `"no"` | `true` | Starts with session, never auto-restarts |
|
|
277
|
+
| `"no"` | `false` | Manual start only, never auto-restarts |
|
|
278
|
+
| `"on-failure"` | `true` | Starts with session, restarts on crash/health failure |
|
|
279
|
+
| `"on-failure"` | `false` | Manual start, restarts on crash/health failure once running |
|
|
280
|
+
| `"always"` | `true` | Starts with session, restarts on any exit |
|
|
281
|
+
| `"always"` | `false` | Manual start, restarts on any exit once running |
|
|
282
|
+
|
|
283
|
+
**Backoff & limits:** When a task keeps failing, restart delays increase exponentially: `restart_backoff ^ attempt` seconds (capped at 60s). After `max_restarts` consecutive restarts, the task is left stopped. The restart counter resets after 60 seconds of healthy uptime.
|
|
284
|
+
|
|
253
285
|
### Health Checks
|
|
254
286
|
|
|
255
287
|
If `health_check` is set, taskmux runs it as a shell command. Exit code 0 means healthy. If not set, taskmux falls back to checking if the tmux pane has a running process (not just a shell prompt).
|
|
256
288
|
|
|
289
|
+
A task must fail `health_retries` consecutive health checks (default 3) before being considered unhealthy and triggering a restart. If the task becomes healthy again, the failure counter resets.
|
|
290
|
+
|
|
257
291
|
Health checks are used by:
|
|
258
292
|
- `taskmux health` — shows a table of all task health
|
|
259
293
|
- `taskmux start` — waits for dependencies to be healthy before starting dependents
|
|
260
|
-
- `taskmux
|
|
294
|
+
- `taskmux start --monitor` — continuously monitors and auto-restarts per `restart_policy`
|
|
295
|
+
- `taskmux daemon` — same as `--monitor`, plus WebSocket API and config watching
|
|
261
296
|
|
|
262
297
|
### Hook Cascade
|
|
263
298
|
|
|
@@ -279,7 +314,7 @@ Taskmux ensures processes are fully stopped before restarting and that orphaned
|
|
|
279
314
|
|
|
280
315
|
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
281
316
|
|
|
282
|
-
**Auto-restart
|
|
317
|
+
**Auto-restart** (`start --monitor`, `daemon`): Tasks with `restart_policy = "on-failure"` or `"always"` are automatically restarted. Health checks must fail `health_retries` times before triggering a restart. Restart delays increase exponentially (`restart_backoff` base, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
283
318
|
|
|
284
319
|
### Init & Agent Context
|
|
285
320
|
|
|
@@ -301,6 +336,7 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
301
336
|
"name": "api",
|
|
302
337
|
"command": "python manage.py runserver 0.0.0.0:8000",
|
|
303
338
|
"auto_start": true,
|
|
339
|
+
"restart_policy": "on-failure",
|
|
304
340
|
"cwd": "apps/api",
|
|
305
341
|
"health_check": "curl -sf http://localhost:8000/health",
|
|
306
342
|
"depends_on": ["db"],
|
|
@@ -314,16 +350,28 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
314
350
|
}
|
|
315
351
|
```
|
|
316
352
|
|
|
317
|
-
##
|
|
353
|
+
## Monitoring & Auto-restart
|
|
354
|
+
|
|
355
|
+
### start --monitor (lightweight)
|
|
356
|
+
|
|
357
|
+
Start tasks and stay in the foreground monitoring health:
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
taskmux start --monitor # or: taskmux start -m
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
Checks health every 30 seconds and auto-restarts tasks according to their `restart_policy`. No WebSocket API — just monitoring and restart. Press Ctrl+C to stop monitoring (tasks keep running).
|
|
364
|
+
|
|
365
|
+
### Daemon Mode (full)
|
|
318
366
|
|
|
319
|
-
Run as a background daemon with WebSocket API and auto-restart
|
|
367
|
+
Run as a background daemon with WebSocket API, config watching, and auto-restart:
|
|
320
368
|
|
|
321
369
|
```bash
|
|
322
370
|
taskmux daemon # Default port 8765
|
|
323
371
|
taskmux daemon --port 9000 # Custom port
|
|
324
372
|
```
|
|
325
373
|
|
|
326
|
-
The daemon monitors task health every 30 seconds.
|
|
374
|
+
The daemon monitors task health every 30 seconds. Tasks are restarted per their `restart_policy` with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset. Config file changes are detected and applied automatically.
|
|
327
375
|
|
|
328
376
|
WebSocket API:
|
|
329
377
|
|
|
@@ -16,8 +16,14 @@ from .tmux_manager import TmuxManager
|
|
|
16
16
|
|
|
17
17
|
app = typer.Typer(
|
|
18
18
|
name="taskmux",
|
|
19
|
-
help=
|
|
20
|
-
|
|
19
|
+
help=(
|
|
20
|
+
"Tmux session manager for development environments.\n\n"
|
|
21
|
+
"Reads task definitions from taskmux.toml, manages tmux sessions/windows, "
|
|
22
|
+
"provides health monitoring, restart policies (no/on-failure/always), "
|
|
23
|
+
"dependency ordering, lifecycle hooks, and a WebSocket API.\n\n"
|
|
24
|
+
"Quick start: taskmux init → edit taskmux.toml → taskmux start"
|
|
25
|
+
),
|
|
26
|
+
epilog="Docs: https://github.com/nc9/taskmux",
|
|
21
27
|
rich_markup_mode="rich",
|
|
22
28
|
)
|
|
23
29
|
|
|
@@ -49,15 +55,32 @@ class TaskmuxCLI:
|
|
|
49
55
|
def init(
|
|
50
56
|
defaults: bool = typer.Option(False, "--defaults", help="Accept all defaults"),
|
|
51
57
|
):
|
|
52
|
-
"""Initialize taskmux config in current directory.
|
|
58
|
+
"""Initialize taskmux config in current directory.
|
|
59
|
+
|
|
60
|
+
Creates taskmux.toml with session name (defaults to directory name).
|
|
61
|
+
Detects installed AI coding agents (Claude, Codex, OpenCode) and injects
|
|
62
|
+
taskmux usage instructions into their context files.
|
|
63
|
+
Use --defaults to skip interactive prompts.
|
|
64
|
+
"""
|
|
53
65
|
initProject(defaults=defaults)
|
|
54
66
|
|
|
55
67
|
|
|
56
68
|
@app.command()
|
|
57
69
|
def start(
|
|
58
70
|
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
71
|
+
monitor: bool = typer.Option( # noqa: B008
|
|
72
|
+
False, "-m", "--monitor", help="Stay running, auto-restart per restart_policy"
|
|
73
|
+
),
|
|
59
74
|
):
|
|
60
|
-
"""Start tasks (all if none specified).
|
|
75
|
+
"""Start tasks (all auto_start tasks if none specified).
|
|
76
|
+
|
|
77
|
+
Starts tasks in dependency order, waiting for each dependency's health check
|
|
78
|
+
to pass before starting dependents. With --monitor, stays in the foreground
|
|
79
|
+
and auto-restarts tasks according to their restart_policy (no/on-failure/always),
|
|
80
|
+
respecting health_retries, max_restarts, and exponential backoff.
|
|
81
|
+
"""
|
|
82
|
+
import time
|
|
83
|
+
|
|
61
84
|
cli = TaskmuxCLI()
|
|
62
85
|
if tasks:
|
|
63
86
|
for task in tasks:
|
|
@@ -65,12 +88,26 @@ def start(
|
|
|
65
88
|
else:
|
|
66
89
|
cli.tmux.start_all()
|
|
67
90
|
|
|
91
|
+
if monitor:
|
|
92
|
+
console.print("Monitoring tasks (Ctrl+C to stop)...")
|
|
93
|
+
try:
|
|
94
|
+
while True:
|
|
95
|
+
time.sleep(30)
|
|
96
|
+
cli.tmux.auto_restart_tasks()
|
|
97
|
+
except KeyboardInterrupt:
|
|
98
|
+
console.print("\nStopped monitoring")
|
|
99
|
+
|
|
68
100
|
|
|
69
101
|
@app.command()
|
|
70
102
|
def stop(
|
|
71
103
|
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
72
104
|
):
|
|
73
|
-
"""Stop tasks (all if none specified).
|
|
105
|
+
"""Stop tasks (all if none specified).
|
|
106
|
+
|
|
107
|
+
Uses signal escalation: C-c → SIGTERM → SIGKILL. Waits stop_grace_period
|
|
108
|
+
seconds (default 5) after C-c before escalating. Stopped tasks are marked
|
|
109
|
+
as manually stopped and will not be auto-restarted even with restart_policy="always".
|
|
110
|
+
"""
|
|
74
111
|
cli = TaskmuxCLI()
|
|
75
112
|
if tasks:
|
|
76
113
|
for task in tasks:
|
|
@@ -83,7 +120,11 @@ def stop(
|
|
|
83
120
|
def restart(
|
|
84
121
|
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
85
122
|
):
|
|
86
|
-
"""Restart tasks (all if none specified).
|
|
123
|
+
"""Restart tasks (all if none specified).
|
|
124
|
+
|
|
125
|
+
Full stop with signal escalation, port cleanup, then restart.
|
|
126
|
+
Clears the manually-stopped flag so auto-restart policies resume.
|
|
127
|
+
"""
|
|
87
128
|
cli = TaskmuxCLI()
|
|
88
129
|
if tasks:
|
|
89
130
|
for task in tasks:
|
|
@@ -96,7 +137,11 @@ def restart(
|
|
|
96
137
|
def kill(
|
|
97
138
|
task: str = typer.Argument(..., help="Task name to kill"),
|
|
98
139
|
):
|
|
99
|
-
"""Kill a specific task.
|
|
140
|
+
"""Kill a specific task (SIGKILL + destroy window).
|
|
141
|
+
|
|
142
|
+
Unlike stop, kill is immediate with no grace period. The tmux window is
|
|
143
|
+
destroyed. The task is marked as manually stopped (no auto-restart).
|
|
144
|
+
"""
|
|
100
145
|
cli = TaskmuxCLI()
|
|
101
146
|
cli.tmux.kill_task(task)
|
|
102
147
|
|
|
@@ -109,7 +154,11 @@ def logs(
|
|
|
109
154
|
grep: str | None = typer.Option(None, "-g", "--grep", help="Filter logs by pattern"),
|
|
110
155
|
context: int = typer.Option(3, "-C", "--context", help="Context lines around grep matches"),
|
|
111
156
|
):
|
|
112
|
-
"""Show logs for a task, or
|
|
157
|
+
"""Show logs for a task, or interleaved logs from all tasks.
|
|
158
|
+
|
|
159
|
+
Without -f, prints recent output. With -f, follows logs live with colored
|
|
160
|
+
task prefixes. Use -g to grep across tasks and -C for context lines.
|
|
161
|
+
"""
|
|
113
162
|
cli = TaskmuxCLI()
|
|
114
163
|
cli.tmux.show_logs(task, follow, lines, grep=grep, context=context)
|
|
115
164
|
|
|
@@ -118,7 +167,11 @@ def logs(
|
|
|
118
167
|
def inspect(
|
|
119
168
|
task: str = typer.Argument(..., help="Task name to inspect"),
|
|
120
169
|
):
|
|
121
|
-
"""Inspect task state as JSON.
|
|
170
|
+
"""Inspect task state as JSON.
|
|
171
|
+
|
|
172
|
+
Returns detailed info: name, command, restart_policy, running/healthy status,
|
|
173
|
+
pid, pane command, cwd, window/pane IDs, health_check, and depends_on.
|
|
174
|
+
"""
|
|
122
175
|
cli = TaskmuxCLI()
|
|
123
176
|
data = cli.tmux.inspect_task(task)
|
|
124
177
|
console.print_json(json.dumps(data))
|
|
@@ -134,7 +187,7 @@ def add(
|
|
|
134
187
|
None, "--depends-on", help="Dependency task names"
|
|
135
188
|
),
|
|
136
189
|
):
|
|
137
|
-
"""Add a new task."""
|
|
190
|
+
"""Add a new task to taskmux.toml."""
|
|
138
191
|
addTask(None, task, command, cwd=cwd, health_check=health_check, depends_on=depends_on)
|
|
139
192
|
console.print(f"Added task '{task}': {command}")
|
|
140
193
|
|
|
@@ -143,7 +196,7 @@ def add(
|
|
|
143
196
|
def remove(
|
|
144
197
|
task: str = typer.Argument(..., help="Task name to remove"),
|
|
145
198
|
):
|
|
146
|
-
"""Remove a task."""
|
|
199
|
+
"""Remove a task from taskmux.toml (kills it first if running)."""
|
|
147
200
|
cli = TaskmuxCLI()
|
|
148
201
|
|
|
149
202
|
if cli.tmux.session_exists():
|
|
@@ -157,7 +210,11 @@ def remove(
|
|
|
157
210
|
|
|
158
211
|
|
|
159
212
|
def _status():
|
|
160
|
-
"""Show session and task status.
|
|
213
|
+
"""Show session and task status.
|
|
214
|
+
|
|
215
|
+
Lists all tasks with health indicators, running state, ports, restart policy
|
|
216
|
+
(if non-default), working directory, and dependencies. Aliases: list, ls.
|
|
217
|
+
"""
|
|
161
218
|
cli = TaskmuxCLI()
|
|
162
219
|
cli.tmux.list_tasks()
|
|
163
220
|
|
|
@@ -169,7 +226,11 @@ app.command(name="ls", hidden=True)(_status)
|
|
|
169
226
|
|
|
170
227
|
@app.command()
|
|
171
228
|
def health():
|
|
172
|
-
"""Check health of all tasks.
|
|
229
|
+
"""Check health of all tasks.
|
|
230
|
+
|
|
231
|
+
Runs each task's health_check command (or falls back to pane-alive check).
|
|
232
|
+
Displays a table with health status for every configured task.
|
|
233
|
+
"""
|
|
173
234
|
cli = TaskmuxCLI()
|
|
174
235
|
|
|
175
236
|
if not cli.tmux.session_exists():
|
|
@@ -200,7 +261,11 @@ def health():
|
|
|
200
261
|
|
|
201
262
|
@app.command()
|
|
202
263
|
def watch():
|
|
203
|
-
"""Watch
|
|
264
|
+
"""Watch taskmux.toml for changes and reload on edit.
|
|
265
|
+
|
|
266
|
+
Stays in the foreground. When the config file changes, reloads it and
|
|
267
|
+
restarts affected tasks.
|
|
268
|
+
"""
|
|
204
269
|
cli = TaskmuxCLI()
|
|
205
270
|
watcher = SimpleConfigWatcher(cli)
|
|
206
271
|
watcher.watch_config()
|
|
@@ -210,7 +275,12 @@ def watch():
|
|
|
210
275
|
def daemon(
|
|
211
276
|
port: int = typer.Option(8765, "--port", help="WebSocket API port"),
|
|
212
277
|
):
|
|
213
|
-
"""Run in daemon mode with API.
|
|
278
|
+
"""Run in daemon mode with WebSocket API and health monitoring.
|
|
279
|
+
|
|
280
|
+
Monitors task health every 30s and auto-restarts per restart_policy with
|
|
281
|
+
exponential backoff. Watches config for changes. Exposes a WebSocket API
|
|
282
|
+
for status, restart, kill, and logs commands.
|
|
283
|
+
"""
|
|
214
284
|
d = TaskmuxDaemon(api_port=port)
|
|
215
285
|
asyncio.run(d.start())
|
|
216
286
|
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
|
|
7
7
|
import tomlkit
|
|
8
8
|
|
|
9
|
-
from .models import HookConfig, TaskConfig, TaskmuxConfig
|
|
9
|
+
from .models import HookConfig, RestartPolicy, TaskConfig, TaskmuxConfig
|
|
10
10
|
|
|
11
11
|
CONFIG_FILENAME = "taskmux.toml"
|
|
12
12
|
|
|
@@ -122,6 +122,8 @@ def writeConfig(path: Path | None, config: TaskmuxConfig) -> Path:
|
|
|
122
122
|
inner.add("max_restarts", task_cfg.max_restarts)
|
|
123
123
|
if task_cfg.restart_backoff != 2.0:
|
|
124
124
|
inner.add("restart_backoff", task_cfg.restart_backoff)
|
|
125
|
+
if task_cfg.restart_policy != RestartPolicy.ON_FAILURE:
|
|
126
|
+
inner.add("restart_policy", str(task_cfg.restart_policy))
|
|
125
127
|
if task_cfg.depends_on:
|
|
126
128
|
inner.add("depends_on", task_cfg.depends_on)
|
|
127
129
|
# Task-level hooks
|
|
@@ -39,26 +39,6 @@ class ConfigWatcher(FileSystemEventHandler):
|
|
|
39
39
|
self.taskmux_cli.handle_config_reload()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class RestartTracker:
|
|
43
|
-
"""Tracks per-task restart counts and timestamps for backoff."""
|
|
44
|
-
|
|
45
|
-
def __init__(self) -> None:
|
|
46
|
-
self._data: dict[str, dict[str, float]] = {}
|
|
47
|
-
|
|
48
|
-
def get(self, task_name: str) -> dict[str, float]:
|
|
49
|
-
return self._data.get(task_name, {"count": 0, "last": 0.0})
|
|
50
|
-
|
|
51
|
-
def record(self, task_name: str) -> None:
|
|
52
|
-
info = self.get(task_name)
|
|
53
|
-
self._data[task_name] = {
|
|
54
|
-
"count": info["count"] + 1,
|
|
55
|
-
"last": time.time(),
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
def reset(self, task_name: str) -> None:
|
|
59
|
-
self._data.pop(task_name, None)
|
|
60
|
-
|
|
61
|
-
|
|
62
42
|
class TaskmuxDaemon:
|
|
63
43
|
"""Daemon mode for Taskmux with enhanced monitoring and API"""
|
|
64
44
|
|
|
@@ -71,7 +51,6 @@ class TaskmuxDaemon:
|
|
|
71
51
|
self.health_check_interval = 30
|
|
72
52
|
self.health_check_task: asyncio.Task | None = None
|
|
73
53
|
self.websocket_clients: set = set()
|
|
74
|
-
self.restart_tracker = RestartTracker()
|
|
75
54
|
self.logger = self._setup_logging()
|
|
76
55
|
|
|
77
56
|
signal.signal(signal.SIGINT, self._signal_handler)
|
|
@@ -161,40 +140,9 @@ class TaskmuxDaemon:
|
|
|
161
140
|
await asyncio.sleep(5)
|
|
162
141
|
|
|
163
142
|
def _auto_restart_with_backoff(self) -> None:
|
|
164
|
-
"""Auto-restart
|
|
143
|
+
"""Auto-restart tasks using TmuxManager's restart policy logic."""
|
|
165
144
|
assert self.cli is not None
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
for task_name, task_cfg in self.cli.config.tasks.items():
|
|
169
|
-
healthy = self.cli.tmux.check_task_health(task_name)
|
|
170
|
-
|
|
171
|
-
if healthy:
|
|
172
|
-
# Reset tracker if healthy for >60s
|
|
173
|
-
info = self.restart_tracker.get(task_name)
|
|
174
|
-
if info["count"] > 0 and now - info["last"] > 60:
|
|
175
|
-
self.restart_tracker.reset(task_name)
|
|
176
|
-
continue
|
|
177
|
-
|
|
178
|
-
# Skip if not previously healthy (avoid restart loop on first check)
|
|
179
|
-
prev_health = self.cli.tmux.task_health.get(task_name, {}).get("healthy", True)
|
|
180
|
-
if not prev_health:
|
|
181
|
-
info = self.restart_tracker.get(task_name)
|
|
182
|
-
|
|
183
|
-
# Check max_restarts
|
|
184
|
-
if task_cfg.max_restarts and info["count"] >= task_cfg.max_restarts:
|
|
185
|
-
self.logger.warning(
|
|
186
|
-
f"Task '{task_name}' exceeded max restarts ({task_cfg.max_restarts})"
|
|
187
|
-
)
|
|
188
|
-
continue
|
|
189
|
-
|
|
190
|
-
# Check backoff delay
|
|
191
|
-
delay = min(task_cfg.restart_backoff ** info["count"], 60)
|
|
192
|
-
if info["last"] and now - info["last"] < delay:
|
|
193
|
-
continue
|
|
194
|
-
|
|
195
|
-
self.logger.info(f"Auto-restarting unhealthy task: {task_name}")
|
|
196
|
-
self.cli.tmux.restart_task(task_name)
|
|
197
|
-
self.restart_tracker.record(task_name)
|
|
145
|
+
self.cli.tmux.auto_restart_tasks()
|
|
198
146
|
|
|
199
147
|
async def _start_api_server(self) -> None:
|
|
200
148
|
"""Start WebSocket API server"""
|
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
"""Pydantic models for Taskmux configuration."""
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
+
from enum import StrEnum
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel, ConfigDict, model_validator
|
|
6
7
|
|
|
7
8
|
|
|
9
|
+
class RestartPolicy(StrEnum):
|
|
10
|
+
"""Docker-style restart policy for tasks."""
|
|
11
|
+
|
|
12
|
+
NO = "no"
|
|
13
|
+
ON_FAILURE = "on-failure"
|
|
14
|
+
ALWAYS = "always"
|
|
15
|
+
|
|
16
|
+
|
|
8
17
|
class _StrictConfig(BaseModel):
|
|
9
18
|
"""Base config: frozen, warns on unknown keys."""
|
|
10
19
|
|
|
@@ -45,6 +54,7 @@ class TaskConfig(_StrictConfig):
|
|
|
45
54
|
stop_grace_period: int = 5
|
|
46
55
|
max_restarts: int = 5
|
|
47
56
|
restart_backoff: float = 2.0
|
|
57
|
+
restart_policy: RestartPolicy = RestartPolicy.ON_FAILURE
|
|
48
58
|
depends_on: list[str] = []
|
|
49
59
|
hooks: HookConfig = HookConfig()
|
|
50
60
|
|
|
@@ -15,13 +15,52 @@ from rich.console import Console
|
|
|
15
15
|
from rich.markup import escape
|
|
16
16
|
|
|
17
17
|
from .hooks import runHook
|
|
18
|
-
from .models import TaskmuxConfig
|
|
18
|
+
from .models import RestartPolicy, TaskmuxConfig
|
|
19
19
|
|
|
20
20
|
SHELL_NAMES = frozenset(("bash", "zsh", "sh", "fish"))
|
|
21
21
|
|
|
22
22
|
TASK_COLORS = ["cyan", "green", "yellow", "magenta", "blue", "red"]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
class RestartTracker:
|
|
26
|
+
"""Tracks per-task restart counts, health failures, and manual-stop state."""
|
|
27
|
+
|
|
28
|
+
def __init__(self) -> None:
|
|
29
|
+
self._data: dict[str, dict[str, float]] = {}
|
|
30
|
+
self._consecutive_failures: dict[str, int] = {}
|
|
31
|
+
self._manually_stopped: set[str] = set()
|
|
32
|
+
|
|
33
|
+
def get(self, task_name: str) -> dict[str, float]:
|
|
34
|
+
return self._data.get(task_name, {"count": 0, "last": 0.0})
|
|
35
|
+
|
|
36
|
+
def record(self, task_name: str) -> None:
|
|
37
|
+
info = self.get(task_name)
|
|
38
|
+
self._data[task_name] = {
|
|
39
|
+
"count": info["count"] + 1,
|
|
40
|
+
"last": time.time(),
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
def reset(self, task_name: str) -> None:
|
|
44
|
+
self._data.pop(task_name, None)
|
|
45
|
+
|
|
46
|
+
def record_health_failure(self, task_name: str) -> int:
|
|
47
|
+
count = self._consecutive_failures.get(task_name, 0) + 1
|
|
48
|
+
self._consecutive_failures[task_name] = count
|
|
49
|
+
return count
|
|
50
|
+
|
|
51
|
+
def reset_health_failures(self, task_name: str) -> None:
|
|
52
|
+
self._consecutive_failures.pop(task_name, None)
|
|
53
|
+
|
|
54
|
+
def mark_manually_stopped(self, task_name: str) -> None:
|
|
55
|
+
self._manually_stopped.add(task_name)
|
|
56
|
+
|
|
57
|
+
def clear_manually_stopped(self, task_name: str) -> None:
|
|
58
|
+
self._manually_stopped.discard(task_name)
|
|
59
|
+
|
|
60
|
+
def is_manually_stopped(self, task_name: str) -> bool:
|
|
61
|
+
return task_name in self._manually_stopped
|
|
62
|
+
|
|
63
|
+
|
|
25
64
|
def _find_new_lines(current: list[str], prev_tail: list[str]) -> list[str]:
|
|
26
65
|
"""Return lines in current that are new since prev_tail."""
|
|
27
66
|
if not prev_tail:
|
|
@@ -43,6 +82,7 @@ class TmuxManager:
|
|
|
43
82
|
self.server = libtmux.Server()
|
|
44
83
|
self.session: libtmux.Session | None = None
|
|
45
84
|
self.task_health: dict = {}
|
|
85
|
+
self.restart_tracker = RestartTracker()
|
|
46
86
|
self._refresh_session()
|
|
47
87
|
|
|
48
88
|
def _refresh_session(self) -> None:
|
|
@@ -232,6 +272,7 @@ class TmuxManager:
|
|
|
232
272
|
|
|
233
273
|
def start_task(self, task_name: str) -> None:
|
|
234
274
|
"""Start a single task (create window + send command)."""
|
|
275
|
+
self.restart_tracker.clear_manually_stopped(task_name)
|
|
235
276
|
if task_name not in self.config.tasks:
|
|
236
277
|
print(f"Task '{task_name}' not found in config")
|
|
237
278
|
return
|
|
@@ -286,6 +327,7 @@ class TmuxManager:
|
|
|
286
327
|
|
|
287
328
|
def stop_task(self, task_name: str) -> None:
|
|
288
329
|
"""Graceful stop with signal escalation: C-c → SIGTERM → SIGKILL."""
|
|
330
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
289
331
|
if not self.session_exists():
|
|
290
332
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
291
333
|
return
|
|
@@ -396,6 +438,9 @@ class TmuxManager:
|
|
|
396
438
|
|
|
397
439
|
def stop_all(self) -> None:
|
|
398
440
|
"""Stop all tasks with signal escalation then kill session."""
|
|
441
|
+
for task_name in self.config.tasks:
|
|
442
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
443
|
+
|
|
399
444
|
if not self.session_exists():
|
|
400
445
|
print("No session running")
|
|
401
446
|
return
|
|
@@ -456,6 +501,7 @@ class TmuxManager:
|
|
|
456
501
|
|
|
457
502
|
def restart_task(self, task_name: str) -> None:
|
|
458
503
|
"""Restart a specific task with full stop escalation."""
|
|
504
|
+
self.restart_tracker.clear_manually_stopped(task_name)
|
|
459
505
|
if not self.session_exists():
|
|
460
506
|
print(f"Session '{self.config.name}' doesn't exist. Run 'taskmux start' first.")
|
|
461
507
|
return
|
|
@@ -508,6 +554,7 @@ class TmuxManager:
|
|
|
508
554
|
|
|
509
555
|
def kill_task(self, task_name: str) -> None:
|
|
510
556
|
"""Kill a specific task (process group + window)."""
|
|
557
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
511
558
|
if not self.session_exists():
|
|
512
559
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
513
560
|
return
|
|
@@ -534,6 +581,7 @@ class TmuxManager:
|
|
|
534
581
|
"name": task_name,
|
|
535
582
|
"command": task_cfg.command,
|
|
536
583
|
"auto_start": task_cfg.auto_start,
|
|
584
|
+
"restart_policy": str(task_cfg.restart_policy),
|
|
537
585
|
"cwd": task_cfg.cwd,
|
|
538
586
|
"health_check": task_cfg.health_check,
|
|
539
587
|
"depends_on": task_cfg.depends_on,
|
|
@@ -717,6 +765,8 @@ class TmuxManager:
|
|
|
717
765
|
extras = ""
|
|
718
766
|
if task_cfg.cwd:
|
|
719
767
|
extras += f" cwd={task_cfg.cwd}"
|
|
768
|
+
if task_cfg.restart_policy != RestartPolicy.ON_FAILURE:
|
|
769
|
+
extras += f" restart={task_cfg.restart_policy}"
|
|
720
770
|
if task_cfg.depends_on:
|
|
721
771
|
extras += f" deps=[{','.join(task_cfg.depends_on)}]"
|
|
722
772
|
line = f"{health_icon} {status_text:8} {task_name:15}{port:7} {task_cfg.command}"
|
|
@@ -735,17 +785,68 @@ class TmuxManager:
|
|
|
735
785
|
|
|
736
786
|
return is_healthy
|
|
737
787
|
|
|
738
|
-
def
|
|
739
|
-
"""Auto-restart tasks
|
|
788
|
+
def auto_restart_tasks(self) -> None:
|
|
789
|
+
"""Auto-restart tasks based on restart_policy, health_retries, max_restarts, and backoff."""
|
|
740
790
|
if not self.session_exists():
|
|
741
791
|
return
|
|
742
792
|
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
793
|
+
now = time.time()
|
|
794
|
+
|
|
795
|
+
for task_name, task_cfg in self.config.tasks.items():
|
|
796
|
+
if task_cfg.restart_policy == RestartPolicy.NO:
|
|
797
|
+
continue
|
|
798
|
+
if self.restart_tracker.is_manually_stopped(task_name):
|
|
799
|
+
continue
|
|
800
|
+
|
|
801
|
+
healthy = self.check_task_health(task_name)
|
|
802
|
+
pane_alive = self._is_pane_alive(task_name)
|
|
803
|
+
|
|
804
|
+
if healthy:
|
|
805
|
+
self.restart_tracker.reset_health_failures(task_name)
|
|
806
|
+
# Reset restart tracker after 60s stable
|
|
807
|
+
info = self.restart_tracker.get(task_name)
|
|
808
|
+
if info["count"] > 0 and now - info["last"] > 60:
|
|
809
|
+
self.restart_tracker.reset(task_name)
|
|
810
|
+
continue
|
|
811
|
+
|
|
812
|
+
# "on-failure": restart on crash or health_retries exceeded
|
|
813
|
+
# "always": restart whenever pane is dead (even clean exit)
|
|
814
|
+
should_restart = False
|
|
815
|
+
|
|
816
|
+
if not pane_alive:
|
|
817
|
+
# Process exited — restart for both on-failure and always
|
|
818
|
+
should_restart = True
|
|
819
|
+
elif task_cfg.restart_policy == RestartPolicy.ON_FAILURE:
|
|
820
|
+
# Pane alive but health check failing — count consecutive failures
|
|
821
|
+
failures = self.restart_tracker.record_health_failure(task_name)
|
|
822
|
+
if failures >= task_cfg.health_retries:
|
|
823
|
+
should_restart = True
|
|
824
|
+
elif task_cfg.restart_policy == RestartPolicy.ALWAYS:
|
|
825
|
+
failures = self.restart_tracker.record_health_failure(task_name)
|
|
826
|
+
if failures >= task_cfg.health_retries:
|
|
827
|
+
should_restart = True
|
|
828
|
+
|
|
829
|
+
if not should_restart:
|
|
830
|
+
continue
|
|
831
|
+
|
|
832
|
+
# Check max_restarts limit
|
|
833
|
+
info = self.restart_tracker.get(task_name)
|
|
834
|
+
if task_cfg.max_restarts and info["count"] >= task_cfg.max_restarts:
|
|
835
|
+
continue
|
|
836
|
+
|
|
837
|
+
# Check backoff delay
|
|
838
|
+
delay = min(task_cfg.restart_backoff ** info["count"], 60)
|
|
839
|
+
if info["last"] and now - info["last"] < delay:
|
|
840
|
+
continue
|
|
841
|
+
|
|
842
|
+
print(f"Auto-restarting task: {task_name}")
|
|
843
|
+
self.restart_task(task_name)
|
|
844
|
+
self.restart_tracker.record(task_name)
|
|
845
|
+
self.restart_tracker.reset_health_failures(task_name)
|
|
846
|
+
|
|
847
|
+
def auto_restart_unhealthy_tasks(self) -> None:
|
|
848
|
+
"""Deprecated: use auto_restart_tasks() instead."""
|
|
849
|
+
self.auto_restart_tasks()
|
|
749
850
|
|
|
750
851
|
def stop_session(self) -> None:
|
|
751
852
|
"""Stop the entire tmux session (legacy, wraps stop_all)."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|