taskmux 0.2.6__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {taskmux-0.2.6 → taskmux-0.3.0}/PKG-INFO +113 -37
- {taskmux-0.2.6 → taskmux-0.3.0}/README.md +112 -36
- {taskmux-0.2.6 → taskmux-0.3.0}/pyproject.toml +1 -1
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/agent.py +5 -2
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/cli.py +104 -34
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/config.py +3 -1
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/daemon.py +2 -54
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/models.py +10 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/tmux_manager.py +119 -22
- {taskmux-0.2.6 → taskmux-0.3.0}/.gitignore +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/LICENSE +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/__init__.py +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/hooks.py +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/init.py +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/main.py +0 -0
- {taskmux-0.2.6 → taskmux-0.3.0}/taskmux/templates/claude.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: taskmux
|
|
3
|
-
Version: 0.2.6
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Modern tmux-based task manager for LLM development tools
|
|
5
5
|
Project-URL: Homepage, https://github.com/nc9/taskmux
|
|
6
6
|
Project-URL: Repository, https://github.com/nc9/taskmux
|
|
@@ -35,7 +35,7 @@ Description-Content-Type: text/markdown
|
|
|
35
35
|
|
|
36
36
|
# Taskmux
|
|
37
37
|
|
|
38
|
-
A modern tmux session manager for LLM development tools with health monitoring,
|
|
38
|
+
A modern tmux session manager for LLM development tools with health monitoring, restart policies, and WebSocket API.
|
|
39
39
|
|
|
40
40
|
## Why Taskmux?
|
|
41
41
|
|
|
@@ -125,17 +125,23 @@ health_check = "test -f .migrate-complete"
|
|
|
125
125
|
[tasks.api]
|
|
126
126
|
command = "python manage.py runserver 0.0.0.0:8000"
|
|
127
127
|
cwd = "apps/api"
|
|
128
|
+
port = 8000
|
|
128
129
|
depends_on = ["migrate"]
|
|
129
130
|
health_check = "curl -sf http://localhost:8000/health"
|
|
131
|
+
stop_grace_period = 10
|
|
130
132
|
|
|
131
133
|
[tasks.worker]
|
|
132
134
|
command = "celery -A myapp worker -l info"
|
|
133
135
|
cwd = "apps/api"
|
|
134
136
|
depends_on = ["db"]
|
|
137
|
+
restart_policy = "always"
|
|
138
|
+
max_restarts = 10
|
|
139
|
+
restart_backoff = 3.0
|
|
135
140
|
|
|
136
141
|
[tasks.web]
|
|
137
142
|
command = "bun dev"
|
|
138
143
|
cwd = "apps/web"
|
|
144
|
+
port = 3000
|
|
139
145
|
depends_on = ["api"]
|
|
140
146
|
health_check = "curl -sf http://localhost:3000"
|
|
141
147
|
|
|
@@ -168,46 +174,49 @@ taskmux start storybook # Start a manual task
|
|
|
168
174
|
## Commands
|
|
169
175
|
|
|
170
176
|
```bash
|
|
171
|
-
# Session
|
|
172
|
-
taskmux start # Start all auto_start tasks
|
|
173
|
-
taskmux start <task>
|
|
174
|
-
taskmux
|
|
175
|
-
taskmux stop
|
|
177
|
+
# Session lifecycle
|
|
178
|
+
taskmux start # Start all auto_start tasks in dependency order
|
|
179
|
+
taskmux start <task> [task2...] # Start specific tasks
|
|
180
|
+
taskmux start -m # Start + stay in foreground monitoring health/restarting
|
|
181
|
+
taskmux stop # Stop all (C-c → SIGTERM → SIGKILL), prevents auto-restart
|
|
182
|
+
taskmux stop <task> [task2...] # Stop specific tasks
|
|
176
183
|
taskmux restart # Restart all tasks
|
|
177
|
-
taskmux restart <task>
|
|
178
|
-
taskmux status # Show session status
|
|
179
|
-
taskmux list # List tasks with health indicators
|
|
184
|
+
taskmux restart <task> [task2...] # Restart specific tasks, re-enables auto-restart
|
|
180
185
|
|
|
181
|
-
#
|
|
182
|
-
taskmux kill <task> # Hard-kill
|
|
183
|
-
taskmux add <task> "<command>" # Add task to
|
|
184
|
-
taskmux remove <task> # Remove task
|
|
185
|
-
taskmux inspect <task> # JSON
|
|
186
|
+
# Task management
|
|
187
|
+
taskmux kill <task> # Hard-kill (SIGKILL + destroy window), prevents auto-restart
|
|
188
|
+
taskmux add <task> "<command>" # Add task to taskmux.toml
|
|
189
|
+
taskmux remove <task> # Remove task (kills if running)
|
|
190
|
+
taskmux inspect <task> # JSON state: pid, health, restart_policy, pane info
|
|
191
|
+
|
|
192
|
+
# Status & health
|
|
193
|
+
taskmux status # Session + task overview (aliases: list, ls)
|
|
194
|
+
taskmux health # Health check table for all tasks
|
|
186
195
|
|
|
187
196
|
# Logs
|
|
188
197
|
taskmux logs # Interleaved logs from all tasks
|
|
189
|
-
taskmux logs <task> #
|
|
190
|
-
taskmux logs -f
|
|
191
|
-
taskmux logs -f <task> # Follow a task's logs live
|
|
198
|
+
taskmux logs <task> # Recent logs for a task
|
|
199
|
+
taskmux logs -f [task] # Follow logs live (colored prefixes)
|
|
192
200
|
taskmux logs -n 200 <task> # Last N lines
|
|
193
|
-
taskmux logs -g "error" #
|
|
194
|
-
taskmux logs <task> -g "
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
#
|
|
198
|
-
taskmux init
|
|
199
|
-
taskmux
|
|
200
|
-
|
|
201
|
-
# Monitoring
|
|
202
|
-
taskmux health # Health check table
|
|
203
|
-
taskmux watch # Watch config for changes, reload on edit
|
|
204
|
-
taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
201
|
+
taskmux logs -g "error" # Grep all tasks
|
|
202
|
+
taskmux logs <task> -g "err" -C 5 # Grep one task with context
|
|
203
|
+
|
|
204
|
+
# Setup & monitoring
|
|
205
|
+
taskmux init # Interactive project setup + agent context injection
|
|
206
|
+
taskmux init --defaults # Non-interactive setup
|
|
207
|
+
taskmux watch # Watch taskmux.toml, reload on change
|
|
208
|
+
taskmux daemon --port 8765 # Daemon mode: WebSocket API + health monitoring
|
|
205
209
|
```
|
|
206
210
|
|
|
207
|
-
### stop vs kill
|
|
211
|
+
### stop vs kill vs restart
|
|
208
212
|
|
|
209
|
-
|
|
210
|
-
|
|
213
|
+
| Command | Signal | Window | Auto-restart |
|
|
214
|
+
|---------|--------|--------|--------------|
|
|
215
|
+
| `stop` | C-c → SIGTERM → SIGKILL (graceful) | Stays alive | Blocked (manually stopped) |
|
|
216
|
+
| `kill` | SIGKILL (immediate) | Destroyed | Blocked (manually stopped) |
|
|
217
|
+
| `restart` | Full stop + restart | Reused | Re-enabled |
|
|
218
|
+
|
|
219
|
+
Both `stop` and `kill` mark the task as **manually stopped**, preventing auto-restart even with `restart_policy = "always"`. Use `restart` or `start` to clear this flag and re-enable auto-restart.
|
|
211
220
|
|
|
212
221
|
## Configuration
|
|
213
222
|
|
|
@@ -226,7 +235,9 @@ after_stop = "echo done"
|
|
|
226
235
|
[tasks.server]
|
|
227
236
|
command = "python manage.py runserver"
|
|
228
237
|
cwd = "apps/api"
|
|
238
|
+
port = 8000
|
|
229
239
|
health_check = "curl -sf http://localhost:8000/health"
|
|
240
|
+
stop_grace_period = 10
|
|
230
241
|
depends_on = ["db"]
|
|
231
242
|
|
|
232
243
|
[tasks.server.hooks]
|
|
@@ -239,10 +250,13 @@ health_check = "pg_isready -h localhost"
|
|
|
239
250
|
[tasks.worker]
|
|
240
251
|
command = "celery worker -A myapp"
|
|
241
252
|
depends_on = ["db"]
|
|
253
|
+
restart_policy = "always"
|
|
254
|
+
max_restarts = 10
|
|
242
255
|
|
|
243
256
|
[tasks.tailwind]
|
|
244
257
|
command = "npx tailwindcss -w"
|
|
245
258
|
auto_start = false
|
|
259
|
+
restart_policy = "no"
|
|
246
260
|
```
|
|
247
261
|
|
|
248
262
|
### Fields
|
|
@@ -258,10 +272,15 @@ auto_start = false
|
|
|
258
272
|
| `tasks.<name>.command` | — | Shell command to run |
|
|
259
273
|
| `tasks.<name>.auto_start` | `true` | Start with `taskmux start` |
|
|
260
274
|
| `tasks.<name>.cwd` | — | Working directory for the task |
|
|
275
|
+
| `tasks.<name>.port` | — | Port to clean up before starting (kills orphaned listeners) |
|
|
261
276
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
262
277
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
263
278
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
264
|
-
| `tasks.<name>.health_retries` | `3` | Consecutive failures before
|
|
279
|
+
| `tasks.<name>.health_retries` | `3` | Consecutive health failures before triggering a restart |
|
|
280
|
+
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
281
|
+
| `tasks.<name>.restart_policy` | `"on-failure"` | When to auto-restart: `"no"`, `"on-failure"`, or `"always"` (see below) |
|
|
282
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts before giving up (resets after 60s healthy) |
|
|
283
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Exponential backoff base for restart delay (1s, 2s, 4s… capped at 60s) |
|
|
265
284
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
266
285
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
267
286
|
|
|
@@ -273,14 +292,42 @@ Circular dependencies and references to nonexistent tasks are rejected at config
|
|
|
273
292
|
|
|
274
293
|
When starting a single task with `taskmux start <task>`, dependencies are not auto-started — you get a warning if they aren't running.
|
|
275
294
|
|
|
295
|
+
### Restart Policies
|
|
296
|
+
|
|
297
|
+
Each task has a `restart_policy` that controls automatic restart behavior. Restart policies are enforced by `taskmux start --monitor` and `taskmux daemon`.
|
|
298
|
+
|
|
299
|
+
| Policy | Behavior |
|
|
300
|
+
|--------|----------|
|
|
301
|
+
| `"no"` | Never auto-restart. Task stays stopped after crash or health failure. |
|
|
302
|
+
| `"on-failure"` | **(default)** Restart on crash (process exits) or after `health_retries` consecutive health check failures. |
|
|
303
|
+
| `"always"` | Restart whenever the task stops, including clean exits. |
|
|
304
|
+
|
|
305
|
+
**Manual stops override all policies.** Running `taskmux stop` or `taskmux kill` marks the task as manually stopped — it will not auto-restart even with `restart_policy = "always"`. Use `taskmux restart` or `taskmux start` to clear this flag.
|
|
306
|
+
|
|
307
|
+
**`restart_policy` vs `auto_start`** — these are orthogonal. `auto_start` controls whether a task launches on `taskmux start`. `restart_policy` controls what happens after a running task exits or fails. A task with `auto_start = false` and `restart_policy = "always"` won't start automatically, but once started manually, it will auto-restart on exit.
|
|
308
|
+
|
|
309
|
+
| `restart_policy` | `auto_start` | Behavior |
|
|
310
|
+
|---|---|---|
|
|
311
|
+
| `"no"` | `true` | Starts with session, never auto-restarts |
|
|
312
|
+
| `"no"` | `false` | Manual start only, never auto-restarts |
|
|
313
|
+
| `"on-failure"` | `true` | Starts with session, restarts on crash/health failure |
|
|
314
|
+
| `"on-failure"` | `false` | Manual start, restarts on crash/health failure once running |
|
|
315
|
+
| `"always"` | `true` | Starts with session, restarts on any exit |
|
|
316
|
+
| `"always"` | `false` | Manual start, restarts on any exit once running |
|
|
317
|
+
|
|
318
|
+
**Backoff & limits:** When a task keeps failing, restart delays increase exponentially: `restart_backoff ^ attempt` seconds (capped at 60s). After `max_restarts` consecutive restarts, the task is left stopped. The restart counter resets after 60 seconds of healthy uptime.
|
|
319
|
+
|
|
276
320
|
### Health Checks
|
|
277
321
|
|
|
278
322
|
If `health_check` is set, taskmux runs it as a shell command. Exit code 0 means healthy. If not set, taskmux falls back to checking if the tmux pane has a running process (not just a shell prompt).
|
|
279
323
|
|
|
324
|
+
A task must fail `health_retries` consecutive health checks (default 3) before being considered unhealthy and triggering a restart. If the task becomes healthy again, the failure counter resets.
|
|
325
|
+
|
|
280
326
|
Health checks are used by:
|
|
281
327
|
- `taskmux health` — shows a table of all task health
|
|
282
328
|
- `taskmux start` — waits for dependencies to be healthy before starting dependents
|
|
283
|
-
- `taskmux
|
|
329
|
+
- `taskmux start --monitor` — continuously monitors and auto-restarts per restart_policy
|
|
330
|
+
- `taskmux daemon` — same as --monitor, plus WebSocket API and config watching
|
|
284
331
|
|
|
285
332
|
### Hook Cascade
|
|
286
333
|
|
|
@@ -290,6 +337,20 @@ Hooks fire in this order:
|
|
|
290
337
|
|
|
291
338
|
If a `before_*` hook fails (non-zero exit), the action is aborted.
|
|
292
339
|
|
|
340
|
+
### Process Lifecycle
|
|
341
|
+
|
|
342
|
+
Taskmux ensures processes are fully stopped before restarting and that orphaned port listeners don't block new starts.
|
|
343
|
+
|
|
344
|
+
**Stop escalation** (`stop`, `restart`):
|
|
345
|
+
|
|
346
|
+
1. **C-c** (SIGINT) — waits `stop_grace_period` seconds (default 5)
|
|
347
|
+
2. **SIGTERM** to process group — waits 3 seconds
|
|
348
|
+
3. **SIGKILL** to process group — force kill
|
|
349
|
+
|
|
350
|
+
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
351
|
+
|
|
352
|
+
**Auto-restart** (`start --monitor`, `daemon`): Tasks with `restart_policy = "on-failure"` or `"always"` are automatically restarted. Health checks must fail `health_retries` times before triggering a restart. Restart delays increase exponentially (`restart_backoff` base, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
353
|
+
|
|
293
354
|
### Init & Agent Context
|
|
294
355
|
|
|
295
356
|
`taskmux init` bootstraps your project:
|
|
@@ -310,6 +371,7 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
310
371
|
"name": "api",
|
|
311
372
|
"command": "python manage.py runserver 0.0.0.0:8000",
|
|
312
373
|
"auto_start": true,
|
|
374
|
+
"restart_policy": "on-failure",
|
|
313
375
|
"cwd": "apps/api",
|
|
314
376
|
"health_check": "curl -sf http://localhost:8000/health",
|
|
315
377
|
"depends_on": ["db"],
|
|
@@ -323,15 +385,29 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
323
385
|
}
|
|
324
386
|
```
|
|
325
387
|
|
|
326
|
-
##
|
|
388
|
+
## Monitoring & Auto-restart
|
|
389
|
+
|
|
390
|
+
### start --monitor (lightweight)
|
|
327
391
|
|
|
328
|
-
|
|
392
|
+
Start tasks and stay in the foreground monitoring health:
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
taskmux start --monitor # or: taskmux start -m
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
Checks health every 30 seconds and auto-restarts tasks according to their `restart_policy`. No WebSocket API — just monitoring and restart. Press Ctrl+C to stop monitoring (tasks keep running).
|
|
399
|
+
|
|
400
|
+
### Daemon Mode (full)
|
|
401
|
+
|
|
402
|
+
Run as a background daemon with WebSocket API, config watching, and auto-restart:
|
|
329
403
|
|
|
330
404
|
```bash
|
|
331
405
|
taskmux daemon # Default port 8765
|
|
332
406
|
taskmux daemon --port 9000 # Custom port
|
|
333
407
|
```
|
|
334
408
|
|
|
409
|
+
The daemon monitors task health every 30 seconds. Tasks are restarted per their `restart_policy` with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset. Config file changes are detected and applied automatically.
|
|
410
|
+
|
|
335
411
|
WebSocket API:
|
|
336
412
|
|
|
337
413
|
```javascript
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Taskmux
|
|
2
2
|
|
|
3
|
-
A modern tmux session manager for LLM development tools with health monitoring,
|
|
3
|
+
A modern tmux session manager for LLM development tools with health monitoring, restart policies, and WebSocket API.
|
|
4
4
|
|
|
5
5
|
## Why Taskmux?
|
|
6
6
|
|
|
@@ -90,17 +90,23 @@ health_check = "test -f .migrate-complete"
|
|
|
90
90
|
[tasks.api]
|
|
91
91
|
command = "python manage.py runserver 0.0.0.0:8000"
|
|
92
92
|
cwd = "apps/api"
|
|
93
|
+
port = 8000
|
|
93
94
|
depends_on = ["migrate"]
|
|
94
95
|
health_check = "curl -sf http://localhost:8000/health"
|
|
96
|
+
stop_grace_period = 10
|
|
95
97
|
|
|
96
98
|
[tasks.worker]
|
|
97
99
|
command = "celery -A myapp worker -l info"
|
|
98
100
|
cwd = "apps/api"
|
|
99
101
|
depends_on = ["db"]
|
|
102
|
+
restart_policy = "always"
|
|
103
|
+
max_restarts = 10
|
|
104
|
+
restart_backoff = 3.0
|
|
100
105
|
|
|
101
106
|
[tasks.web]
|
|
102
107
|
command = "bun dev"
|
|
103
108
|
cwd = "apps/web"
|
|
109
|
+
port = 3000
|
|
104
110
|
depends_on = ["api"]
|
|
105
111
|
health_check = "curl -sf http://localhost:3000"
|
|
106
112
|
|
|
@@ -133,46 +139,49 @@ taskmux start storybook # Start a manual task
|
|
|
133
139
|
## Commands
|
|
134
140
|
|
|
135
141
|
```bash
|
|
136
|
-
# Session
|
|
137
|
-
taskmux start # Start all auto_start tasks
|
|
138
|
-
taskmux start <task>
|
|
139
|
-
taskmux
|
|
140
|
-
taskmux stop
|
|
142
|
+
# Session lifecycle
|
|
143
|
+
taskmux start # Start all auto_start tasks in dependency order
|
|
144
|
+
taskmux start <task> [task2...] # Start specific tasks
|
|
145
|
+
taskmux start -m # Start + stay in foreground monitoring health/restarting
|
|
146
|
+
taskmux stop # Stop all (C-c → SIGTERM → SIGKILL), prevents auto-restart
|
|
147
|
+
taskmux stop <task> [task2...] # Stop specific tasks
|
|
141
148
|
taskmux restart # Restart all tasks
|
|
142
|
-
taskmux restart <task>
|
|
143
|
-
taskmux status # Show session status
|
|
144
|
-
taskmux list # List tasks with health indicators
|
|
149
|
+
taskmux restart <task> [task2...] # Restart specific tasks, re-enables auto-restart
|
|
145
150
|
|
|
146
|
-
#
|
|
147
|
-
taskmux kill <task> # Hard-kill
|
|
148
|
-
taskmux add <task> "<command>" # Add task to
|
|
149
|
-
taskmux remove <task> # Remove task
|
|
150
|
-
taskmux inspect <task> # JSON
|
|
151
|
+
# Task management
|
|
152
|
+
taskmux kill <task> # Hard-kill (SIGKILL + destroy window), prevents auto-restart
|
|
153
|
+
taskmux add <task> "<command>" # Add task to taskmux.toml
|
|
154
|
+
taskmux remove <task> # Remove task (kills if running)
|
|
155
|
+
taskmux inspect <task> # JSON state: pid, health, restart_policy, pane info
|
|
156
|
+
|
|
157
|
+
# Status & health
|
|
158
|
+
taskmux status # Session + task overview (aliases: list, ls)
|
|
159
|
+
taskmux health # Health check table for all tasks
|
|
151
160
|
|
|
152
161
|
# Logs
|
|
153
162
|
taskmux logs # Interleaved logs from all tasks
|
|
154
|
-
taskmux logs <task> #
|
|
155
|
-
taskmux logs -f
|
|
156
|
-
taskmux logs -f <task> # Follow a task's logs live
|
|
163
|
+
taskmux logs <task> # Recent logs for a task
|
|
164
|
+
taskmux logs -f [task] # Follow logs live (colored prefixes)
|
|
157
165
|
taskmux logs -n 200 <task> # Last N lines
|
|
158
|
-
taskmux logs -g "error" #
|
|
159
|
-
taskmux logs <task> -g "
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
#
|
|
163
|
-
taskmux init
|
|
164
|
-
taskmux
|
|
165
|
-
|
|
166
|
-
# Monitoring
|
|
167
|
-
taskmux health # Health check table
|
|
168
|
-
taskmux watch # Watch config for changes, reload on edit
|
|
169
|
-
taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
166
|
+
taskmux logs -g "error" # Grep all tasks
|
|
167
|
+
taskmux logs <task> -g "err" -C 5 # Grep one task with context
|
|
168
|
+
|
|
169
|
+
# Setup & monitoring
|
|
170
|
+
taskmux init # Interactive project setup + agent context injection
|
|
171
|
+
taskmux init --defaults # Non-interactive setup
|
|
172
|
+
taskmux watch # Watch taskmux.toml, reload on change
|
|
173
|
+
taskmux daemon --port 8765 # Daemon mode: WebSocket API + health monitoring
|
|
170
174
|
```
|
|
171
175
|
|
|
172
|
-
### stop vs kill
|
|
176
|
+
### stop vs kill vs restart
|
|
173
177
|
|
|
174
|
-
|
|
175
|
-
|
|
178
|
+
| Command | Signal | Window | Auto-restart |
|
|
179
|
+
|---------|--------|--------|--------------|
|
|
180
|
+
| `stop` | C-c → SIGTERM → SIGKILL (graceful) | Stays alive | Blocked (manually stopped) |
|
|
181
|
+
| `kill` | SIGKILL (immediate) | Destroyed | Blocked (manually stopped) |
|
|
182
|
+
| `restart` | Full stop + restart | Reused | Re-enabled |
|
|
183
|
+
|
|
184
|
+
Both `stop` and `kill` mark the task as **manually stopped**, preventing auto-restart even with `restart_policy = "always"`. Use `restart` or `start` to clear this flag and re-enable auto-restart.
|
|
176
185
|
|
|
177
186
|
## Configuration
|
|
178
187
|
|
|
@@ -191,7 +200,9 @@ after_stop = "echo done"
|
|
|
191
200
|
[tasks.server]
|
|
192
201
|
command = "python manage.py runserver"
|
|
193
202
|
cwd = "apps/api"
|
|
203
|
+
port = 8000
|
|
194
204
|
health_check = "curl -sf http://localhost:8000/health"
|
|
205
|
+
stop_grace_period = 10
|
|
195
206
|
depends_on = ["db"]
|
|
196
207
|
|
|
197
208
|
[tasks.server.hooks]
|
|
@@ -204,10 +215,13 @@ health_check = "pg_isready -h localhost"
|
|
|
204
215
|
[tasks.worker]
|
|
205
216
|
command = "celery worker -A myapp"
|
|
206
217
|
depends_on = ["db"]
|
|
218
|
+
restart_policy = "always"
|
|
219
|
+
max_restarts = 10
|
|
207
220
|
|
|
208
221
|
[tasks.tailwind]
|
|
209
222
|
command = "npx tailwindcss -w"
|
|
210
223
|
auto_start = false
|
|
224
|
+
restart_policy = "no"
|
|
211
225
|
```
|
|
212
226
|
|
|
213
227
|
### Fields
|
|
@@ -223,10 +237,15 @@ auto_start = false
|
|
|
223
237
|
| `tasks.<name>.command` | — | Shell command to run |
|
|
224
238
|
| `tasks.<name>.auto_start` | `true` | Start with `taskmux start` |
|
|
225
239
|
| `tasks.<name>.cwd` | — | Working directory for the task |
|
|
240
|
+
| `tasks.<name>.port` | — | Port to clean up before starting (kills orphaned listeners) |
|
|
226
241
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
227
242
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
228
243
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
229
|
-
| `tasks.<name>.health_retries` | `3` | Consecutive failures before
|
|
244
|
+
| `tasks.<name>.health_retries` | `3` | Consecutive health failures before triggering a restart |
|
|
245
|
+
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
246
|
+
| `tasks.<name>.restart_policy` | `"on-failure"` | When to auto-restart: `"no"`, `"on-failure"`, or `"always"` (see below) |
|
|
247
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts before giving up (resets after 60s healthy) |
|
|
248
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Exponential backoff base for restart delay (1s, 2s, 4s… capped at 60s) |
|
|
230
249
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
231
250
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
232
251
|
|
|
@@ -238,14 +257,42 @@ Circular dependencies and references to nonexistent tasks are rejected at config
|
|
|
238
257
|
|
|
239
258
|
When starting a single task with `taskmux start <task>`, dependencies are not auto-started — you get a warning if they aren't running.
|
|
240
259
|
|
|
260
|
+
### Restart Policies
|
|
261
|
+
|
|
262
|
+
Each task has a `restart_policy` that controls automatic restart behavior. Restart policies are enforced by `taskmux start --monitor` and `taskmux daemon`.
|
|
263
|
+
|
|
264
|
+
| Policy | Behavior |
|
|
265
|
+
|--------|----------|
|
|
266
|
+
| `"no"` | Never auto-restart. Task stays stopped after crash or health failure. |
|
|
267
|
+
| `"on-failure"` | **(default)** Restart on crash (process exits) or after `health_retries` consecutive health check failures. |
|
|
268
|
+
| `"always"` | Restart whenever the task stops, including clean exits. |
|
|
269
|
+
|
|
270
|
+
**Manual stops override all policies.** Running `taskmux stop` or `taskmux kill` marks the task as manually stopped — it will not auto-restart even with `restart_policy = "always"`. Use `taskmux restart` or `taskmux start` to clear this flag.
|
|
271
|
+
|
|
272
|
+
**`restart_policy` vs `auto_start`** — these are orthogonal. `auto_start` controls whether a task launches on `taskmux start`. `restart_policy` controls what happens after a running task exits or fails. A task with `auto_start = false` and `restart_policy = "always"` won't start automatically, but once started manually, it will auto-restart on exit.
|
|
273
|
+
|
|
274
|
+
| `restart_policy` | `auto_start` | Behavior |
|
|
275
|
+
|---|---|---|
|
|
276
|
+
| `"no"` | `true` | Starts with session, never auto-restarts |
|
|
277
|
+
| `"no"` | `false` | Manual start only, never auto-restarts |
|
|
278
|
+
| `"on-failure"` | `true` | Starts with session, restarts on crash/health failure |
|
|
279
|
+
| `"on-failure"` | `false` | Manual start, restarts on crash/health failure once running |
|
|
280
|
+
| `"always"` | `true` | Starts with session, restarts on any exit |
|
|
281
|
+
| `"always"` | `false` | Manual start, restarts on any exit once running |
|
|
282
|
+
|
|
283
|
+
**Backoff & limits:** When a task keeps failing, restart delays increase exponentially: `restart_backoff ^ attempt` seconds (capped at 60s). After `max_restarts` consecutive restarts, the task is left stopped. The restart counter resets after 60 seconds of healthy uptime.
|
|
284
|
+
|
|
241
285
|
### Health Checks
|
|
242
286
|
|
|
243
287
|
If `health_check` is set, taskmux runs it as a shell command. Exit code 0 means healthy. If not set, taskmux falls back to checking if the tmux pane has a running process (not just a shell prompt).
|
|
244
288
|
|
|
289
|
+
A task must fail `health_retries` consecutive health checks (default 3) before being considered unhealthy and triggering a restart. If the task becomes healthy again, the failure counter resets.
|
|
290
|
+
|
|
245
291
|
Health checks are used by:
|
|
246
292
|
- `taskmux health` — shows a table of all task health
|
|
247
293
|
- `taskmux start` — waits for dependencies to be healthy before starting dependents
|
|
248
|
-
- `taskmux
|
|
294
|
+
- `taskmux start --monitor` — continuously monitors and auto-restarts per restart_policy
|
|
295
|
+
- `taskmux daemon` — same as --monitor, plus WebSocket API and config watching
|
|
249
296
|
|
|
250
297
|
### Hook Cascade
|
|
251
298
|
|
|
@@ -255,6 +302,20 @@ Hooks fire in this order:
|
|
|
255
302
|
|
|
256
303
|
If a `before_*` hook fails (non-zero exit), the action is aborted.
|
|
257
304
|
|
|
305
|
+
### Process Lifecycle
|
|
306
|
+
|
|
307
|
+
Taskmux ensures processes are fully stopped before restarting and that orphaned port listeners don't block new starts.
|
|
308
|
+
|
|
309
|
+
**Stop escalation** (`stop`, `restart`):
|
|
310
|
+
|
|
311
|
+
1. **C-c** (SIGINT) — waits `stop_grace_period` seconds (default 5)
|
|
312
|
+
2. **SIGTERM** to process group — waits 3 seconds
|
|
313
|
+
3. **SIGKILL** to process group — force kill
|
|
314
|
+
|
|
315
|
+
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
316
|
+
|
|
317
|
+
**Auto-restart** (`start --monitor`, `daemon`): Tasks with `restart_policy = "on-failure"` or `"always"` are automatically restarted. Health checks must fail `health_retries` times before triggering a restart. Restart delays increase exponentially (`restart_backoff` base, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
318
|
+
|
|
258
319
|
### Init & Agent Context
|
|
259
320
|
|
|
260
321
|
`taskmux init` bootstraps your project:
|
|
@@ -275,6 +336,7 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
275
336
|
"name": "api",
|
|
276
337
|
"command": "python manage.py runserver 0.0.0.0:8000",
|
|
277
338
|
"auto_start": true,
|
|
339
|
+
"restart_policy": "on-failure",
|
|
278
340
|
"cwd": "apps/api",
|
|
279
341
|
"health_check": "curl -sf http://localhost:8000/health",
|
|
280
342
|
"depends_on": ["db"],
|
|
@@ -288,15 +350,29 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
288
350
|
}
|
|
289
351
|
```
|
|
290
352
|
|
|
291
|
-
##
|
|
353
|
+
## Monitoring & Auto-restart
|
|
354
|
+
|
|
355
|
+
### start --monitor (lightweight)
|
|
292
356
|
|
|
293
|
-
|
|
357
|
+
Start tasks and stay in the foreground monitoring health:
|
|
358
|
+
|
|
359
|
+
```bash
|
|
360
|
+
taskmux start --monitor # or: taskmux start -m
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
Checks health every 30 seconds and auto-restarts tasks according to their `restart_policy`. No WebSocket API — just monitoring and restart. Press Ctrl+C to stop monitoring (tasks keep running).
|
|
364
|
+
|
|
365
|
+
### Daemon Mode (full)
|
|
366
|
+
|
|
367
|
+
Run as a background daemon with WebSocket API, config watching, and auto-restart:
|
|
294
368
|
|
|
295
369
|
```bash
|
|
296
370
|
taskmux daemon # Default port 8765
|
|
297
371
|
taskmux daemon --port 9000 # Custom port
|
|
298
372
|
```
|
|
299
373
|
|
|
374
|
+
The daemon monitors task health every 30 seconds. Tasks are restarted per their `restart_policy` with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset. Config file changes are detected and applied automatically.
|
|
375
|
+
|
|
300
376
|
WebSocket API:
|
|
301
377
|
|
|
302
378
|
```javascript
|
|
@@ -40,9 +40,12 @@ def buildContextBlock(config: TaskmuxConfig) -> str:
|
|
|
40
40
|
]
|
|
41
41
|
|
|
42
42
|
if config.tasks:
|
|
43
|
+
lines.append("| Task | Port | Auto-start | Command |")
|
|
44
|
+
lines.append("|------|------|------------|---------|")
|
|
43
45
|
for name, task in config.tasks.items():
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
port = str(task.port) if task.port else "—"
|
|
47
|
+
auto = "yes" if task.auto_start else "no"
|
|
48
|
+
lines.append(f"| {name} | {port} | {auto} | `{task.command}` |")
|
|
46
49
|
else:
|
|
47
50
|
lines.append('_No tasks configured yet. Use `taskmux add <name> "<command>"` to add._')
|
|
48
51
|
|
|
@@ -16,8 +16,14 @@ from .tmux_manager import TmuxManager
|
|
|
16
16
|
|
|
17
17
|
app = typer.Typer(
|
|
18
18
|
name="taskmux",
|
|
19
|
-
help=
|
|
20
|
-
|
|
19
|
+
help=(
|
|
20
|
+
"Tmux session manager for development environments.\n\n"
|
|
21
|
+
"Reads task definitions from taskmux.toml, manages tmux sessions/windows, "
|
|
22
|
+
"provides health monitoring, restart policies (no/on-failure/always), "
|
|
23
|
+
"dependency ordering, lifecycle hooks, and a WebSocket API.\n\n"
|
|
24
|
+
"Quick start: taskmux init → edit taskmux.toml → taskmux start"
|
|
25
|
+
),
|
|
26
|
+
epilog="Docs: https://github.com/nc9/taskmux",
|
|
21
27
|
rich_markup_mode="rich",
|
|
22
28
|
)
|
|
23
29
|
|
|
@@ -49,49 +55,80 @@ class TaskmuxCLI:
|
|
|
49
55
|
def init(
|
|
50
56
|
defaults: bool = typer.Option(False, "--defaults", help="Accept all defaults"),
|
|
51
57
|
):
|
|
52
|
-
"""Initialize taskmux config in current directory.
|
|
53
|
-
initProject(defaults=defaults)
|
|
58
|
+
"""Initialize taskmux config in current directory.
|
|
54
59
|
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
60
|
+
Creates taskmux.toml with session name (defaults to directory name).
|
|
61
|
+
Detects installed AI coding agents (Claude, Codex, OpenCode) and injects
|
|
62
|
+
taskmux usage instructions into their context files.
|
|
63
|
+
Use --defaults to skip interactive prompts.
|
|
64
|
+
"""
|
|
65
|
+
initProject(defaults=defaults)
|
|
61
66
|
|
|
62
67
|
|
|
63
68
|
@app.command()
|
|
64
69
|
def start(
|
|
65
|
-
|
|
70
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
71
|
+
monitor: bool = typer.Option( # noqa: B008
|
|
72
|
+
False, "-m", "--monitor", help="Stay running, auto-restart per restart_policy"
|
|
73
|
+
),
|
|
66
74
|
):
|
|
67
|
-
"""Start all tasks
|
|
75
|
+
"""Start tasks (all auto_start tasks if none specified).
|
|
76
|
+
|
|
77
|
+
Starts tasks in dependency order, waiting for each dependency's health check
|
|
78
|
+
to pass before starting dependents. With --monitor, stays in the foreground
|
|
79
|
+
and auto-restarts tasks according to their restart_policy (no/on-failure/always),
|
|
80
|
+
respecting health_retries, max_restarts, and exponential backoff.
|
|
81
|
+
"""
|
|
82
|
+
import time
|
|
83
|
+
|
|
68
84
|
cli = TaskmuxCLI()
|
|
69
|
-
if
|
|
70
|
-
|
|
85
|
+
if tasks:
|
|
86
|
+
for task in tasks:
|
|
87
|
+
cli.tmux.start_task(task)
|
|
71
88
|
else:
|
|
72
89
|
cli.tmux.start_all()
|
|
73
90
|
|
|
91
|
+
if monitor:
|
|
92
|
+
console.print("Monitoring tasks (Ctrl+C to stop)...")
|
|
93
|
+
try:
|
|
94
|
+
while True:
|
|
95
|
+
time.sleep(30)
|
|
96
|
+
cli.tmux.auto_restart_tasks()
|
|
97
|
+
except KeyboardInterrupt:
|
|
98
|
+
console.print("\nStopped monitoring")
|
|
99
|
+
|
|
74
100
|
|
|
75
101
|
@app.command()
|
|
76
102
|
def stop(
|
|
77
|
-
|
|
103
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
78
104
|
):
|
|
79
|
-
"""Stop
|
|
105
|
+
"""Stop tasks (all if none specified).
|
|
106
|
+
|
|
107
|
+
Uses signal escalation: C-c → SIGTERM → SIGKILL. Waits stop_grace_period
|
|
108
|
+
seconds (default 5) after C-c before escalating. Stopped tasks are marked
|
|
109
|
+
as manually stopped and will not be auto-restarted even with restart_policy="always".
|
|
110
|
+
"""
|
|
80
111
|
cli = TaskmuxCLI()
|
|
81
|
-
if
|
|
82
|
-
|
|
112
|
+
if tasks:
|
|
113
|
+
for task in tasks:
|
|
114
|
+
cli.tmux.stop_task(task)
|
|
83
115
|
else:
|
|
84
116
|
cli.tmux.stop_all()
|
|
85
117
|
|
|
86
118
|
|
|
87
119
|
@app.command()
|
|
88
120
|
def restart(
|
|
89
|
-
|
|
121
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
90
122
|
):
|
|
91
|
-
"""Restart
|
|
123
|
+
"""Restart tasks (all if none specified).
|
|
124
|
+
|
|
125
|
+
Full stop with signal escalation, port cleanup, then restart.
|
|
126
|
+
Clears the manually-stopped flag so auto-restart policies resume.
|
|
127
|
+
"""
|
|
92
128
|
cli = TaskmuxCLI()
|
|
93
|
-
if
|
|
94
|
-
|
|
129
|
+
if tasks:
|
|
130
|
+
for task in tasks:
|
|
131
|
+
cli.tmux.restart_task(task)
|
|
95
132
|
else:
|
|
96
133
|
cli.tmux.restart_all()
|
|
97
134
|
|
|
@@ -100,7 +137,11 @@ def restart(
|
|
|
100
137
|
def kill(
|
|
101
138
|
task: str = typer.Argument(..., help="Task name to kill"),
|
|
102
139
|
):
|
|
103
|
-
"""Kill a specific task.
|
|
140
|
+
"""Kill a specific task (SIGKILL + destroy window).
|
|
141
|
+
|
|
142
|
+
Unlike stop, kill is immediate with no grace period. The tmux window is
|
|
143
|
+
destroyed. The task is marked as manually stopped (no auto-restart).
|
|
144
|
+
"""
|
|
104
145
|
cli = TaskmuxCLI()
|
|
105
146
|
cli.tmux.kill_task(task)
|
|
106
147
|
|
|
@@ -113,7 +154,11 @@ def logs(
|
|
|
113
154
|
grep: str | None = typer.Option(None, "-g", "--grep", help="Filter logs by pattern"),
|
|
114
155
|
context: int = typer.Option(3, "-C", "--context", help="Context lines around grep matches"),
|
|
115
156
|
):
|
|
116
|
-
"""Show logs for a task, or
|
|
157
|
+
"""Show logs for a task, or interleaved logs from all tasks.
|
|
158
|
+
|
|
159
|
+
Without -f, prints recent output. With -f, follows logs live with colored
|
|
160
|
+
task prefixes. Use -g to grep across tasks and -C for context lines.
|
|
161
|
+
"""
|
|
117
162
|
cli = TaskmuxCLI()
|
|
118
163
|
cli.tmux.show_logs(task, follow, lines, grep=grep, context=context)
|
|
119
164
|
|
|
@@ -122,7 +167,11 @@ def logs(
|
|
|
122
167
|
def inspect(
|
|
123
168
|
task: str = typer.Argument(..., help="Task name to inspect"),
|
|
124
169
|
):
|
|
125
|
-
"""Inspect task state as JSON.
|
|
170
|
+
"""Inspect task state as JSON.
|
|
171
|
+
|
|
172
|
+
Returns detailed info: name, command, restart_policy, running/healthy status,
|
|
173
|
+
pid, pane command, cwd, window/pane IDs, health_check, and depends_on.
|
|
174
|
+
"""
|
|
126
175
|
cli = TaskmuxCLI()
|
|
127
176
|
data = cli.tmux.inspect_task(task)
|
|
128
177
|
console.print_json(json.dumps(data))
|
|
@@ -138,7 +187,7 @@ def add(
|
|
|
138
187
|
None, "--depends-on", help="Dependency task names"
|
|
139
188
|
),
|
|
140
189
|
):
|
|
141
|
-
"""Add a new task."""
|
|
190
|
+
"""Add a new task to taskmux.toml."""
|
|
142
191
|
addTask(None, task, command, cwd=cwd, health_check=health_check, depends_on=depends_on)
|
|
143
192
|
console.print(f"Added task '{task}': {command}")
|
|
144
193
|
|
|
@@ -147,7 +196,7 @@ def add(
|
|
|
147
196
|
def remove(
|
|
148
197
|
task: str = typer.Argument(..., help="Task name to remove"),
|
|
149
198
|
):
|
|
150
|
-
"""Remove a task."""
|
|
199
|
+
"""Remove a task from taskmux.toml (kills it first if running)."""
|
|
151
200
|
cli = TaskmuxCLI()
|
|
152
201
|
|
|
153
202
|
if cli.tmux.session_exists():
|
|
@@ -160,16 +209,28 @@ def remove(
|
|
|
160
209
|
console.print(f"Task '{task}' not found in config", style="red")
|
|
161
210
|
|
|
162
211
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
212
|
+
def _status():
|
|
213
|
+
"""Show session and task status.
|
|
214
|
+
|
|
215
|
+
Lists all tasks with health indicators, running state, ports, restart policy
|
|
216
|
+
(if non-default), working directory, and dependencies. Aliases: list, ls.
|
|
217
|
+
"""
|
|
166
218
|
cli = TaskmuxCLI()
|
|
167
|
-
cli.tmux.
|
|
219
|
+
cli.tmux.list_tasks()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
app.command(name="status")(_status)
|
|
223
|
+
app.command(name="list", hidden=True)(_status)
|
|
224
|
+
app.command(name="ls", hidden=True)(_status)
|
|
168
225
|
|
|
169
226
|
|
|
170
227
|
@app.command()
|
|
171
228
|
def health():
|
|
172
|
-
"""Check health of all tasks.
|
|
229
|
+
"""Check health of all tasks.
|
|
230
|
+
|
|
231
|
+
Runs each task's health_check command (or falls back to pane-alive check).
|
|
232
|
+
Displays a table with health status for every configured task.
|
|
233
|
+
"""
|
|
173
234
|
cli = TaskmuxCLI()
|
|
174
235
|
|
|
175
236
|
if not cli.tmux.session_exists():
|
|
@@ -200,7 +261,11 @@ def health():
|
|
|
200
261
|
|
|
201
262
|
@app.command()
|
|
202
263
|
def watch():
|
|
203
|
-
"""Watch
|
|
264
|
+
"""Watch taskmux.toml for changes and reload on edit.
|
|
265
|
+
|
|
266
|
+
Stays in the foreground. When the config file changes, reloads it and
|
|
267
|
+
restarts affected tasks.
|
|
268
|
+
"""
|
|
204
269
|
cli = TaskmuxCLI()
|
|
205
270
|
watcher = SimpleConfigWatcher(cli)
|
|
206
271
|
watcher.watch_config()
|
|
@@ -210,7 +275,12 @@ def watch():
|
|
|
210
275
|
def daemon(
|
|
211
276
|
port: int = typer.Option(8765, "--port", help="WebSocket API port"),
|
|
212
277
|
):
|
|
213
|
-
"""Run in daemon mode with API.
|
|
278
|
+
"""Run in daemon mode with WebSocket API and health monitoring.
|
|
279
|
+
|
|
280
|
+
Monitors task health every 30s and auto-restarts per restart_policy with
|
|
281
|
+
exponential backoff. Watches config for changes. Exposes a WebSocket API
|
|
282
|
+
for status, restart, kill, and logs commands.
|
|
283
|
+
"""
|
|
214
284
|
d = TaskmuxDaemon(api_port=port)
|
|
215
285
|
asyncio.run(d.start())
|
|
216
286
|
|
|
@@ -6,7 +6,7 @@ from pathlib import Path
|
|
|
6
6
|
|
|
7
7
|
import tomlkit
|
|
8
8
|
|
|
9
|
-
from .models import HookConfig, TaskConfig, TaskmuxConfig
|
|
9
|
+
from .models import HookConfig, RestartPolicy, TaskConfig, TaskmuxConfig
|
|
10
10
|
|
|
11
11
|
CONFIG_FILENAME = "taskmux.toml"
|
|
12
12
|
|
|
@@ -122,6 +122,8 @@ def writeConfig(path: Path | None, config: TaskmuxConfig) -> Path:
|
|
|
122
122
|
inner.add("max_restarts", task_cfg.max_restarts)
|
|
123
123
|
if task_cfg.restart_backoff != 2.0:
|
|
124
124
|
inner.add("restart_backoff", task_cfg.restart_backoff)
|
|
125
|
+
if task_cfg.restart_policy != RestartPolicy.ON_FAILURE:
|
|
126
|
+
inner.add("restart_policy", str(task_cfg.restart_policy))
|
|
125
127
|
if task_cfg.depends_on:
|
|
126
128
|
inner.add("depends_on", task_cfg.depends_on)
|
|
127
129
|
# Task-level hooks
|
|
@@ -39,26 +39,6 @@ class ConfigWatcher(FileSystemEventHandler):
|
|
|
39
39
|
self.taskmux_cli.handle_config_reload()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class RestartTracker:
|
|
43
|
-
"""Tracks per-task restart counts and timestamps for backoff."""
|
|
44
|
-
|
|
45
|
-
def __init__(self) -> None:
|
|
46
|
-
self._data: dict[str, dict[str, float]] = {}
|
|
47
|
-
|
|
48
|
-
def get(self, task_name: str) -> dict[str, float]:
|
|
49
|
-
return self._data.get(task_name, {"count": 0, "last": 0.0})
|
|
50
|
-
|
|
51
|
-
def record(self, task_name: str) -> None:
|
|
52
|
-
info = self.get(task_name)
|
|
53
|
-
self._data[task_name] = {
|
|
54
|
-
"count": info["count"] + 1,
|
|
55
|
-
"last": time.time(),
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
def reset(self, task_name: str) -> None:
|
|
59
|
-
self._data.pop(task_name, None)
|
|
60
|
-
|
|
61
|
-
|
|
62
42
|
class TaskmuxDaemon:
|
|
63
43
|
"""Daemon mode for Taskmux with enhanced monitoring and API"""
|
|
64
44
|
|
|
@@ -71,7 +51,6 @@ class TaskmuxDaemon:
|
|
|
71
51
|
self.health_check_interval = 30
|
|
72
52
|
self.health_check_task: asyncio.Task | None = None
|
|
73
53
|
self.websocket_clients: set = set()
|
|
74
|
-
self.restart_tracker = RestartTracker()
|
|
75
54
|
self.logger = self._setup_logging()
|
|
76
55
|
|
|
77
56
|
signal.signal(signal.SIGINT, self._signal_handler)
|
|
@@ -161,40 +140,9 @@ class TaskmuxDaemon:
|
|
|
161
140
|
await asyncio.sleep(5)
|
|
162
141
|
|
|
163
142
|
def _auto_restart_with_backoff(self) -> None:
|
|
164
|
-
"""Auto-restart
|
|
143
|
+
"""Auto-restart tasks using TmuxManager's restart policy logic."""
|
|
165
144
|
assert self.cli is not None
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
for task_name, task_cfg in self.cli.config.tasks.items():
|
|
169
|
-
healthy = self.cli.tmux.check_task_health(task_name)
|
|
170
|
-
|
|
171
|
-
if healthy:
|
|
172
|
-
# Reset tracker if healthy for >60s
|
|
173
|
-
info = self.restart_tracker.get(task_name)
|
|
174
|
-
if info["count"] > 0 and now - info["last"] > 60:
|
|
175
|
-
self.restart_tracker.reset(task_name)
|
|
176
|
-
continue
|
|
177
|
-
|
|
178
|
-
# Skip if not previously healthy (avoid restart loop on first check)
|
|
179
|
-
prev_health = self.cli.tmux.task_health.get(task_name, {}).get("healthy", True)
|
|
180
|
-
if not prev_health:
|
|
181
|
-
info = self.restart_tracker.get(task_name)
|
|
182
|
-
|
|
183
|
-
# Check max_restarts
|
|
184
|
-
if task_cfg.max_restarts and info["count"] >= task_cfg.max_restarts:
|
|
185
|
-
self.logger.warning(
|
|
186
|
-
f"Task '{task_name}' exceeded max restarts ({task_cfg.max_restarts})"
|
|
187
|
-
)
|
|
188
|
-
continue
|
|
189
|
-
|
|
190
|
-
# Check backoff delay
|
|
191
|
-
delay = min(task_cfg.restart_backoff ** info["count"], 60)
|
|
192
|
-
if info["last"] and now - info["last"] < delay:
|
|
193
|
-
continue
|
|
194
|
-
|
|
195
|
-
self.logger.info(f"Auto-restarting unhealthy task: {task_name}")
|
|
196
|
-
self.cli.tmux.restart_task(task_name)
|
|
197
|
-
self.restart_tracker.record(task_name)
|
|
145
|
+
self.cli.tmux.auto_restart_tasks()
|
|
198
146
|
|
|
199
147
|
async def _start_api_server(self) -> None:
|
|
200
148
|
"""Start WebSocket API server"""
|
|
@@ -1,10 +1,19 @@
|
|
|
1
1
|
"""Pydantic models for Taskmux configuration."""
|
|
2
2
|
|
|
3
3
|
import warnings
|
|
4
|
+
from enum import StrEnum
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel, ConfigDict, model_validator
|
|
6
7
|
|
|
7
8
|
|
|
9
|
+
class RestartPolicy(StrEnum):
|
|
10
|
+
"""Docker-style restart policy for tasks."""
|
|
11
|
+
|
|
12
|
+
NO = "no"
|
|
13
|
+
ON_FAILURE = "on-failure"
|
|
14
|
+
ALWAYS = "always"
|
|
15
|
+
|
|
16
|
+
|
|
8
17
|
class _StrictConfig(BaseModel):
|
|
9
18
|
"""Base config: frozen, warns on unknown keys."""
|
|
10
19
|
|
|
@@ -45,6 +54,7 @@ class TaskConfig(_StrictConfig):
|
|
|
45
54
|
stop_grace_period: int = 5
|
|
46
55
|
max_restarts: int = 5
|
|
47
56
|
restart_backoff: float = 2.0
|
|
57
|
+
restart_policy: RestartPolicy = RestartPolicy.ON_FAILURE
|
|
48
58
|
depends_on: list[str] = []
|
|
49
59
|
hooks: HookConfig = HookConfig()
|
|
50
60
|
|
|
@@ -15,13 +15,52 @@ from rich.console import Console
|
|
|
15
15
|
from rich.markup import escape
|
|
16
16
|
|
|
17
17
|
from .hooks import runHook
|
|
18
|
-
from .models import TaskmuxConfig
|
|
18
|
+
from .models import RestartPolicy, TaskmuxConfig
|
|
19
19
|
|
|
20
20
|
SHELL_NAMES = frozenset(("bash", "zsh", "sh", "fish"))
|
|
21
21
|
|
|
22
22
|
TASK_COLORS = ["cyan", "green", "yellow", "magenta", "blue", "red"]
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
class RestartTracker:
|
|
26
|
+
"""Tracks per-task restart counts, health failures, and manual-stop state."""
|
|
27
|
+
|
|
28
|
+
def __init__(self) -> None:
|
|
29
|
+
self._data: dict[str, dict[str, float]] = {}
|
|
30
|
+
self._consecutive_failures: dict[str, int] = {}
|
|
31
|
+
self._manually_stopped: set[str] = set()
|
|
32
|
+
|
|
33
|
+
def get(self, task_name: str) -> dict[str, float]:
|
|
34
|
+
return self._data.get(task_name, {"count": 0, "last": 0.0})
|
|
35
|
+
|
|
36
|
+
def record(self, task_name: str) -> None:
|
|
37
|
+
info = self.get(task_name)
|
|
38
|
+
self._data[task_name] = {
|
|
39
|
+
"count": info["count"] + 1,
|
|
40
|
+
"last": time.time(),
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
def reset(self, task_name: str) -> None:
|
|
44
|
+
self._data.pop(task_name, None)
|
|
45
|
+
|
|
46
|
+
def record_health_failure(self, task_name: str) -> int:
|
|
47
|
+
count = self._consecutive_failures.get(task_name, 0) + 1
|
|
48
|
+
self._consecutive_failures[task_name] = count
|
|
49
|
+
return count
|
|
50
|
+
|
|
51
|
+
def reset_health_failures(self, task_name: str) -> None:
|
|
52
|
+
self._consecutive_failures.pop(task_name, None)
|
|
53
|
+
|
|
54
|
+
def mark_manually_stopped(self, task_name: str) -> None:
|
|
55
|
+
self._manually_stopped.add(task_name)
|
|
56
|
+
|
|
57
|
+
def clear_manually_stopped(self, task_name: str) -> None:
|
|
58
|
+
self._manually_stopped.discard(task_name)
|
|
59
|
+
|
|
60
|
+
def is_manually_stopped(self, task_name: str) -> bool:
|
|
61
|
+
return task_name in self._manually_stopped
|
|
62
|
+
|
|
63
|
+
|
|
25
64
|
def _find_new_lines(current: list[str], prev_tail: list[str]) -> list[str]:
|
|
26
65
|
"""Return lines in current that are new since prev_tail."""
|
|
27
66
|
if not prev_tail:
|
|
@@ -43,6 +82,7 @@ class TmuxManager:
|
|
|
43
82
|
self.server = libtmux.Server()
|
|
44
83
|
self.session: libtmux.Session | None = None
|
|
45
84
|
self.task_health: dict = {}
|
|
85
|
+
self.restart_tracker = RestartTracker()
|
|
46
86
|
self._refresh_session()
|
|
47
87
|
|
|
48
88
|
def _refresh_session(self) -> None:
|
|
@@ -232,6 +272,7 @@ class TmuxManager:
|
|
|
232
272
|
|
|
233
273
|
def start_task(self, task_name: str) -> None:
|
|
234
274
|
"""Start a single task (create window + send command)."""
|
|
275
|
+
self.restart_tracker.clear_manually_stopped(task_name)
|
|
235
276
|
if task_name not in self.config.tasks:
|
|
236
277
|
print(f"Task '{task_name}' not found in config")
|
|
237
278
|
return
|
|
@@ -286,6 +327,7 @@ class TmuxManager:
|
|
|
286
327
|
|
|
287
328
|
def stop_task(self, task_name: str) -> None:
|
|
288
329
|
"""Graceful stop with signal escalation: C-c → SIGTERM → SIGKILL."""
|
|
330
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
289
331
|
if not self.session_exists():
|
|
290
332
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
291
333
|
return
|
|
@@ -396,6 +438,9 @@ class TmuxManager:
|
|
|
396
438
|
|
|
397
439
|
def stop_all(self) -> None:
|
|
398
440
|
"""Stop all tasks with signal escalation then kill session."""
|
|
441
|
+
for task_name in self.config.tasks:
|
|
442
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
443
|
+
|
|
399
444
|
if not self.session_exists():
|
|
400
445
|
print("No session running")
|
|
401
446
|
return
|
|
@@ -456,6 +501,7 @@ class TmuxManager:
|
|
|
456
501
|
|
|
457
502
|
def restart_task(self, task_name: str) -> None:
|
|
458
503
|
"""Restart a specific task with full stop escalation."""
|
|
504
|
+
self.restart_tracker.clear_manually_stopped(task_name)
|
|
459
505
|
if not self.session_exists():
|
|
460
506
|
print(f"Session '{self.config.name}' doesn't exist. Run 'taskmux start' first.")
|
|
461
507
|
return
|
|
@@ -508,6 +554,7 @@ class TmuxManager:
|
|
|
508
554
|
|
|
509
555
|
def kill_task(self, task_name: str) -> None:
|
|
510
556
|
"""Kill a specific task (process group + window)."""
|
|
557
|
+
self.restart_tracker.mark_manually_stopped(task_name)
|
|
511
558
|
if not self.session_exists():
|
|
512
559
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
513
560
|
return
|
|
@@ -534,6 +581,7 @@ class TmuxManager:
|
|
|
534
581
|
"name": task_name,
|
|
535
582
|
"command": task_cfg.command,
|
|
536
583
|
"auto_start": task_cfg.auto_start,
|
|
584
|
+
"restart_policy": str(task_cfg.restart_policy),
|
|
537
585
|
"cwd": task_cfg.cwd,
|
|
538
586
|
"health_check": task_cfg.health_check,
|
|
539
587
|
"depends_on": task_cfg.depends_on,
|
|
@@ -694,8 +742,12 @@ class TmuxManager:
|
|
|
694
742
|
console.print(f"[{color}]{prefix}[/{color}] {escape(line)}")
|
|
695
743
|
|
|
696
744
|
def list_tasks(self) -> None:
|
|
697
|
-
"""List all tasks and their status"""
|
|
698
|
-
|
|
745
|
+
"""List all tasks and their status."""
|
|
746
|
+
exists = self.session_exists()
|
|
747
|
+
print(f"Session '{self.config.name}': {'Running' if exists else 'Stopped'}")
|
|
748
|
+
if exists:
|
|
749
|
+
windows = self.list_windows()
|
|
750
|
+
print(f"Active tasks: {len(windows)}")
|
|
699
751
|
print("-" * 70)
|
|
700
752
|
|
|
701
753
|
if not self.config.tasks:
|
|
@@ -709,22 +761,16 @@ class TmuxManager:
|
|
|
709
761
|
"Healthy" if status["healthy"] else "Running" if status["running"] else "Stopped"
|
|
710
762
|
)
|
|
711
763
|
auto = "" if task_cfg.auto_start else " [manual]"
|
|
764
|
+
port = f" :{task_cfg.port}" if task_cfg.port else ""
|
|
712
765
|
extras = ""
|
|
713
766
|
if task_cfg.cwd:
|
|
714
767
|
extras += f" cwd={task_cfg.cwd}"
|
|
768
|
+
if task_cfg.restart_policy != RestartPolicy.ON_FAILURE:
|
|
769
|
+
extras += f" restart={task_cfg.restart_policy}"
|
|
715
770
|
if task_cfg.depends_on:
|
|
716
771
|
extras += f" deps=[{','.join(task_cfg.depends_on)}]"
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
def show_status(self) -> None:
|
|
720
|
-
"""Show overall session status"""
|
|
721
|
-
exists = self.session_exists()
|
|
722
|
-
print(f"Session '{self.config.name}': {'Running' if exists else 'Stopped'} (libtmux)")
|
|
723
|
-
|
|
724
|
-
if exists:
|
|
725
|
-
windows = self.list_windows()
|
|
726
|
-
print(f"Active tasks: {len(windows)}")
|
|
727
|
-
self.list_tasks()
|
|
772
|
+
line = f"{health_icon} {status_text:8} {task_name:15}{port:7} {task_cfg.command}"
|
|
773
|
+
print(f"{line}{auto}{extras}")
|
|
728
774
|
|
|
729
775
|
def check_task_health(self, task_name: str) -> bool:
|
|
730
776
|
"""Check if a task is healthy"""
|
|
@@ -739,17 +785,68 @@ class TmuxManager:
|
|
|
739
785
|
|
|
740
786
|
return is_healthy
|
|
741
787
|
|
|
742
|
-
def
|
|
743
|
-
"""Auto-restart tasks
|
|
788
|
+
def auto_restart_tasks(self) -> None:
|
|
789
|
+
"""Auto-restart tasks based on restart_policy, health_retries, max_restarts, and backoff."""
|
|
744
790
|
if not self.session_exists():
|
|
745
791
|
return
|
|
746
792
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
793
|
+
now = time.time()
|
|
794
|
+
|
|
795
|
+
for task_name, task_cfg in self.config.tasks.items():
|
|
796
|
+
if task_cfg.restart_policy == RestartPolicy.NO:
|
|
797
|
+
continue
|
|
798
|
+
if self.restart_tracker.is_manually_stopped(task_name):
|
|
799
|
+
continue
|
|
800
|
+
|
|
801
|
+
healthy = self.check_task_health(task_name)
|
|
802
|
+
pane_alive = self._is_pane_alive(task_name)
|
|
803
|
+
|
|
804
|
+
if healthy:
|
|
805
|
+
self.restart_tracker.reset_health_failures(task_name)
|
|
806
|
+
# Reset restart tracker after 60s stable
|
|
807
|
+
info = self.restart_tracker.get(task_name)
|
|
808
|
+
if info["count"] > 0 and now - info["last"] > 60:
|
|
809
|
+
self.restart_tracker.reset(task_name)
|
|
810
|
+
continue
|
|
811
|
+
|
|
812
|
+
# "on-failure": restart on crash or health_retries exceeded
|
|
813
|
+
# "always": restart whenever pane is dead (even clean exit)
|
|
814
|
+
should_restart = False
|
|
815
|
+
|
|
816
|
+
if not pane_alive:
|
|
817
|
+
# Process exited — restart for both on-failure and always
|
|
818
|
+
should_restart = True
|
|
819
|
+
elif task_cfg.restart_policy == RestartPolicy.ON_FAILURE:
|
|
820
|
+
# Pane alive but health check failing — count consecutive failures
|
|
821
|
+
failures = self.restart_tracker.record_health_failure(task_name)
|
|
822
|
+
if failures >= task_cfg.health_retries:
|
|
823
|
+
should_restart = True
|
|
824
|
+
elif task_cfg.restart_policy == RestartPolicy.ALWAYS:
|
|
825
|
+
failures = self.restart_tracker.record_health_failure(task_name)
|
|
826
|
+
if failures >= task_cfg.health_retries:
|
|
827
|
+
should_restart = True
|
|
828
|
+
|
|
829
|
+
if not should_restart:
|
|
830
|
+
continue
|
|
831
|
+
|
|
832
|
+
# Check max_restarts limit
|
|
833
|
+
info = self.restart_tracker.get(task_name)
|
|
834
|
+
if task_cfg.max_restarts and info["count"] >= task_cfg.max_restarts:
|
|
835
|
+
continue
|
|
836
|
+
|
|
837
|
+
# Check backoff delay
|
|
838
|
+
delay = min(task_cfg.restart_backoff ** info["count"], 60)
|
|
839
|
+
if info["last"] and now - info["last"] < delay:
|
|
840
|
+
continue
|
|
841
|
+
|
|
842
|
+
print(f"Auto-restarting task: {task_name}")
|
|
843
|
+
self.restart_task(task_name)
|
|
844
|
+
self.restart_tracker.record(task_name)
|
|
845
|
+
self.restart_tracker.reset_health_failures(task_name)
|
|
846
|
+
|
|
847
|
+
def auto_restart_unhealthy_tasks(self) -> None:
|
|
848
|
+
"""Deprecated: use auto_restart_tasks() instead."""
|
|
849
|
+
self.auto_restart_tasks()
|
|
753
850
|
|
|
754
851
|
def stop_session(self) -> None:
|
|
755
852
|
"""Stop the entire tmux session (legacy, wraps stop_all)."""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|