taskmux 0.2.5__tar.gz → 0.2.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {taskmux-0.2.5 → taskmux-0.2.7}/PKG-INFO +34 -6
- {taskmux-0.2.5 → taskmux-0.2.7}/README.md +33 -5
- {taskmux-0.2.5 → taskmux-0.2.7}/pyproject.toml +1 -1
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/agent.py +5 -2
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/cli.py +23 -23
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/config.py +8 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/daemon.py +59 -2
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/models.py +4 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/tmux_manager.py +134 -21
- {taskmux-0.2.5 → taskmux-0.2.7}/.gitignore +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/LICENSE +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/__init__.py +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/hooks.py +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/init.py +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/main.py +0 -0
- {taskmux-0.2.5 → taskmux-0.2.7}/taskmux/templates/claude.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: taskmux
|
|
3
|
-
Version: 0.2.5
|
|
3
|
+
Version: 0.2.7
|
|
4
4
|
Summary: Modern tmux-based task manager for LLM development tools
|
|
5
5
|
Project-URL: Homepage, https://github.com/nc9/taskmux
|
|
6
6
|
Project-URL: Repository, https://github.com/nc9/taskmux
|
|
@@ -125,17 +125,22 @@ health_check = "test -f .migrate-complete"
|
|
|
125
125
|
[tasks.api]
|
|
126
126
|
command = "python manage.py runserver 0.0.0.0:8000"
|
|
127
127
|
cwd = "apps/api"
|
|
128
|
+
port = 8000
|
|
128
129
|
depends_on = ["migrate"]
|
|
129
130
|
health_check = "curl -sf http://localhost:8000/health"
|
|
131
|
+
stop_grace_period = 10
|
|
130
132
|
|
|
131
133
|
[tasks.worker]
|
|
132
134
|
command = "celery -A myapp worker -l info"
|
|
133
135
|
cwd = "apps/api"
|
|
134
136
|
depends_on = ["db"]
|
|
137
|
+
max_restarts = 3
|
|
138
|
+
restart_backoff = 3.0
|
|
135
139
|
|
|
136
140
|
[tasks.web]
|
|
137
141
|
command = "bun dev"
|
|
138
142
|
cwd = "apps/web"
|
|
143
|
+
port = 3000
|
|
139
144
|
depends_on = ["api"]
|
|
140
145
|
health_check = "curl -sf http://localhost:3000"
|
|
141
146
|
|
|
@@ -171,8 +176,8 @@ taskmux start storybook # Start a manual task
|
|
|
171
176
|
# Session
|
|
172
177
|
taskmux start # Start all auto_start tasks
|
|
173
178
|
taskmux start <task> # Start a single task
|
|
174
|
-
taskmux stop # Stop all tasks (
|
|
175
|
-
taskmux stop <task> # Stop a single task (
|
|
179
|
+
taskmux stop # Stop all tasks (C-c → SIGTERM → SIGKILL)
|
|
180
|
+
taskmux stop <task> # Stop a single task (signal escalation)
|
|
176
181
|
taskmux restart # Restart all tasks
|
|
177
182
|
taskmux restart <task> # Restart a single task
|
|
178
183
|
taskmux status # Show session status
|
|
@@ -206,8 +211,8 @@ taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
|
206
211
|
|
|
207
212
|
### stop vs kill
|
|
208
213
|
|
|
209
|
-
- **`stop`** sends C-c
|
|
210
|
-
- **`kill`** destroys the window immediately.
|
|
214
|
+
- **`stop`** sends C-c, then escalates to SIGTERM → SIGKILL if the process doesn't exit within the grace period. Window stays alive so you can see exit output.
|
|
215
|
+
- **`kill`** kills the process group and destroys the window immediately.
|
|
211
216
|
|
|
212
217
|
## Configuration
|
|
213
218
|
|
|
@@ -226,7 +231,9 @@ after_stop = "echo done"
|
|
|
226
231
|
[tasks.server]
|
|
227
232
|
command = "python manage.py runserver"
|
|
228
233
|
cwd = "apps/api"
|
|
234
|
+
port = 8000
|
|
229
235
|
health_check = "curl -sf http://localhost:8000/health"
|
|
236
|
+
stop_grace_period = 10
|
|
230
237
|
depends_on = ["db"]
|
|
231
238
|
|
|
232
239
|
[tasks.server.hooks]
|
|
@@ -239,6 +246,7 @@ health_check = "pg_isready -h localhost"
|
|
|
239
246
|
[tasks.worker]
|
|
240
247
|
command = "celery worker -A myapp"
|
|
241
248
|
depends_on = ["db"]
|
|
249
|
+
max_restarts = 3
|
|
242
250
|
|
|
243
251
|
[tasks.tailwind]
|
|
244
252
|
command = "npx tailwindcss -w"
|
|
@@ -258,10 +266,14 @@ auto_start = false
|
|
|
258
266
|
| `tasks.<name>.command` | — | Shell command to run |
|
|
259
267
|
| `tasks.<name>.auto_start` | `true` | Start with `taskmux start` |
|
|
260
268
|
| `tasks.<name>.cwd` | — | Working directory for the task |
|
|
269
|
+
| `tasks.<name>.port` | — | Port to clean up before starting (kills orphaned listeners) |
|
|
261
270
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
262
271
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
263
272
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
264
273
|
| `tasks.<name>.health_retries` | `3` | Consecutive failures before "unhealthy" |
|
|
274
|
+
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
275
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts in daemon mode before giving up (0 = unlimited) |
|
|
276
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Multiplier for restart delay (1s, 2s, 4s, 8s… capped at 60s) |
|
|
265
277
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
266
278
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
267
279
|
|
|
@@ -290,6 +302,20 @@ Hooks fire in this order:
|
|
|
290
302
|
|
|
291
303
|
If a `before_*` hook fails (non-zero exit), the action is aborted.
|
|
292
304
|
|
|
305
|
+
### Process Lifecycle
|
|
306
|
+
|
|
307
|
+
Taskmux ensures processes are fully stopped before restarting and that orphaned port listeners don't block new starts.
|
|
308
|
+
|
|
309
|
+
**Stop escalation** (`stop`, `restart`):
|
|
310
|
+
|
|
311
|
+
1. **C-c** (SIGINT) — waits `stop_grace_period` seconds (default 5)
|
|
312
|
+
2. **SIGTERM** to process group — waits 3 seconds
|
|
313
|
+
3. **SIGKILL** to process group — force kill
|
|
314
|
+
|
|
315
|
+
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
316
|
+
|
|
317
|
+
**Auto-restart backoff** (daemon mode): When a task keeps crashing, restart delays increase exponentially (`restart_backoff` multiplier, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
318
|
+
|
|
293
319
|
### Init & Agent Context
|
|
294
320
|
|
|
295
321
|
`taskmux init` bootstraps your project:
|
|
@@ -325,13 +351,15 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
325
351
|
|
|
326
352
|
## Daemon Mode
|
|
327
353
|
|
|
328
|
-
Run as a background daemon with WebSocket API and auto-restart:
|
|
354
|
+
Run as a background daemon with WebSocket API and auto-restart with exponential backoff:
|
|
329
355
|
|
|
330
356
|
```bash
|
|
331
357
|
taskmux daemon # Default port 8765
|
|
332
358
|
taskmux daemon --port 9000 # Custom port
|
|
333
359
|
```
|
|
334
360
|
|
|
361
|
+
The daemon monitors task health every 30 seconds. Unhealthy tasks are restarted with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset.
|
|
362
|
+
|
|
335
363
|
WebSocket API:
|
|
336
364
|
|
|
337
365
|
```javascript
|
|
@@ -90,17 +90,22 @@ health_check = "test -f .migrate-complete"
|
|
|
90
90
|
[tasks.api]
|
|
91
91
|
command = "python manage.py runserver 0.0.0.0:8000"
|
|
92
92
|
cwd = "apps/api"
|
|
93
|
+
port = 8000
|
|
93
94
|
depends_on = ["migrate"]
|
|
94
95
|
health_check = "curl -sf http://localhost:8000/health"
|
|
96
|
+
stop_grace_period = 10
|
|
95
97
|
|
|
96
98
|
[tasks.worker]
|
|
97
99
|
command = "celery -A myapp worker -l info"
|
|
98
100
|
cwd = "apps/api"
|
|
99
101
|
depends_on = ["db"]
|
|
102
|
+
max_restarts = 3
|
|
103
|
+
restart_backoff = 3.0
|
|
100
104
|
|
|
101
105
|
[tasks.web]
|
|
102
106
|
command = "bun dev"
|
|
103
107
|
cwd = "apps/web"
|
|
108
|
+
port = 3000
|
|
104
109
|
depends_on = ["api"]
|
|
105
110
|
health_check = "curl -sf http://localhost:3000"
|
|
106
111
|
|
|
@@ -136,8 +141,8 @@ taskmux start storybook # Start a manual task
|
|
|
136
141
|
# Session
|
|
137
142
|
taskmux start # Start all auto_start tasks
|
|
138
143
|
taskmux start <task> # Start a single task
|
|
139
|
-
taskmux stop # Stop all tasks (
|
|
140
|
-
taskmux stop <task> # Stop a single task (
|
|
144
|
+
taskmux stop # Stop all tasks (C-c → SIGTERM → SIGKILL)
|
|
145
|
+
taskmux stop <task> # Stop a single task (signal escalation)
|
|
141
146
|
taskmux restart # Restart all tasks
|
|
142
147
|
taskmux restart <task> # Restart a single task
|
|
143
148
|
taskmux status # Show session status
|
|
@@ -171,8 +176,8 @@ taskmux daemon --port 8765 # Run with WebSocket API + auto-restart
|
|
|
171
176
|
|
|
172
177
|
### stop vs kill
|
|
173
178
|
|
|
174
|
-
- **`stop`** sends C-c
|
|
175
|
-
- **`kill`** destroys the window immediately.
|
|
179
|
+
- **`stop`** sends C-c, then escalates to SIGTERM → SIGKILL if the process doesn't exit within the grace period. Window stays alive so you can see exit output.
|
|
180
|
+
- **`kill`** kills the process group and destroys the window immediately.
|
|
176
181
|
|
|
177
182
|
## Configuration
|
|
178
183
|
|
|
@@ -191,7 +196,9 @@ after_stop = "echo done"
|
|
|
191
196
|
[tasks.server]
|
|
192
197
|
command = "python manage.py runserver"
|
|
193
198
|
cwd = "apps/api"
|
|
199
|
+
port = 8000
|
|
194
200
|
health_check = "curl -sf http://localhost:8000/health"
|
|
201
|
+
stop_grace_period = 10
|
|
195
202
|
depends_on = ["db"]
|
|
196
203
|
|
|
197
204
|
[tasks.server.hooks]
|
|
@@ -204,6 +211,7 @@ health_check = "pg_isready -h localhost"
|
|
|
204
211
|
[tasks.worker]
|
|
205
212
|
command = "celery worker -A myapp"
|
|
206
213
|
depends_on = ["db"]
|
|
214
|
+
max_restarts = 3
|
|
207
215
|
|
|
208
216
|
[tasks.tailwind]
|
|
209
217
|
command = "npx tailwindcss -w"
|
|
@@ -223,10 +231,14 @@ auto_start = false
|
|
|
223
231
|
| `tasks.<name>.command` | — | Shell command to run |
|
|
224
232
|
| `tasks.<name>.auto_start` | `true` | Start with `taskmux start` |
|
|
225
233
|
| `tasks.<name>.cwd` | — | Working directory for the task |
|
|
234
|
+
| `tasks.<name>.port` | — | Port to clean up before starting (kills orphaned listeners) |
|
|
226
235
|
| `tasks.<name>.health_check` | — | Shell command to check health (exit 0 = healthy) |
|
|
227
236
|
| `tasks.<name>.health_interval` | `10` | Seconds between health checks |
|
|
228
237
|
| `tasks.<name>.health_timeout` | `5` | Seconds before health check times out |
|
|
229
238
|
| `tasks.<name>.health_retries` | `3` | Consecutive failures before "unhealthy" |
|
|
239
|
+
| `tasks.<name>.stop_grace_period` | `5` | Seconds to wait after C-c before escalating to SIGTERM |
|
|
240
|
+
| `tasks.<name>.max_restarts` | `5` | Max auto-restarts in daemon mode before giving up (0 = unlimited) |
|
|
241
|
+
| `tasks.<name>.restart_backoff` | `2.0` | Multiplier for restart delay (1s, 2s, 4s, 8s… capped at 60s) |
|
|
230
242
|
| `tasks.<name>.depends_on` | `[]` | Task names that must be healthy before this task starts |
|
|
231
243
|
| `tasks.<name>.hooks.*` | — | Per-task lifecycle hooks (same fields as global) |
|
|
232
244
|
|
|
@@ -255,6 +267,20 @@ Hooks fire in this order:
|
|
|
255
267
|
|
|
256
268
|
If a `before_*` hook fails (non-zero exit), the action is aborted.
|
|
257
269
|
|
|
270
|
+
### Process Lifecycle
|
|
271
|
+
|
|
272
|
+
Taskmux ensures processes are fully stopped before restarting and that orphaned port listeners don't block new starts.
|
|
273
|
+
|
|
274
|
+
**Stop escalation** (`stop`, `restart`):
|
|
275
|
+
|
|
276
|
+
1. **C-c** (SIGINT) — waits `stop_grace_period` seconds (default 5)
|
|
277
|
+
2. **SIGTERM** to process group — waits 3 seconds
|
|
278
|
+
3. **SIGKILL** to process group — force kill
|
|
279
|
+
|
|
280
|
+
**Port cleanup** (`start`, `restart`): If `port` is configured, taskmux kills any process listening on that port before starting. This handles orphaned processes from crashed sessions.
|
|
281
|
+
|
|
282
|
+
**Auto-restart backoff** (daemon mode): When a task keeps crashing, restart delays increase exponentially (`restart_backoff` multiplier, capped at 60s). After `max_restarts` failures, the task is left stopped. The counter resets after 60 seconds of healthy uptime.
|
|
283
|
+
|
|
258
284
|
### Init & Agent Context
|
|
259
285
|
|
|
260
286
|
`taskmux init` bootstraps your project:
|
|
@@ -290,13 +316,15 @@ Use `--defaults` to skip prompts (CI/automation).
|
|
|
290
316
|
|
|
291
317
|
## Daemon Mode
|
|
292
318
|
|
|
293
|
-
Run as a background daemon with WebSocket API and auto-restart:
|
|
319
|
+
Run as a background daemon with WebSocket API and auto-restart with exponential backoff:
|
|
294
320
|
|
|
295
321
|
```bash
|
|
296
322
|
taskmux daemon # Default port 8765
|
|
297
323
|
taskmux daemon --port 9000 # Custom port
|
|
298
324
|
```
|
|
299
325
|
|
|
326
|
+
The daemon monitors task health every 30 seconds. Unhealthy tasks are restarted with exponential backoff (controlled by `restart_backoff` and `max_restarts`). Tasks that stay healthy for 60+ seconds have their restart counter reset.
|
|
327
|
+
|
|
300
328
|
WebSocket API:
|
|
301
329
|
|
|
302
330
|
```javascript
|
|
@@ -40,9 +40,12 @@ def buildContextBlock(config: TaskmuxConfig) -> str:
|
|
|
40
40
|
]
|
|
41
41
|
|
|
42
42
|
if config.tasks:
|
|
43
|
+
lines.append("| Task | Port | Auto-start | Command |")
|
|
44
|
+
lines.append("|------|------|------------|---------|")
|
|
43
45
|
for name, task in config.tasks.items():
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
port = str(task.port) if task.port else "—"
|
|
47
|
+
auto = "yes" if task.auto_start else "no"
|
|
48
|
+
lines.append(f"| {name} | {port} | {auto} | `{task.command}` |")
|
|
46
49
|
else:
|
|
47
50
|
lines.append('_No tasks configured yet. Use `taskmux add <name> "<command>"` to add._')
|
|
48
51
|
|
|
@@ -53,45 +53,41 @@ def init(
|
|
|
53
53
|
initProject(defaults=defaults)
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
@app.command()
|
|
57
|
-
def list():
|
|
58
|
-
"""List all tasks and their status."""
|
|
59
|
-
cli = TaskmuxCLI()
|
|
60
|
-
cli.tmux.list_tasks()
|
|
61
|
-
|
|
62
|
-
|
|
63
56
|
@app.command()
|
|
64
57
|
def start(
|
|
65
|
-
|
|
58
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
66
59
|
):
|
|
67
|
-
"""Start
|
|
60
|
+
"""Start tasks (all if none specified)."""
|
|
68
61
|
cli = TaskmuxCLI()
|
|
69
|
-
if
|
|
70
|
-
|
|
62
|
+
if tasks:
|
|
63
|
+
for task in tasks:
|
|
64
|
+
cli.tmux.start_task(task)
|
|
71
65
|
else:
|
|
72
66
|
cli.tmux.start_all()
|
|
73
67
|
|
|
74
68
|
|
|
75
69
|
@app.command()
|
|
76
70
|
def stop(
|
|
77
|
-
|
|
71
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
78
72
|
):
|
|
79
|
-
"""Stop
|
|
73
|
+
"""Stop tasks (all if none specified)."""
|
|
80
74
|
cli = TaskmuxCLI()
|
|
81
|
-
if
|
|
82
|
-
|
|
75
|
+
if tasks:
|
|
76
|
+
for task in tasks:
|
|
77
|
+
cli.tmux.stop_task(task)
|
|
83
78
|
else:
|
|
84
79
|
cli.tmux.stop_all()
|
|
85
80
|
|
|
86
81
|
|
|
87
82
|
@app.command()
|
|
88
83
|
def restart(
|
|
89
|
-
|
|
84
|
+
tasks: list[str] = typer.Argument(None, help="Task names (omit for all)"), # noqa: B008
|
|
90
85
|
):
|
|
91
|
-
"""Restart
|
|
86
|
+
"""Restart tasks (all if none specified)."""
|
|
92
87
|
cli = TaskmuxCLI()
|
|
93
|
-
if
|
|
94
|
-
|
|
88
|
+
if tasks:
|
|
89
|
+
for task in tasks:
|
|
90
|
+
cli.tmux.restart_task(task)
|
|
95
91
|
else:
|
|
96
92
|
cli.tmux.restart_all()
|
|
97
93
|
|
|
@@ -160,11 +156,15 @@ def remove(
|
|
|
160
156
|
console.print(f"Task '{task}' not found in config", style="red")
|
|
161
157
|
|
|
162
158
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
"""Show session status."""
|
|
159
|
+
def _status():
|
|
160
|
+
"""Show session and task status."""
|
|
166
161
|
cli = TaskmuxCLI()
|
|
167
|
-
cli.tmux.show_status()
|
|
162
|
+
cli.tmux.list_tasks()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
app.command(name="status")(_status)
|
|
166
|
+
app.command(name="list", hidden=True)(_status)
|
|
167
|
+
app.command(name="ls", hidden=True)(_status)
|
|
168
168
|
|
|
169
169
|
|
|
170
170
|
@app.command()
|
|
@@ -106,6 +106,8 @@ def writeConfig(path: Path | None, config: TaskmuxConfig) -> Path:
|
|
|
106
106
|
inner.add("auto_start", False)
|
|
107
107
|
if task_cfg.cwd is not None:
|
|
108
108
|
inner.add("cwd", task_cfg.cwd)
|
|
109
|
+
if task_cfg.port is not None:
|
|
110
|
+
inner.add("port", task_cfg.port)
|
|
109
111
|
if task_cfg.health_check is not None:
|
|
110
112
|
inner.add("health_check", task_cfg.health_check)
|
|
111
113
|
if task_cfg.health_interval != 10:
|
|
@@ -114,6 +116,12 @@ def writeConfig(path: Path | None, config: TaskmuxConfig) -> Path:
|
|
|
114
116
|
inner.add("health_timeout", task_cfg.health_timeout)
|
|
115
117
|
if task_cfg.health_retries != 3:
|
|
116
118
|
inner.add("health_retries", task_cfg.health_retries)
|
|
119
|
+
if task_cfg.stop_grace_period != 5:
|
|
120
|
+
inner.add("stop_grace_period", task_cfg.stop_grace_period)
|
|
121
|
+
if task_cfg.max_restarts != 5:
|
|
122
|
+
inner.add("max_restarts", task_cfg.max_restarts)
|
|
123
|
+
if task_cfg.restart_backoff != 2.0:
|
|
124
|
+
inner.add("restart_backoff", task_cfg.restart_backoff)
|
|
117
125
|
if task_cfg.depends_on:
|
|
118
126
|
inner.add("depends_on", task_cfg.depends_on)
|
|
119
127
|
# Task-level hooks
|
|
@@ -39,6 +39,26 @@ class ConfigWatcher(FileSystemEventHandler):
|
|
|
39
39
|
self.taskmux_cli.handle_config_reload()
|
|
40
40
|
|
|
41
41
|
|
|
42
|
+
class RestartTracker:
|
|
43
|
+
"""Tracks per-task restart counts and timestamps for backoff."""
|
|
44
|
+
|
|
45
|
+
def __init__(self) -> None:
|
|
46
|
+
self._data: dict[str, dict[str, float]] = {}
|
|
47
|
+
|
|
48
|
+
def get(self, task_name: str) -> dict[str, float]:
|
|
49
|
+
return self._data.get(task_name, {"count": 0, "last": 0.0})
|
|
50
|
+
|
|
51
|
+
def record(self, task_name: str) -> None:
|
|
52
|
+
info = self.get(task_name)
|
|
53
|
+
self._data[task_name] = {
|
|
54
|
+
"count": info["count"] + 1,
|
|
55
|
+
"last": time.time(),
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
def reset(self, task_name: str) -> None:
|
|
59
|
+
self._data.pop(task_name, None)
|
|
60
|
+
|
|
61
|
+
|
|
42
62
|
class TaskmuxDaemon:
|
|
43
63
|
"""Daemon mode for Taskmux with enhanced monitoring and API"""
|
|
44
64
|
|
|
@@ -51,6 +71,7 @@ class TaskmuxDaemon:
|
|
|
51
71
|
self.health_check_interval = 30
|
|
52
72
|
self.health_check_task: asyncio.Task | None = None
|
|
53
73
|
self.websocket_clients: set = set()
|
|
74
|
+
self.restart_tracker = RestartTracker()
|
|
54
75
|
self.logger = self._setup_logging()
|
|
55
76
|
|
|
56
77
|
signal.signal(signal.SIGINT, self._signal_handler)
|
|
@@ -124,11 +145,11 @@ class TaskmuxDaemon:
|
|
|
124
145
|
self.logger.info("Taskmux daemon stopped")
|
|
125
146
|
|
|
126
147
|
async def _health_check_loop(self) -> None:
|
|
127
|
-
"""Continuous health checking loop"""
|
|
148
|
+
"""Continuous health checking loop with restart backoff."""
|
|
128
149
|
while self.running:
|
|
129
150
|
try:
|
|
130
151
|
if self.cli and self.cli.tmux.session_exists():
|
|
131
|
-
self.
|
|
152
|
+
self._auto_restart_with_backoff()
|
|
132
153
|
|
|
133
154
|
if self.websocket_clients:
|
|
134
155
|
status = await self._get_full_status()
|
|
@@ -139,6 +160,42 @@ class TaskmuxDaemon:
|
|
|
139
160
|
self.logger.error(f"Health check error: {e}")
|
|
140
161
|
await asyncio.sleep(5)
|
|
141
162
|
|
|
163
|
+
def _auto_restart_with_backoff(self) -> None:
|
|
164
|
+
"""Auto-restart unhealthy tasks with exponential backoff."""
|
|
165
|
+
assert self.cli is not None
|
|
166
|
+
now = time.time()
|
|
167
|
+
|
|
168
|
+
for task_name, task_cfg in self.cli.config.tasks.items():
|
|
169
|
+
healthy = self.cli.tmux.check_task_health(task_name)
|
|
170
|
+
|
|
171
|
+
if healthy:
|
|
172
|
+
# Reset tracker if healthy for >60s
|
|
173
|
+
info = self.restart_tracker.get(task_name)
|
|
174
|
+
if info["count"] > 0 and now - info["last"] > 60:
|
|
175
|
+
self.restart_tracker.reset(task_name)
|
|
176
|
+
continue
|
|
177
|
+
|
|
178
|
+
# Skip if not previously healthy (avoid restart loop on first check)
|
|
179
|
+
prev_health = self.cli.tmux.task_health.get(task_name, {}).get("healthy", True)
|
|
180
|
+
if not prev_health:
|
|
181
|
+
info = self.restart_tracker.get(task_name)
|
|
182
|
+
|
|
183
|
+
# Check max_restarts
|
|
184
|
+
if task_cfg.max_restarts and info["count"] >= task_cfg.max_restarts:
|
|
185
|
+
self.logger.warning(
|
|
186
|
+
f"Task '{task_name}' exceeded max restarts ({task_cfg.max_restarts})"
|
|
187
|
+
)
|
|
188
|
+
continue
|
|
189
|
+
|
|
190
|
+
# Check backoff delay
|
|
191
|
+
delay = min(task_cfg.restart_backoff ** info["count"], 60)
|
|
192
|
+
if info["last"] and now - info["last"] < delay:
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
self.logger.info(f"Auto-restarting unhealthy task: {task_name}")
|
|
196
|
+
self.cli.tmux.restart_task(task_name)
|
|
197
|
+
self.restart_tracker.record(task_name)
|
|
198
|
+
|
|
142
199
|
async def _start_api_server(self) -> None:
|
|
143
200
|
"""Start WebSocket API server"""
|
|
144
201
|
|
|
@@ -37,10 +37,14 @@ class TaskConfig(_StrictConfig):
|
|
|
37
37
|
command: str
|
|
38
38
|
auto_start: bool = True
|
|
39
39
|
cwd: str | None = None
|
|
40
|
+
port: int | None = None
|
|
40
41
|
health_check: str | None = None
|
|
41
42
|
health_interval: int = 10
|
|
42
43
|
health_timeout: int = 5
|
|
43
44
|
health_retries: int = 3
|
|
45
|
+
stop_grace_period: int = 5
|
|
46
|
+
max_restarts: int = 5
|
|
47
|
+
restart_backoff: float = 2.0
|
|
44
48
|
depends_on: list[str] = []
|
|
45
49
|
hooks: HookConfig = HookConfig()
|
|
46
50
|
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
import contextlib
|
|
6
|
+
import os
|
|
7
|
+
import signal as sig
|
|
5
8
|
import subprocess
|
|
6
9
|
import time
|
|
7
10
|
from collections import deque
|
|
@@ -14,6 +17,8 @@ from rich.markup import escape
|
|
|
14
17
|
from .hooks import runHook
|
|
15
18
|
from .models import TaskmuxConfig
|
|
16
19
|
|
|
20
|
+
SHELL_NAMES = frozenset(("bash", "zsh", "sh", "fish"))
|
|
21
|
+
|
|
17
22
|
TASK_COLORS = ["cyan", "green", "yellow", "magenta", "blue", "red"]
|
|
18
23
|
|
|
19
24
|
|
|
@@ -74,11 +79,61 @@ class TmuxManager:
|
|
|
74
79
|
window = self._get_session().windows.get(window_name=task_name, default=None)
|
|
75
80
|
if window and window.active_pane:
|
|
76
81
|
cmd = getattr(window.active_pane, "pane_current_command", "")
|
|
77
|
-
return cmd != "" and cmd
|
|
82
|
+
return cmd != "" and cmd not in SHELL_NAMES
|
|
78
83
|
except Exception:
|
|
79
84
|
pass
|
|
80
85
|
return False
|
|
81
86
|
|
|
87
|
+
def _wait_for_exit(self, pane: libtmux.Pane, timeout: float) -> bool:
|
|
88
|
+
"""Poll pane_current_command until it returns to a shell or timeout."""
|
|
89
|
+
elapsed = 0.0
|
|
90
|
+
while elapsed < timeout:
|
|
91
|
+
time.sleep(0.5)
|
|
92
|
+
elapsed += 0.5
|
|
93
|
+
cmd = getattr(pane, "pane_current_command", "")
|
|
94
|
+
if cmd == "" or cmd in SHELL_NAMES:
|
|
95
|
+
return True
|
|
96
|
+
return False
|
|
97
|
+
|
|
98
|
+
def _get_pane_child_pid(self, pane: libtmux.Pane) -> int | None:
|
|
99
|
+
"""Get the child process PID running inside the pane's shell."""
|
|
100
|
+
shell_pid = getattr(pane, "pane_pid", None)
|
|
101
|
+
if not shell_pid:
|
|
102
|
+
return None
|
|
103
|
+
try:
|
|
104
|
+
result = subprocess.run(
|
|
105
|
+
["pgrep", "-P", str(shell_pid)],
|
|
106
|
+
capture_output=True,
|
|
107
|
+
text=True,
|
|
108
|
+
)
|
|
109
|
+
pids = result.stdout.strip().split("\n")
|
|
110
|
+
return int(pids[0]) if pids and pids[0] else None
|
|
111
|
+
except (ValueError, OSError):
|
|
112
|
+
return None
|
|
113
|
+
|
|
114
|
+
def _kill_process_tree(self, pid: int, signal_num: int = sig.SIGKILL) -> None:
|
|
115
|
+
"""Kill process and all children via process group."""
|
|
116
|
+
try:
|
|
117
|
+
pgid = os.getpgid(pid)
|
|
118
|
+
os.killpg(pgid, signal_num)
|
|
119
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
120
|
+
pass
|
|
121
|
+
|
|
122
|
+
def _cleanup_port(self, port: int) -> None:
|
|
123
|
+
"""Kill any process listening on port."""
|
|
124
|
+
try:
|
|
125
|
+
result = subprocess.run(
|
|
126
|
+
["lsof", "-ti", f":{port}"],
|
|
127
|
+
capture_output=True,
|
|
128
|
+
text=True,
|
|
129
|
+
)
|
|
130
|
+
for pid_str in result.stdout.strip().split("\n"):
|
|
131
|
+
if pid_str.strip():
|
|
132
|
+
with contextlib.suppress(ProcessLookupError, PermissionError, OSError):
|
|
133
|
+
os.kill(int(pid_str.strip()), sig.SIGKILL)
|
|
134
|
+
except OSError:
|
|
135
|
+
pass
|
|
136
|
+
|
|
82
137
|
def is_task_healthy(self, task_name: str) -> bool:
|
|
83
138
|
"""Check task health. Uses health_check command if configured, falls back to pane-alive."""
|
|
84
139
|
task_cfg = self.config.tasks.get(task_name)
|
|
@@ -188,6 +243,10 @@ class TmuxManager:
|
|
|
188
243
|
sess = self._get_session()
|
|
189
244
|
task_cfg = self.config.tasks[task_name]
|
|
190
245
|
|
|
246
|
+
# Kill anything occupying the port before starting
|
|
247
|
+
if task_cfg.port:
|
|
248
|
+
self._cleanup_port(task_cfg.port)
|
|
249
|
+
|
|
191
250
|
# Check if already running
|
|
192
251
|
existing = sess.windows.get(window_name=task_name, default=None)
|
|
193
252
|
if existing:
|
|
@@ -226,7 +285,7 @@ class TmuxManager:
|
|
|
226
285
|
print(f"Started task '{task_name}'")
|
|
227
286
|
|
|
228
287
|
def stop_task(self, task_name: str) -> None:
|
|
229
|
-
"""Graceful stop
|
|
288
|
+
"""Graceful stop with signal escalation: C-c → SIGTERM → SIGKILL."""
|
|
230
289
|
if not self.session_exists():
|
|
231
290
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
232
291
|
return
|
|
@@ -249,8 +308,22 @@ class TmuxManager:
|
|
|
249
308
|
|
|
250
309
|
pane = window.active_pane
|
|
251
310
|
if pane:
|
|
311
|
+
# Phase 1: SIGINT (Ctrl+C)
|
|
252
312
|
pane.send_keys("C-c")
|
|
253
313
|
|
|
314
|
+
if not self._wait_for_exit(pane, timeout=task_cfg.stop_grace_period):
|
|
315
|
+
# Phase 2: SIGTERM via process group
|
|
316
|
+
pid = self._get_pane_child_pid(pane)
|
|
317
|
+
if pid:
|
|
318
|
+
self._kill_process_tree(pid, sig.SIGTERM)
|
|
319
|
+
|
|
320
|
+
if not self._wait_for_exit(pane, timeout=3):
|
|
321
|
+
# Phase 3: SIGKILL entire process group
|
|
322
|
+
if pid:
|
|
323
|
+
self._kill_process_tree(pid, sig.SIGKILL)
|
|
324
|
+
# Final wait for cleanup
|
|
325
|
+
self._wait_for_exit(pane, timeout=1)
|
|
326
|
+
|
|
254
327
|
# Hooks: task after_stop, then global after_stop
|
|
255
328
|
runHook(task_cfg.hooks.after_stop, task_name)
|
|
256
329
|
runHook(self.config.hooks.after_stop, task_name)
|
|
@@ -322,7 +395,7 @@ class TmuxManager:
|
|
|
322
395
|
print(f"Started session '{self.config.name}' with {len(auto_tasks)} tasks")
|
|
323
396
|
|
|
324
397
|
def stop_all(self) -> None:
|
|
325
|
-
"""Stop all tasks then kill session."""
|
|
398
|
+
"""Stop all tasks with signal escalation then kill session."""
|
|
326
399
|
if not self.session_exists():
|
|
327
400
|
print("No session running")
|
|
328
401
|
return
|
|
@@ -330,8 +403,9 @@ class TmuxManager:
|
|
|
330
403
|
# Global before_stop
|
|
331
404
|
runHook(self.config.hooks.before_stop)
|
|
332
405
|
|
|
333
|
-
#
|
|
406
|
+
# Phase 1: send C-c to all tasks
|
|
334
407
|
sess = self._get_session()
|
|
408
|
+
pane_map: dict[str, tuple[libtmux.Pane, int | None]] = {}
|
|
335
409
|
for task_name, task_cfg in self.config.tasks.items():
|
|
336
410
|
window = sess.windows.get(window_name=task_name, default=None)
|
|
337
411
|
if window:
|
|
@@ -339,7 +413,30 @@ class TmuxManager:
|
|
|
339
413
|
pane = window.active_pane
|
|
340
414
|
if pane:
|
|
341
415
|
pane.send_keys("C-c")
|
|
342
|
-
|
|
416
|
+
pid = self._get_pane_child_pid(pane)
|
|
417
|
+
pane_map[task_name] = (pane, pid)
|
|
418
|
+
|
|
419
|
+
# Wait for graceful exit (use max grace period across tasks)
|
|
420
|
+
max_grace = max((cfg.stop_grace_period for cfg in self.config.tasks.values()), default=5)
|
|
421
|
+
time.sleep(max_grace)
|
|
422
|
+
|
|
423
|
+
# Phase 2: SIGTERM then SIGKILL any survivors
|
|
424
|
+
for _name, (pane, pid) in pane_map.items():
|
|
425
|
+
cmd = getattr(pane, "pane_current_command", "")
|
|
426
|
+
if cmd and cmd not in SHELL_NAMES and pid:
|
|
427
|
+
self._kill_process_tree(pid, sig.SIGTERM)
|
|
428
|
+
|
|
429
|
+
time.sleep(1)
|
|
430
|
+
|
|
431
|
+
for _name, (pane, pid) in pane_map.items():
|
|
432
|
+
cmd = getattr(pane, "pane_current_command", "")
|
|
433
|
+
if cmd and cmd not in SHELL_NAMES and pid:
|
|
434
|
+
self._kill_process_tree(pid, sig.SIGKILL)
|
|
435
|
+
|
|
436
|
+
# Run after_stop hooks
|
|
437
|
+
for task_name in pane_map:
|
|
438
|
+
task_cfg = self.config.tasks[task_name]
|
|
439
|
+
runHook(task_cfg.hooks.after_stop, task_name)
|
|
343
440
|
|
|
344
441
|
sess.kill()
|
|
345
442
|
|
|
@@ -358,7 +455,7 @@ class TmuxManager:
|
|
|
358
455
|
self.start_all()
|
|
359
456
|
|
|
360
457
|
def restart_task(self, task_name: str) -> None:
|
|
361
|
-
"""Restart a specific task
|
|
458
|
+
"""Restart a specific task with full stop escalation."""
|
|
362
459
|
if not self.session_exists():
|
|
363
460
|
print(f"Session '{self.config.name}' doesn't exist. Run 'taskmux start' first.")
|
|
364
461
|
return
|
|
@@ -373,13 +470,25 @@ class TmuxManager:
|
|
|
373
470
|
|
|
374
471
|
window = sess.windows.get(window_name=task_name, default=None)
|
|
375
472
|
if window:
|
|
473
|
+
# Full stop with signal escalation
|
|
376
474
|
runHook(task_cfg.hooks.before_stop, task_name)
|
|
377
475
|
pane = window.active_pane
|
|
378
476
|
if pane:
|
|
379
477
|
pane.send_keys("C-c")
|
|
380
|
-
|
|
478
|
+
if not self._wait_for_exit(pane, timeout=task_cfg.stop_grace_period):
|
|
479
|
+
pid = self._get_pane_child_pid(pane)
|
|
480
|
+
if pid:
|
|
481
|
+
self._kill_process_tree(pid, sig.SIGTERM)
|
|
482
|
+
if not self._wait_for_exit(pane, timeout=3):
|
|
483
|
+
if pid:
|
|
484
|
+
self._kill_process_tree(pid, sig.SIGKILL)
|
|
485
|
+
self._wait_for_exit(pane, timeout=1)
|
|
381
486
|
runHook(task_cfg.hooks.after_stop, task_name)
|
|
382
487
|
|
|
488
|
+
# Port cleanup before restart
|
|
489
|
+
if task_cfg.port:
|
|
490
|
+
self._cleanup_port(task_cfg.port)
|
|
491
|
+
|
|
383
492
|
runHook(task_cfg.hooks.before_start, task_name)
|
|
384
493
|
pane = window.active_pane
|
|
385
494
|
if pane:
|
|
@@ -388,6 +497,9 @@ class TmuxManager:
|
|
|
388
497
|
pane.send_keys(command, enter=True)
|
|
389
498
|
runHook(task_cfg.hooks.after_start, task_name)
|
|
390
499
|
else:
|
|
500
|
+
# Port cleanup before start
|
|
501
|
+
if task_cfg.port:
|
|
502
|
+
self._cleanup_port(task_cfg.port)
|
|
391
503
|
runHook(task_cfg.hooks.before_start, task_name)
|
|
392
504
|
self._send_command_to_window(sess, task_name, command, task_cfg.cwd)
|
|
393
505
|
runHook(task_cfg.hooks.after_start, task_name)
|
|
@@ -395,13 +507,18 @@ class TmuxManager:
|
|
|
395
507
|
print(f"Restarted task '{task_name}'")
|
|
396
508
|
|
|
397
509
|
def kill_task(self, task_name: str) -> None:
|
|
398
|
-
"""Kill a specific task"""
|
|
510
|
+
"""Kill a specific task (process group + window)."""
|
|
399
511
|
if not self.session_exists():
|
|
400
512
|
print(f"Session '{self.config.name}' doesn't exist")
|
|
401
513
|
return
|
|
402
514
|
|
|
403
515
|
window = self._get_session().windows.get(window_name=task_name, default=None)
|
|
404
516
|
if window:
|
|
517
|
+
pane = window.active_pane
|
|
518
|
+
if pane:
|
|
519
|
+
pid = self._get_pane_child_pid(pane)
|
|
520
|
+
if pid:
|
|
521
|
+
self._kill_process_tree(pid)
|
|
405
522
|
window.kill()
|
|
406
523
|
print(f"Killed task '{task_name}'")
|
|
407
524
|
else:
|
|
@@ -577,8 +694,12 @@ class TmuxManager:
|
|
|
577
694
|
console.print(f"[{color}]{prefix}[/{color}] {escape(line)}")
|
|
578
695
|
|
|
579
696
|
def list_tasks(self) -> None:
|
|
580
|
-
"""List all tasks and their status"""
|
|
581
|
-
|
|
697
|
+
"""List all tasks and their status."""
|
|
698
|
+
exists = self.session_exists()
|
|
699
|
+
print(f"Session '{self.config.name}': {'Running' if exists else 'Stopped'}")
|
|
700
|
+
if exists:
|
|
701
|
+
windows = self.list_windows()
|
|
702
|
+
print(f"Active tasks: {len(windows)}")
|
|
582
703
|
print("-" * 70)
|
|
583
704
|
|
|
584
705
|
if not self.config.tasks:
|
|
@@ -592,22 +713,14 @@ class TmuxManager:
|
|
|
592
713
|
"Healthy" if status["healthy"] else "Running" if status["running"] else "Stopped"
|
|
593
714
|
)
|
|
594
715
|
auto = "" if task_cfg.auto_start else " [manual]"
|
|
716
|
+
port = f" :{task_cfg.port}" if task_cfg.port else ""
|
|
595
717
|
extras = ""
|
|
596
718
|
if task_cfg.cwd:
|
|
597
719
|
extras += f" cwd={task_cfg.cwd}"
|
|
598
720
|
if task_cfg.depends_on:
|
|
599
721
|
extras += f" deps=[{','.join(task_cfg.depends_on)}]"
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
def show_status(self) -> None:
|
|
603
|
-
"""Show overall session status"""
|
|
604
|
-
exists = self.session_exists()
|
|
605
|
-
print(f"Session '{self.config.name}': {'Running' if exists else 'Stopped'} (libtmux)")
|
|
606
|
-
|
|
607
|
-
if exists:
|
|
608
|
-
windows = self.list_windows()
|
|
609
|
-
print(f"Active tasks: {len(windows)}")
|
|
610
|
-
self.list_tasks()
|
|
722
|
+
line = f"{health_icon} {status_text:8} {task_name:15}{port:7} {task_cfg.command}"
|
|
723
|
+
print(f"{line}{auto}{extras}")
|
|
611
724
|
|
|
612
725
|
def check_task_health(self, task_name: str) -> bool:
|
|
613
726
|
"""Check if a task is healthy"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|