vibego 1.0.2__py3-none-any.whl → 1.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,137 +1,65 @@
1
1
  #!/usr/bin/env python3
2
- """Master Post-startup health check script.
2
+ """Master post-startup health check.
3
3
 
4
- Process:
5
- 1. Wait for a startup flag to appear in the master log.
6
- 2. Call MasterManager to start the specified worker (default hyphavibebotbackend).
7
- 3. Automatically discover the worker's chat_id(Prioritize the state file, then read the latest log).
8
- 4. Send a probe message to the chat through the Telegram Bot API to confirm that the sending is successful.
9
- 5. If any step fails, an exception is thrown and an attempt is made to notify the administrator.
10
-
11
- Note: This script will not automatically retry the restart and will only return a non-zero exit code for processing by the outer script.
4
+ This lightweight variant only verifies that the master process has emitted the
5
+ readiness marker in its log file. Worker bootstrap and Telegram probes are no
6
+ longer executed because master restart confirmation is now limited to the
7
+ controller process itself.
12
8
  """
13
9
  from __future__ import annotations
14
10
 
15
11
  import argparse
16
- import asyncio
17
12
  import json
18
13
  import os
19
- import re
20
14
  import sys
21
15
  import time
22
16
  from pathlib import Path
23
- from typing import Optional
24
- from urllib.error import URLError, HTTPError
25
17
  from urllib.request import Request, urlopen
26
18
 
27
- # Import the configuration and tools in master and reuse the project parsing logic
19
+ # Make sure the master module can be imported from the repository root
28
20
  ROOT_DIR = Path(__file__).resolve().parent.parent
29
21
  ROOT_DIR_STR = str(ROOT_DIR)
30
22
  if ROOT_DIR_STR not in sys.path:
31
- # Make sure the master module can be imported from the repository root
32
23
  sys.path.insert(0, ROOT_DIR_STR)
33
24
 
34
25
  import master # type: ignore
35
- from project_repository import ProjectRepository
26
+
36
27
  DEFAULT_MASTER_LOG = master.LOG_ROOT_PATH / "vibe.log"
37
28
  DEFAULT_TIMEOUT_MASTER = 60.0
38
- DEFAULT_TIMEOUT_PROBE = 15.0
39
- PROBE_TEXT = "hello"
40
- REPOSITORY = ProjectRepository(master.CONFIG_DB_PATH, master.CONFIG_PATH)
41
-
42
-
43
- def _load_project(project_id: str) -> master.ProjectConfig:
44
- """Get the project configuration based on the slug or bot name, and list the options on failure."""
45
-
46
- record = REPOSITORY.get_by_slug(project_id)
47
- if record is None:
48
- record = REPOSITORY.get_by_bot_name(project_id)
49
- if record is None:
50
- available = [r.project_slug for r in REPOSITORY.list_projects()]
51
- raise RuntimeError(f"No items found {project_id}, Optional items: {available}")
52
- return master.ProjectConfig.from_dict(record.to_dict())
29
+ MASTER_READY_MARKER = "Master Started, listening for administrator commands."
53
30
 
54
31
 
55
32
  def _wait_for_log_flag(path: Path, pattern: str, timeout: float) -> None:
56
- """Waits for a specific mark in the log within the timeout period."""
33
+ """Poll the master log until the readiness marker is detected or timeout."""
57
34
 
58
35
  deadline = time.monotonic() + timeout
59
- position = 0
36
+ if path.exists():
37
+ position = path.stat().st_size
38
+ initialized = True
39
+ else:
40
+ position = 0
41
+ initialized = False
60
42
  while time.monotonic() < deadline:
61
43
  if path.exists():
62
- if position == 0:
63
- position = path.stat().st_size
44
+ if not initialized:
45
+ position = 0
46
+ initialized = True
64
47
  with path.open("r", encoding="utf-8", errors="ignore") as fh:
65
48
  fh.seek(position)
66
49
  while time.monotonic() < deadline:
67
50
  line = fh.readline()
68
51
  if not line:
69
52
  time.sleep(0.5)
70
- continue
53
+ break
71
54
  position = fh.tell()
72
55
  if pattern in line:
73
56
  return
74
57
  time.sleep(0.5)
75
- raise TimeoutError(f"exist {timeout:.0f} No log markers detected in seconds: {pattern}")
76
-
77
-
78
- def _extract_chat_id_from_logs(log_path: Path) -> Optional[int]:
79
- """Find the most recent chat from the log file in reverse order_id."""
80
-
81
- if not log_path.exists():
82
- return None
83
- pattern = re.compile(r"chat=(-?\d+)")
84
- try:
85
- lines = log_path.read_text(encoding="utf-8", errors="ignore").splitlines()
86
- except Exception:
87
- return None
88
- for line in reversed(lines[-200:]): # Reverse search for recent records
89
- match = pattern.search(line)
90
- if match:
91
- try:
92
- return int(match.group(1))
93
- except ValueError:
94
- continue
95
- return None
96
-
97
-
98
- def _ensure_chat_id(cfg: master.ProjectConfig, manager: master.MasterManager) -> int:
99
- """Make sure the task is assigned chat_id, Backfill from log and write back to state if necessary."""
100
-
101
- state = manager.state_store.data.get(cfg.project_slug)
102
- if state and state.chat_id:
103
- return int(state.chat_id)
104
- # Fall back to log search
105
- log_dir = master.LOG_ROOT_PATH / (cfg.default_model.lower()) / cfg.project_slug
106
- chat_id = _extract_chat_id_from_logs(log_dir / "run_bot.log")
107
- if chat_id is None:
108
- raise RuntimeError(
109
- "Unable to get chat automatically_id, Please manually have a conversation with the bot to write the state/log"
110
- )
111
- # will discover chat_id Write back the state for easy reuse next time
112
- manager.state_store.update(cfg.project_slug, chat_id=chat_id)
113
- return chat_id
114
-
115
-
116
- def _send_probe(bot_token: str, chat_id: int, text: str, timeout: float) -> None:
117
- """Send a probe message to the specified chat to verify that the Telegram API is available."""
118
-
119
- url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
120
- payload = json.dumps({"chat_id": chat_id, "text": text, "disable_notification": True}).encode("utf-8")
121
- request = Request(url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
122
- try:
123
- with urlopen(request, timeout=timeout) as resp:
124
- data = json.loads(resp.read().decode("utf-8"))
125
- except HTTPError as exc: # pragma: no cover - Thrown when network exception occurs
126
- raise RuntimeError(f"Failed to send probe message, HTTP {exc.code}: {exc.reason}") from exc
127
- except URLError as exc: # pragma: no cover - Thrown when network exception occurs
128
- raise RuntimeError(f"Failed to send probe message: {exc}") from exc
129
- if not data.get("ok"):
130
- raise RuntimeError(f"Failed to send probe message: {data}")
58
+ raise TimeoutError(f"No log markers detected within {timeout:.0f}s: {pattern}")
131
59
 
132
60
 
133
61
  def _format_admin_notice(reason: str) -> str:
134
- """Generate alarm text to notify the administrator."""
62
+ """Compose the notification text for administrator alerts."""
135
63
 
136
64
  return (
137
65
  "Master Restart health check failed\n"
@@ -141,7 +69,7 @@ def _format_admin_notice(reason: str) -> str:
141
69
 
142
70
 
143
71
  def _notify_admins(reason: str) -> None:
144
- """If the master token is available, the failure reason is broadcast to the administrator list."""
72
+ """Broadcast the failure reason to administrators if the master token exists."""
145
73
 
146
74
  master_token = os.environ.get("MASTER_BOT_TOKEN")
147
75
  if not master_token:
@@ -163,58 +91,30 @@ def _notify_admins(reason: str) -> None:
163
91
  continue
164
92
 
165
93
 
166
- def _ensure_worker(cfg: master.ProjectConfig) -> master.MasterManager:
167
- """Starts the specified project worker and returns the temporarily constructed MasterManager."""
168
-
169
- records = REPOSITORY.list_projects()
170
- configs = [master.ProjectConfig.from_dict(record.to_dict()) for record in records]
171
- state_store = master.StateStore(
172
- master.STATE_PATH, {item.project_slug: item for item in configs}
173
- )
174
- manager = master.MasterManager(configs, state_store=state_store)
175
-
176
- async def _run() -> None:
177
- """The coroutine performs the actual stop/start process."""
178
- # Make sure to stop the old instance first(If it existsexist)
179
- try:
180
- await manager.stop_worker(cfg)
181
- except Exception:
182
- pass
183
- await manager.run_worker(cfg)
184
-
185
- asyncio.run(_run())
186
- return manager
187
-
188
-
189
94
  def main() -> int:
190
- """Command line entry, performs master health check and returns exit code."""
191
-
192
- parser = argparse.ArgumentParser(description="Master Post-launch health check")
193
- parser.add_argument("--project", default="hyphavibebotbackend", help="Project slug or bot name")
194
- parser.add_argument("--master-log", default=str(DEFAULT_MASTER_LOG), help="master Log path")
195
- parser.add_argument("--master-timeout", type=float, default=DEFAULT_TIMEOUT_MASTER, help="master Log wait timeout (Second)")
196
- parser.add_argument("--probe-timeout", type=float, default=DEFAULT_TIMEOUT_PROBE, help="Telegram Probe timeout (Second)")
95
+ """Command line entry point, only validates master readiness."""
96
+
97
+ parser = argparse.ArgumentParser(description="Master post-launch health check (master only)")
98
+ parser.add_argument("--master-log", default=str(DEFAULT_MASTER_LOG), help="Master log path")
99
+ parser.add_argument(
100
+ "--master-timeout",
101
+ type=float,
102
+ default=DEFAULT_TIMEOUT_MASTER,
103
+ help="Master log wait timeout (seconds)",
104
+ )
197
105
  args = parser.parse_args()
198
106
 
199
- project_id = master._sanitize_slug(args.project)
200
107
  master_log = Path(args.master_log)
201
108
 
202
109
  try:
203
- _wait_for_log_flag(master_log, "Master Started, listening for administrator commands.", args.master_timeout)
204
- cfg = _load_project(project_id)
205
- manager = _ensure_worker(cfg)
206
- chat_id = _ensure_chat_id(cfg, manager)
207
- _send_probe(cfg.bot_token, chat_id, PROBE_TEXT, args.probe_timeout)
110
+ _wait_for_log_flag(master_log, MASTER_READY_MARKER, args.master_timeout)
208
111
  except Exception as exc:
209
112
  reason = str(exc)
210
113
  _notify_admins(reason)
211
114
  print(f"[healthcheck] fail: {reason}", file=sys.stderr)
212
115
  return 1
213
116
  else:
214
- print(
215
- "[healthcheck] success: master ready,"
216
- f"worker={cfg.display_name} Startup completed, chat_id={chat_id}, Probe message sent"
217
- )
117
+ print("[healthcheck] success: master ready, worker checks skipped by configuration")
218
118
  return 0
219
119
 
220
120
 
scripts/requirements.txt CHANGED
@@ -2,3 +2,4 @@ aiogram>=3.0.0,<4.0.0
2
2
  aiohttp-socks>=0.10.0
3
3
  aiosqlite>=0.19.0
4
4
  markdown-it-py>=3.0.0,<4.0.0
5
+ watchdog>=4.0.0
scripts/run_bot.sh CHANGED
@@ -90,6 +90,7 @@ LOG_DIR="$(log_dir_for "$MODEL" "$PROJECT_NAME")"
90
90
  MODEL_LOG="$LOG_DIR/model.log"
91
91
  RUN_LOG="$LOG_DIR/run_bot.log"
92
92
  POINTER_FILE="$LOG_DIR/${MODEL_POINTER_BASENAME:-current_session.txt}"
93
+ LOCK_FILE="${SESSION_LOCK_FILE:-$LOG_DIR/${MODEL_POINTER_LOCK_BASENAME:-session_lock.json}}"
93
94
  TMUX_SESSION="$(tmux_session_for "$PROJECT_NAME")"
94
95
 
95
96
  expand_model_workdir() {
@@ -167,6 +168,8 @@ export MODEL_SESSION_ROOT
167
168
  export MODEL_SESSION_GLOB
168
169
  export SESSION_POINTER_FILE="$POINTER_FILE"
169
170
  export CODEX_SESSION_FILE_PATH="$POINTER_FILE"
171
+ export SESSION_LOCK_FILE="$LOCK_FILE"
172
+ export SESSION_LOCK_FILE_PATH="$LOCK_FILE"
170
173
  export TMUX_SESSION="$TMUX_SESSION"
171
174
  export TMUX_LOG="$MODEL_LOG"
172
175
  export PROJECT_NAME="$PROJECT_NAME"
@@ -0,0 +1,265 @@
1
+ #!/usr/bin/env python3
2
+ """Capture the session JSONL path generated after a worker launch and lock it.
3
+
4
+ This helper watches the Codex/Claude session directory after the tmux worker
5
+ starts. The first rollout file that matches the configured working directory is
6
+ recorded into both the pointer file (used by the worker for streaming) and a
7
+ dedicated lock file so subsequent bindings never drift to another CLI session.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import logging
15
+ import sys
16
+ import time
17
+ from datetime import datetime, timezone
18
+ from fnmatch import fnmatch
19
+ from pathlib import Path
20
+ from threading import Event, Lock
21
+ from typing import Iterable, Optional
22
+
23
+ try:
24
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
25
+ from watchdog.observers import Observer
26
+ except Exception: # pragma: no cover - watchdog is an optional dependency fallback
27
+ Observer = None
28
+ FileSystemEventHandler = object # type: ignore[assignment]
29
+
30
+
31
+ log = logging.getLogger("session_pointer_watch")
32
+
33
+
34
+ def _resolve(path: str) -> Path:
35
+ return Path(path).expanduser().resolve()
36
+
37
+
38
+ def _read_session_cwd(path: Path) -> Optional[str]:
39
+ """Read first JSON line and return payload.cwd when available."""
40
+
41
+ try:
42
+ with path.open("r", encoding="utf-8", errors="ignore") as fh:
43
+ first_line = fh.readline()
44
+ except OSError:
45
+ return None
46
+ if not first_line:
47
+ return None
48
+ try:
49
+ data = json.loads(first_line)
50
+ except json.JSONDecodeError:
51
+ return None
52
+ payload = data.get("payload")
53
+ if isinstance(payload, dict):
54
+ raw = payload.get("cwd")
55
+ if isinstance(raw, str):
56
+ return raw
57
+ return None
58
+
59
+
60
+ def _iter_candidate_files(roots: Iterable[Path], glob: str) -> Iterable[Path]:
61
+ for root in roots:
62
+ if not root.exists():
63
+ continue
64
+ try:
65
+ real_root = root.resolve()
66
+ except OSError:
67
+ real_root = root
68
+ yield from real_root.glob(f"**/{glob}")
69
+
70
+
71
class _RolloutCapture(FileSystemEventHandler):
    """Pick the first rollout file that satisfies the capture criteria.

    Candidates reach :meth:`_consider` either from the watchdog callbacks
    (``on_created`` / ``on_moved``) or from the directory scans performed by
    :meth:`poll_until_found`. The first accepted path is stored in
    ``_chosen`` and signalled through ``_event``; later candidates never
    replace it.
    """

    def __init__(
        self,
        *,
        pattern: str,
        baseline: set[str],
        start_wall: float,
        start_monotonic: float,
        target_cwd: Optional[str],
        timeout: float,
        poll_interval: float,
    ) -> None:
        """Store the selection criteria.

        Args:
            pattern: Pattern matched against a candidate's file name with
                ``fnmatch``; also used as the glob for directory scans.
            baseline: Resolved paths (as strings) that must never be chosen.
            start_wall: Wall-clock timestamp; files whose mtime is older
                (beyond a 10 ms tolerance) are ignored.
            start_monotonic: Monotonic timestamp the deadline is measured from.
            target_cwd: Working directory the session header must report;
                a falsy value disables the header check.
            timeout: Seconds after ``start_monotonic`` at which
                :meth:`poll_until_found` gives up.
            poll_interval: Seconds between directory scans and between
                header re-reads.
        """
        self._pattern = pattern
        self._baseline = baseline
        self._start_wall = start_wall
        self._target_cwd = target_cwd
        self._deadline = start_monotonic + timeout
        self._poll_interval = poll_interval
        self._chosen: Optional[Path] = None
        self._event = Event()
        self._lock = Lock()

    def _consider(self, candidate: Path) -> None:
        """Evaluate *candidate* and record it as the chosen rollout if it qualifies.

        A candidate is rejected when it is a directory, its name does not
        match the pattern, its resolved path is in the baseline, it cannot
        be stat-ed, its mtime predates the start time, or its header reports
        a working directory different from ``target_cwd``.
        """
        if candidate.is_dir():
            return
        name = candidate.name
        if not fnmatch(name, self._pattern):
            return
        try:
            real_path = candidate.resolve()
        except OSError:
            real_path = candidate
        real_key = str(real_path)
        if real_key in self._baseline:
            return
        try:
            stat = real_path.stat()
        except OSError:
            return
        if stat.st_mtime + 0.01 < self._start_wall:
            # Ignore historical files.
            return

        if self._target_cwd:
            # Wait until the JSON header is flushed and matches our CWD.
            # NOTE(review): if the header never yields a cwd within this
            # window the loop simply ends and the file is accepted below
            # without a cwd match - confirm this leniency is intended.
            deadline = time.monotonic() + self._poll_interval * 10
            while time.monotonic() < deadline:
                cwd = _read_session_cwd(real_path)
                if cwd is None:
                    time.sleep(self._poll_interval)
                    continue
                if cwd == self._target_cwd:
                    break
                log.debug("Skip rollout with mismatched cwd=%s", cwd)
                return
        # First writer wins; the lock keeps concurrent callers from
        # overwriting an already chosen path.
        with self._lock:
            if self._chosen is None:
                self._chosen = real_path
                self._event.set()

    # The following methods are only called when watchdog is available.
    def on_created(self, event: FileSystemEvent) -> None:  # type: ignore[override]
        """Consider the path of a newly created file; directories are ignored."""
        if getattr(event, "is_directory", False):
            return
        self._consider(Path(event.src_path))

    def on_moved(self, event: FileSystemEvent) -> None:  # type: ignore[override]
        """Consider the destination path of a moved file; directories are ignored."""
        if getattr(event, "is_directory", False):
            return
        self._consider(Path(event.dest_path))

    def poll_until_found(self, roots: Iterable[Path]) -> Optional[Path]:
        """Block until a rollout is chosen or the deadline passes.

        Each cycle waits up to ``poll_interval`` for the selection event and,
        if it is still unset, scans *roots* for candidates.

        Args:
            roots: Directories to scan; any iterable is accepted.

        Returns:
            The chosen rollout path, or ``None`` when the deadline expired.
        """
        # Materialise once: the scan below runs on every cycle, and a
        # one-shot iterator would be exhausted after the first pass.
        scan_roots = tuple(roots)
        while time.monotonic() < self._deadline:
            remaining = self._deadline - time.monotonic()
            if remaining <= 0:
                break
            wait_time = min(self._poll_interval, remaining)
            if self._event.wait(timeout=wait_time):
                break
            for candidate in _iter_candidate_files(scan_roots, self._pattern):
                self._consider(candidate)
                if self._event.is_set():
                    break
            if self._event.is_set():
                break
        return self._chosen
159
+
160
+
161
+ def _write_pointer(pointer: Path, session_path: Path) -> None:
162
+ pointer.parent.mkdir(parents=True, exist_ok=True)
163
+ pointer.write_text(str(session_path), encoding="utf-8")
164
+
165
+
166
+ def _write_lock(lock_file: Path, *, session_path: Path, tmux_session: str, project: str, workdir: str, method: str) -> None:
167
+ payload = {
168
+ "session_path": str(session_path),
169
+ "captured_at": datetime.now(timezone.utc).isoformat(),
170
+ "tmux_session": tmux_session,
171
+ "project": project,
172
+ "workdir": workdir,
173
+ "method": method,
174
+ }
175
+ lock_file.parent.mkdir(parents=True, exist_ok=True)
176
+ lock_file.write_text(json.dumps(payload, indent=2), encoding="utf-8")
177
+
178
+
179
def main(argv: Optional[list[str]] = None) -> int:
    """Wait for a new rollout file and record it in the pointer and lock files.

    Args:
        argv: Command-line arguments; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``0`` once both files are written, ``1`` when no rollout was
        detected before the timeout.
    """
    parser = argparse.ArgumentParser(description="Capture newly created Codex/Claude rollout JSONL and write pointer+lock files.")
    parser.add_argument("--pointer", required=True, help="Path to current_session.txt pointer file")
    parser.add_argument("--lock", required=True, help="Path to persistent lock metadata JSON")
    parser.add_argument("--session-root", required=False, help="Primary sessions root directory")
    parser.add_argument("--additional-root", action="append", default=[], help="Extra directories to monitor for rollouts")
    parser.add_argument("--glob", default="rollout-*.jsonl", help="Glob pattern for rollout files")
    parser.add_argument("--workdir", default="", help="Model working directory, used to filter sessions")
    parser.add_argument("--tmux-session", default="", help="tmux session name for diagnostics")
    parser.add_argument("--project", default="", help="Project slug for diagnostics")
    parser.add_argument("--timeout", type=float, default=180.0, help="Maximum seconds to wait for a new rollout")
    parser.add_argument("--poll", type=float, default=0.5, help="Polling interval when watchdog is unavailable")
    args = parser.parse_args(argv)

    logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

    pointer_path = _resolve(args.pointer)
    lock_path = _resolve(args.lock)

    # Collect the directories to watch, in priority order and without duplicates.
    roots: list[Path] = []
    if args.session_root:
        roots.append(_resolve(args.session_root))
    pointer_parent = pointer_path.parent
    if pointer_parent not in roots:
        roots.append(pointer_parent)
    for raw in args.additional_root:
        resolved = _resolve(raw)
        if resolved not in roots:
            roots.append(resolved)

    # Files that already exist must never be mistaken for the new session.
    baseline = {str(path.resolve()) for path in _iter_candidate_files(roots, args.glob)}
    start_wall = time.time()
    start_monotonic = time.monotonic()

    capture = _RolloutCapture(
        pattern=args.glob,
        baseline=baseline,
        start_wall=start_wall,
        start_monotonic=start_monotonic,
        target_cwd=args.workdir or None,
        timeout=args.timeout,
        poll_interval=max(args.poll, 0.1),
    )

    observer = None
    method = "polling"
    if Observer is not None:
        pending_observer = Observer()
        try:
            for root in roots:
                if root.exists():
                    pending_observer.schedule(capture, str(root), recursive=True)
            pending_observer.start()
        except OSError as exc:
            # Starting the native watcher can fail (for example when the
            # OS watch limit is exhausted). poll_until_found() scans the
            # roots on its own, so degrade to polling instead of aborting.
            log.warning("watchdog observer failed to start (%s), falling back to polling", exc)
        else:
            observer = pending_observer
            method = "watchdog"
    else:  # pragma: no cover - watchdog is optional
        log.warning("watchdog not available, falling back to polling")

    try:
        session_path = capture.poll_until_found(roots)
    finally:
        if observer is not None:
            observer.stop()
            observer.join(timeout=5)

    if session_path is None:
        log.error(
            "Failed to detect rollout file within timeout %.1fs (tmux session=%s, project=%s)",
            args.timeout,
            args.tmux_session or "-",
            args.project or "-",
        )
        return 1

    _write_pointer(pointer_path, session_path)
    _write_lock(
        lock_path,
        session_path=session_path,
        tmux_session=args.tmux_session,
        project=args.project,
        workdir=args.workdir,
        method=method,
    )

    log.info("Recorded session pointer -> %s", session_path)
    return 0
262
+
263
+
264
# Script entry point: exit the interpreter with main()'s return code.
if __name__ == "__main__":
    raise SystemExit(main())
scripts/start.sh CHANGED
@@ -118,8 +118,9 @@ ensure_codex_installed() {
118
118
  ensure_codex_installed
119
119
 
120
120
  select_python_binary() {
121
- # Choose to meet CPython <=3.12 interpreter, disabled by default 3.13(pydantic-core There is no compatible wheel in the pipx base environment)
121
+ # Select a compatible CPython version; defaults accept 3.9-3.14 and can be overridden via env variables.
122
122
  local allow_py313="${VIBEGO_ALLOW_PY313:-}"
123
+ local supported_max_minor="${VIBEGO_MAX_MINOR:-14}"
123
124
  local candidates=()
124
125
  local chosen=""
125
126
  local name
@@ -152,18 +153,24 @@ select_python_binary() {
152
153
  if [[ -n "${VIBEGO_PYTHON:-}" && "$name" == "$VIBEGO_PYTHON" ]]; then
153
154
  explicit_override=1
154
155
  fi
155
- if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor == 13 )) && [[ "$allow_py313" != "1" ]] && (( explicit_override == 0 )); then
156
- log_line "jump over ${name} (Version ${version_raw}): Python 3 is disabled by default.13, Configurable VIBEGO_ALLOW_PY313=1 cover" >&2
157
- continue
156
+ if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor == 13 )) && (( explicit_override == 0 )); then
157
+ if [[ "$allow_py313" == "0" ]]; then
158
+ log_line "Skip ${name} (version ${version_raw}): disabled explicitly by VIBEGO_ALLOW_PY313=0" >&2
159
+ continue
160
+ fi
161
+ log_line "Detected ${name} (version ${version_raw}): Python 3.13 accepted by default; use VIBEGO_ALLOW_PY313=1 to prefer or 0 to disable" >&2
158
162
  fi
159
- if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor > 13 )); then
160
- log_line "jump over ${name} (Version ${version_raw}): higher than 3.13" >&2
163
+ if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor > supported_max_minor )) && (( explicit_override == 0 )); then
164
+ log_line "Skip ${name} (version ${version_raw}): above supported ceiling 3.${supported_max_minor}; override with VIBEGO_MAX_MINOR if needed" >&2
161
165
  continue
162
166
  fi
163
167
  if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor < 9 )); then
164
168
  log_line "jump over ${name} (Version ${version_raw}): less than 3.9, May be missing official wheels" >&2
165
169
  continue
166
170
  fi
171
+ if [[ "$minor" =~ ^[0-9]+$ ]] && (( minor >= 14 )); then
172
+ log_line "Detected ${name} (version ${version_raw}): ensure dependencies support this Python version" >&2
173
+ fi
167
174
  chosen="$name"
168
175
  log_line "Using the Python interpreter:${chosen} (Version ${version_raw})" >&2
169
176
  break
@@ -438,13 +445,13 @@ fi
438
445
  log_info "master Started in background, PID=$MASTER_PID, Log writing ${LOG_FILE}"
439
446
 
440
447
  # Health check: wait for the master to report readiness (worker and Telegram probes are no longer run)
441
- log_info "Start health check..."
448
+ log_info "Start master readiness check..."
442
449
  HEALTHCHECK_START=$(date +%s)
443
450
 
444
- if python scripts/master_healthcheck.py --project hyphavibebotbackend; then
451
+ if python scripts/master_healthcheck.py --master-log "$LOG_FILE"; then
445
452
  HEALTHCHECK_END=$(date +%s)
446
453
  HEALTHCHECK_DURATION=$((HEALTHCHECK_END - HEALTHCHECK_START))
447
- log_info "OK: Master health check passed (elapsed ${HEALTHCHECK_DURATION}s)"
454
+ log_info "OK: Master readiness confirmed (elapsed ${HEALTHCHECK_DURATION}s)"
448
455
  else
449
456
  HEALTHCHECK_END=$(date +%s)
450
457
  HEALTHCHECK_DURATION=$((HEALTHCHECK_END - HEALTHCHECK_START))
@@ -18,6 +18,11 @@ MODEL_WORKDIR="${MODEL_WORKDIR:-$ROOT_DIR}"
18
18
  MODEL_SESSION_ROOT="${MODEL_SESSION_ROOT:-${CODEX_SESSION_ROOT:-$HOME/.codex/sessions}}"
19
19
  MODEL_SESSION_GLOB="${MODEL_SESSION_GLOB:-rollout-*.jsonl}"
20
20
  SESSION_POINTER_FILE="${SESSION_POINTER_FILE:-$LOG_ROOT/${MODEL_NAME:-codex}/${PROJECT_NAME:-project}/current_session.txt}"
21
+ SESSION_LOCK_FILE="${SESSION_LOCK_FILE:-${SESSION_POINTER_FILE%.txt}.lock.json}"
22
+ SESSION_CAPTURE_TIMEOUT="${SESSION_CAPTURE_TIMEOUT:-180}"
23
+ SESSION_CAPTURE_POLL_INTERVAL="${SESSION_CAPTURE_POLL_INTERVAL:-0.5}"
24
+ # Ensure optional CODEX session root is always defined even under `set -u`
25
+ : "${CODEX_SESSIONS_ROOT:=}"
21
26
 
22
27
  # Avoid oh-my-zsh popping up update prompts in non-interactive environments
23
28
  export DISABLE_UPDATE_PROMPT="${DISABLE_UPDATE_PROMPT:-true}"
@@ -67,6 +72,7 @@ MODEL_SESSION_ROOT=$(expand_path "$MODEL_SESSION_ROOT")
67
72
  SESSION_POINTER_FILE=$(expand_path "$SESSION_POINTER_FILE")
68
73
  ensure_dir "$(dirname "$LOG_PATH")"
69
74
  ensure_dir "$(dirname "$SESSION_POINTER_FILE")"
75
+ ensure_dir "$(dirname "$SESSION_LOCK_FILE")"
70
76
 
71
77
  run_tmux() {
72
78
  if (( DRY_RUN )); then
@@ -143,5 +149,24 @@ if (( DRY_RUN )); then
143
149
  fi
144
150
 
145
151
  : > "$SESSION_POINTER_FILE"
152
+ rm -f "$SESSION_LOCK_FILE"
153
+
154
+ if (( ! DRY_RUN )); then
155
+ WATCH_ARGS=("$PYTHON_EXEC" "$ROOT_DIR/scripts/session_pointer_watch.py" "--pointer" "$SESSION_POINTER_FILE" "--lock" "$SESSION_LOCK_FILE" "--glob" "$MODEL_SESSION_GLOB" "--workdir" "$MODEL_WORKDIR" "--tmux-session" "$SESSION_NAME" "--project" "${PROJECT_NAME:-}")
156
+ if [[ -n "$MODEL_SESSION_ROOT" ]]; then
157
+ WATCH_ARGS+=("--session-root" "$MODEL_SESSION_ROOT")
158
+ fi
159
+ if [[ -n "$CODEX_SESSIONS_ROOT" ]]; then
160
+ WATCH_ARGS+=("--additional-root" "$CODEX_SESSIONS_ROOT")
161
+ fi
162
+ WATCH_ARGS+=("--additional-root" "$(dirname "$SESSION_POINTER_FILE")")
163
+ WATCH_ARGS+=("--additional-root" "$(dirname "$SESSION_POINTER_FILE")/sessions")
164
+ WATCH_ARGS+=("--timeout" "$SESSION_CAPTURE_TIMEOUT")
165
+ WATCH_ARGS+=("--poll" "$SESSION_CAPTURE_POLL_INTERVAL")
166
+ if ! "${WATCH_ARGS[@]}"; then
167
+ echo "Failed to capture Codex/Claude session log for tmux session '$SESSION_NAME'. Please check CLI startup output." >&2
168
+ exit 1
169
+ fi
170
+ fi
146
171
 
147
172
  exit 0