src-py-lib 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,950 @@
1
+ """Central structured logging for small CLIs and scripts.
2
+
3
+ Use `configure_logging()` once near process startup. Other modules should use
4
+ `logging.getLogger(__name__)` for human-readable operator messages and
5
+ `event()` / `log()` for structured JSONL events.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import ast
11
+ import contextlib
12
+ import contextvars
13
+ import datetime as _datetime
14
+ import json
15
+ import logging
16
+ import os
17
+ import secrets
18
+ import subprocess
19
+ import sys
20
+ import threading
21
+ import time
22
+ from collections.abc import Callable, Generator, Iterable, Mapping
23
+ from concurrent.futures import Executor, Future
24
+ from dataclasses import dataclass, field
25
+ from pathlib import Path
26
+ from typing import Any, Final, Self, cast
27
+
28
+ if sys.platform != "win32":
29
+ import resource
30
+
31
+ from pydantic import model_validator
32
+
33
+ from src_py_lib.utils.config import Config, config_field, config_snapshot
34
+
35
# Random identifier generated once at import time; used as the default run id
# in settings and in default log-file names.
RUN: Final[str] = secrets.token_hex(4)

# Defaults for log-file placement, retention, and verbosity.
DEFAULT_LOGS_DIR: Final[Path] = Path("logs")
DEFAULT_RETAIN_FILES: Final[int] = 50
DEFAULT_LOG_FILE_LEVEL: Final[str] = "debug"

# Environment variable names used by the logging configuration fields.
SRC_LOG_LEVEL: Final[str] = "SRC_LOG_LEVEL"
SRC_LOG_VERBOSE: Final[str] = "SRC_LOG_VERBOSE"
SRC_LOG_QUIET: Final[str] = "SRC_LOG_QUIET"
SRC_LOG_SILENT: Final[str] = "SRC_LOG_SILENT"

# Number of random bytes behind each trace/span identifier.
TRACE_SPAN_BYTES: Final[int] = 4
MEBIBYTE: Final[int] = 1024 * 1024

# A config key containing any of these fragments (case-insensitive) is treated
# as a secret when a config snapshot is logged.
SECRET_FIELD_FRAGMENTS: Final[tuple[str, ...]] = (
    "api_key",
    "authorization",
    "cookie",
    "password",
    "secret",
    "token",
)

# Keys written first, in this order, in every JSON log line; the remaining
# keys follow alphabetically.
LOG_FIELD_ORDER: Final[tuple[str, ...]] = (
    "ts",
    "command",
    "level",
    "run",
    "trace",
    "span",
    "parent_span",
    "logger",
    "event",
    "phase",
    "stage",
    "message",
)

# LogRecord attribute names that mark a record as a structured event and carry
# its fields.
_STRUCTURED_EVENT_ATTR: Final[str] = "_src_py_lib_structured_event"
_STRUCTURED_FIELDS_ATTR: Final[str] = "_src_py_lib_structured_fields"

# Message prefixes and logger names recognised for httpx/httpcore records.
_HTTPCORE_RESPONSE_HEADERS_PREFIX: Final[str] = "receive_response_headers.complete return_value="
_HTTPX_REQUEST_PREFIX: Final[str] = "HTTP Request: "
_HTTP_DEPENDENCY_LOGGER_PREFIXES: Final[tuple[str, ...]] = ("httpx", "httpcore")

# Inherited structured fields installed by `log_context()`.
_CONTEXT: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar("src_py_lib_log_context")
74
+
75
+
76
@dataclass(frozen=True)
class LoggingSettings:
    """Immutable description of where log output goes and at which levels."""

    # Logger to configure; the empty string selects the root logger.
    logger_name: str = ""
    # Level name (or numeric string) for the terminal handler.
    terminal_level: str = "info"
    # Level for the JSON log file; None defers to SRC_LOG_LEVEL, then the default.
    log_file_level: str | None = None
    # Explicit JSON log-file path; when None a timestamped file under `logs_dir` is used.
    log_file: Path | None = None
    # Directory for generated log files; None (with no `log_file`) disables file logging.
    logs_dir: Path | None = DEFAULT_LOGS_DIR
    # Run identifier written into every JSON line and the default file name.
    run: str = RUN
    # Number of existing log files kept when pruning the log directory.
    retain_log_files: int = DEFAULT_RETAIN_FILES
    # Drop httpx/httpcore records (applied only when configuring the root logger).
    suppress_http_dependency_logs: bool = True
    # Seconds between resource samples; None disables the resource sampler.
    resource_sample_interval_seconds: float | None = None
89
+
90
+
91
class LoggingConfig(Config):
    """Logging options exposed through CLI flags and environment variables.

    `verbose`, `quiet` and `silent` are mutually exclusive shortcuts for a
    log level; the validator rejects configurations that set more than one.
    """

    src_log_level: str | None = config_field(
        default="INFO",
        env_var=SRC_LOG_LEVEL,
        cli_flag="--src-log-level",
        metavar="LEVEL",
        help="Log level (default: INFO)",
    )
    verbose: bool = config_field(
        default=False,
        env_var=SRC_LOG_VERBOSE,
        cli_flag="--verbose",
        cli_aliases=("-v",),
        cli_action="store_true",
        help="Alias for --src-log-level DEBUG",
    )
    quiet: bool = config_field(
        default=False,
        env_var=SRC_LOG_QUIET,
        cli_flag="--quiet",
        cli_aliases=("-q",),
        cli_action="store_true",
        help="Alias for --src-log-level WARNING",
    )
    silent: bool = config_field(
        default=False,
        env_var=SRC_LOG_SILENT,
        cli_flag="--silent",
        cli_aliases=("-s",),
        cli_action="store_true",
        help="Alias for --src-log-level ERROR",
    )

    @model_validator(mode="after")
    def validate_log_level_alias(self) -> Self:
        """Reject configurations that enable more than one level alias.

        Raises:
            ValueError: If two or more of `verbose`, `quiet`, `silent` are set.
        """
        enabled_aliases = [flag for flag in (self.verbose, self.quiet, self.silent) if flag]
        if len(enabled_aliases) > 1:
            raise ValueError("choose only one of --verbose/-v, --quiet/-q, or --silent/-s")
        return self
132
+
133
+
134
def resolve_log_level_name(
    config: object | None = None,
    *,
    log_level: str | None = None,
    verbose: bool | None = None,
    quiet: bool | None = None,
    silent: bool | None = None,
) -> str | None:
    """Translate the common CLI level aliases into a level name.

    Each alias is taken from the keyword argument when given, otherwise from
    the same-named attribute on `config` (missing attributes count as False).
    Aliases win over `log_level`, in the order verbose, quiet, silent. An
    explicit `log_level` is returned verbatim so that `configure_logging()`
    keeps ownership of parsing and fallback. With nothing set, the config's
    `src_log_level` (if it is a string) is returned, else None.
    """

    def _alias(explicit: bool | None, attribute: str) -> bool:
        # A keyword argument overrides whatever the config object carries.
        if explicit is not None:
            return explicit
        return bool(getattr(config, attribute, False))

    # Resolve all three up front, exactly once each.
    alias_levels = (
        (_alias(verbose, "verbose"), "DEBUG"),
        (_alias(quiet, "quiet"), "WARNING"),
        (_alias(silent, "silent"), "ERROR"),
    )
    for enabled, level_name in alias_levels:
        if enabled:
            return level_name
    if log_level is None:
        return _src_log_level_from_config(config)
    return log_level
160
+
161
+
162
def logging_settings_from_config(
    config: object | None = None,
    *,
    terminal_default: str = "INFO",
    log_file_default: str | None = DEFAULT_LOG_FILE_LEVEL,
    logger_name: str = "",
    log_file: Path | None = None,
    logs_dir: Path | None = DEFAULT_LOGS_DIR,
    run: str = RUN,
    retain_log_files: int = DEFAULT_RETAIN_FILES,
    suppress_http_dependency_logs: bool = True,
    resource_sample_interval_seconds: float | None = None,
) -> LoggingSettings:
    """Build `LoggingSettings` from a config object's level options.

    The level resolved by `resolve_log_level_name(config)` is applied to both
    the terminal and the log file; when it is empty or None the two
    `*_default` arguments are used instead. All other arguments are copied
    into the returned settings unchanged.
    """
    resolved_level = resolve_log_level_name(config)
    if resolved_level:
        terminal_level: str = resolved_level
        file_level: str | None = resolved_level
    else:
        terminal_level = terminal_default
        file_level = log_file_default
    return LoggingSettings(
        logger_name=logger_name,
        terminal_level=terminal_level,
        log_file_level=file_level,
        log_file=log_file,
        logs_dir=logs_dir,
        run=run,
        retain_log_files=retain_log_files,
        suppress_http_dependency_logs=suppress_http_dependency_logs,
        resource_sample_interval_seconds=resource_sample_interval_seconds,
    )
188
+
189
+
190
@dataclass(frozen=True)
class _SpanContext:
    """Identifiers of the span currently being executed."""

    # Identifier shared by every span descending from the same root `event()`.
    trace: str
    # Identifier of this span.
    span: str
    # Identifier of the enclosing span; None for a root span.
    parent_span: str | None = None
195
+
196
+
197
# Span active in the current context; None outside any `event()` block.
_SPAN_CONTEXT: contextvars.ContextVar[_SpanContext | None]
_SPAN_CONTEXT = contextvars.ContextVar("src_py_lib_span_context", default=None)
200
+
201
# Guards every read and write of the process-wide HTTP counters below.
_HTTP_METRICS_LOCK: Final[threading.Lock] = threading.Lock()

# Process-wide HTTP counters, all starting at zero.
_HTTP_METRICS: dict[str, int] = dict.fromkeys(
    (
        "http_request_attempt_count",
        "http_request_bytes_total",
        "http_response_bytes_total",
        "http_retry_count",
        "http_2xx_count",
        "http_3xx_count",
        "http_4xx_count",
        "http_429_count",
        "http_5xx_count",
        "http_transport_error_count",
    ),
    0,
)
214
+
215
+
216
@dataclass
class ResourceSampler:
    """Emit optional process resource samples and summarize usage at run end.

    A positive `interval_seconds` enables a daemon thread that logs a DEBUG
    `resource_sample` event every interval. An interval of zero disables
    periodic sampling but still allows `stop_and_summary()` to report totals.

    The rolling sample state (`_last_cpu_seconds`, `_last_sample_at`,
    `_peak_rss_bytes`) is not lock-protected, so `start()` and
    `stop_and_summary()` take their own samples only while the worker thread
    is not running.

    Raises:
        ValueError: If `interval_seconds` is negative.
    """

    interval_seconds: float
    _stop: threading.Event = field(init=False, default_factory=threading.Event)
    _thread: threading.Thread | None = field(init=False, default=None)
    _started_at: float = field(init=False, default_factory=time.perf_counter)
    _last_sample_at: float = field(init=False, default_factory=time.perf_counter)
    _last_cpu_seconds: float = field(init=False, default=0.0)
    _start_usage: Any = field(init=False, default=None)
    _peak_rss_bytes: int = field(init=False, default=0)

    def __post_init__(self) -> None:
        if self.interval_seconds < 0:
            raise ValueError("resource_sample_interval_seconds must be >= 0")
        # Baseline usage so the run summary can report deltas.
        self._start_usage = _resource_usage()
        if self._start_usage is not None:
            self._last_cpu_seconds = _cpu_seconds(self._start_usage)

    def start(self) -> None:
        """Start periodic sampling, if enabled by a positive interval."""
        if self.interval_seconds <= 0:
            return
        # Take the initial sample before the worker exists so the two threads
        # never update the rolling CPU/RSS state concurrently.
        self.emit_sample()
        # Run the worker inside a copy of the caller's context so its events
        # inherit the current log context and span.
        context = contextvars.copy_context()
        self._thread = threading.Thread(
            target=context.run,
            args=(self._loop,),
            name="ResourceSampler",
            daemon=True,
        )
        self._thread.start()

    def emit_sample(self) -> None:
        """Emit one DEBUG `resource_sample` event."""
        log("debug", "resource_sample", **self._sample_fields())

    def stop_and_summary(self) -> dict[str, Any]:
        """Stop periodic sampling and return run-end resource fields."""
        # Stop the worker first; the final sample below then cannot interleave
        # with a sample the worker is still computing.
        self._stop.set()
        if self._thread is not None:
            self._thread.join(timeout=2.0)
        if self.interval_seconds > 0:
            self.emit_sample()
        usage = _resource_usage()
        summary: dict[str, Any] = {
            "cpu_count_logical": os.cpu_count() or 0,
            "num_threads": threading.active_count(),
        }
        file_descriptors = _num_file_descriptors()
        if file_descriptors is not None:
            summary["num_fds"] = file_descriptors
        rss_bytes = _rss_bytes(usage)
        if rss_bytes is not None:
            self._peak_rss_bytes = max(self._peak_rss_bytes, rss_bytes)
        if self._peak_rss_bytes:
            summary["peak_rss_mb"] = _bytes_to_mib(self._peak_rss_bytes)
        if usage is not None and self._start_usage is not None:
            # Report deltas against the usage captured at construction.
            summary["cpu_user_seconds"] = round(
                float(usage.ru_utime) - float(self._start_usage.ru_utime), 3
            )
            summary["cpu_system_seconds"] = round(
                float(usage.ru_stime) - float(self._start_usage.ru_stime), 3
            )
            summary["io_read_count"] = int(usage.ru_inblock) - int(self._start_usage.ru_inblock)
            summary["io_write_count"] = int(usage.ru_oublock) - int(self._start_usage.ru_oublock)
        return summary

    def _loop(self) -> None:
        # `wait` returns True once `_stop` is set, which ends the loop.
        while not self._stop.wait(self.interval_seconds):
            self.emit_sample()

    def _sample_fields(self) -> dict[str, Any]:
        now = time.perf_counter()
        usage = _resource_usage()
        fields: dict[str, Any] = {
            "num_threads": threading.active_count(),
        }
        rss_bytes = _rss_bytes(usage)
        if rss_bytes is not None:
            self._peak_rss_bytes = max(self._peak_rss_bytes, rss_bytes)
            fields["rss_mb"] = _bytes_to_mib(rss_bytes)
        file_descriptors = _num_file_descriptors()
        if file_descriptors is not None:
            fields["num_fds"] = file_descriptors
        if usage is not None:
            cpu_seconds = _cpu_seconds(usage)
            # Clamp the elapsed time so back-to-back samples cannot divide by zero.
            elapsed = max(now - self._last_sample_at, 0.001)
            fields["process_cpu_percent"] = round(
                max(cpu_seconds - self._last_cpu_seconds, 0.0) / elapsed * 100.0,
                1,
            )
            self._last_cpu_seconds = cpu_seconds
        self._last_sample_at = now
        return fields
312
+
313
+
314
class _DropStructuredEvents(logging.Filter):
    """Reject records that carry the structured-event marker attribute."""

    def filter(self, record: logging.LogRecord) -> bool:
        is_structured_event = hasattr(record, _STRUCTURED_EVENT_ATTR)
        return not is_structured_event
317
+
318
+
319
class _DropHTTPDependencyLogs(logging.Filter):
    """Reject records from the HTTP dependency loggers and their children."""

    def filter(self, record: logging.LogRecord) -> bool:
        logger_name = record.name
        for prefix in _HTTP_DEPENDENCY_LOGGER_PREFIXES:
            # Match the logger itself or any dotted descendant, but not
            # unrelated names that merely share the leading characters.
            if logger_name == prefix or logger_name.startswith(f"{prefix}."):
                return False
        return True
325
+
326
+
327
class JSONLogFileHandler(logging.Handler):
    """Logging handler that appends one JSON object per record to a file.

    The file at `path` is opened for writing (truncating any existing
    content) when the handler is created and stays open until `close()`.
    """

    def __init__(self, path: Path, *, run: str, level: int) -> None:
        super().__init__(level=level)
        self.path = path
        self._run = run
        self._lock = threading.Lock()
        # buffering=1 selects line buffering for the text file.
        self._file = path.open("w", encoding="utf-8", buffering=1)

    def emit(self, record: logging.LogRecord) -> None:
        try:
            payload = self._payload(record)
            with self._lock:
                self._file.write(json.dumps(_ordered_payload(payload), default=str) + "\n")
        except Exception:
            self.handleError(record)

    def _payload(self, record: logging.LogRecord) -> dict[str, Any]:
        """Build the JSON payload for a structured event or a plain record."""
        timestamp = _datetime.datetime.now(_datetime.UTC).isoformat(timespec="milliseconds")
        event_name = getattr(record, _STRUCTURED_EVENT_ATTR, None)
        payload: dict[str, Any]
        if isinstance(event_name, str):
            # Structured event: fields attached by `log()` may override the
            # base keys.
            payload = {
                "ts": timestamp,
                "run": self._run,
                "level": record.levelname,
                "event": event_name,
            }
            attached = getattr(record, _STRUCTURED_FIELDS_ATTR, {})
            if isinstance(attached, Mapping):
                payload.update(cast(Mapping[str, Any], attached))
        else:
            # Plain logging call: recorded as a generic "log" event.
            message, parsed_fields = _structured_log_fields(record)
            payload = {
                "ts": timestamp,
                "run": self._run,
                "level": record.levelname,
                "event": "log",
                "logger": record.name,
                "message": message,
            }
            payload.update(parsed_fields)
        # Context fields only fill keys the payload does not already have.
        payload.update(_current_log_fields(payload))
        if record.exc_info:
            payload["exc_info"] = self.format(record)
        return payload

    def close(self) -> None:
        # Closing is best-effort; the base-class cleanup must always run.
        with contextlib.suppress(Exception):
            with self._lock:
                self._file.flush()
                self._file.close()
        super().close()
377
+
378
+
379
def configure_logging(config: LoggingSettings | None = None) -> Path | None:
    """Configure terminal logging and optional JSON log-file logging.

    Resets the process-wide HTTP counters, replaces every handler on the
    target logger (the root logger when `config.logger_name` is empty) and
    disables propagation. JSON log-file handlers left over from an earlier
    call are closed so their file descriptors are released.

    Args:
        config: Destinations and levels; defaults to `LoggingSettings()`.

    Returns:
        The JSON log-file path when file logging is enabled, otherwise None.
    """
    config = config or LoggingSettings()
    reset_observability_metrics()
    terminal_level = _log_level(config.terminal_level)
    log_file_level = _log_file_level(config.log_file_level)
    log_file = config.log_file
    if log_file is None and config.logs_dir is not None:
        log_file = default_log_file(config.logs_dir, run=config.run)
    root_or_package_logger = logging.getLogger(config.logger_name)
    for stale_handler in list(root_or_package_logger.handlers):
        root_or_package_logger.removeHandler(stale_handler)
        # Only close handlers this module owns; foreign handlers are detached
        # but left for their owners to manage.
        if isinstance(stale_handler, JSONLogFileHandler):
            stale_handler.close()
    # The logger must pass whatever the most verbose handler wants to see.
    root_or_package_logger.setLevel(
        min(
            terminal_level,
            log_file_level if log_file else terminal_level,
        )
    )
    root_or_package_logger.propagate = False
    # HTTP dependency records are only filtered when the root logger is configured.
    drop_http_logs = config.suppress_http_dependency_logs and config.logger_name == ""

    terminal_handler = logging.StreamHandler()
    terminal_handler.setLevel(terminal_level)
    terminal_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
    # Structured events are kept off the terminal.
    terminal_handler.addFilter(_DropStructuredEvents())
    if drop_http_logs:
        terminal_handler.addFilter(_DropHTTPDependencyLogs())
    root_or_package_logger.addHandler(terminal_handler)

    if log_file is None:
        return None

    log_file.parent.mkdir(parents=True, exist_ok=True)
    _prune_old_log_files(log_file.parent, config.retain_log_files)
    log_file_handler = JSONLogFileHandler(
        log_file,
        run=config.run,
        level=log_file_level,
    )
    if drop_http_logs:
        log_file_handler.addFilter(_DropHTTPDependencyLogs())
    root_or_package_logger.addHandler(log_file_handler)
    root_or_package_logger.info("Writing log events to %s.", log_file)
    return log_file
424
+
425
+
426
def reset_observability_metrics() -> None:
    """Zero every process-wide HTTP counter reported in run summaries."""
    with _HTTP_METRICS_LOCK:
        # Update in place so the shared dict object stays the same.
        _HTTP_METRICS.update(dict.fromkeys(_HTTP_METRICS, 0))
431
+
432
+
433
def record_http_attempt(
    *,
    request_bytes: int,
    response_bytes: int = 0,
    status_code: int | None = None,
    transport_error: bool = False,
) -> None:
    """Count one HTTP attempt in the process-wide run counters.

    Always bumps the attempt and byte totals. A transport error bumps its own
    counter. When a status code is given, the matching 2xx/3xx/4xx/5xx
    counter is bumped (any code of 500 or above counts as 5xx; other classes
    are not counted), and 429 is additionally counted on its own.
    """
    with _HTTP_METRICS_LOCK:
        _HTTP_METRICS["http_request_attempt_count"] += 1
        _HTTP_METRICS["http_request_bytes_total"] += request_bytes
        _HTTP_METRICS["http_response_bytes_total"] += response_bytes
        if transport_error:
            _HTTP_METRICS["http_transport_error_count"] += 1
        if status_code is None:
            return
        if status_code >= 500:
            class_metric: str | None = "http_5xx_count"
        else:
            class_metric = {
                2: "http_2xx_count",
                3: "http_3xx_count",
                4: "http_4xx_count",
            }.get(status_code // 100)
        if class_metric is not None:
            _HTTP_METRICS[class_metric] += 1
        if status_code == 429:
            _HTTP_METRICS["http_429_count"] += 1
460
+
461
+
462
def record_http_retry() -> None:
    """Count one upcoming retry of an HTTP attempt."""
    with _HTTP_METRICS_LOCK:
        retries_so_far = _HTTP_METRICS["http_retry_count"]
        _HTTP_METRICS["http_retry_count"] = retries_so_far + 1
466
+
467
+
468
def observability_summary() -> dict[str, Any]:
    """Return a snapshot copy of the process-wide HTTP counters."""
    with _HTTP_METRICS_LOCK:
        snapshot: dict[str, Any] = dict(_HTTP_METRICS)
    return snapshot
472
+
473
+
474
@contextlib.contextmanager
def logging_context(
    name: str,
    config: object | None = None,
    *,
    git_cwd: Path | str | None = None,
    logging_config: LoggingSettings | None = None,
    run_fields: Mapping[str, Any] | None = None,
    run_summary: Callable[[], Mapping[str, Any]] | None = None,
) -> Generator[Path | None]:
    """Configure logging for one command run and bracket it with `run` events.

    On entry: configures logging, optionally starts the resource sampler,
    installs `command=name` as log context, and emits `run` (phase start)
    followed by `startup`. Yields the JSON log-file path (or None).

    On exit: emits `run` (phase end) with duration, status, exit code, the
    HTTP counters, sampler totals when sampling is enabled, and whatever
    `run_summary()` returns. Exceptions raised by the body are re-raised.
    """
    if logging_config:
        settings = logging_config
    else:
        settings = LoggingSettings(log_file_level=_src_log_level_from_config(config))
    log_file = configure_logging(settings)
    sampler = _resource_sampler(settings)
    started = time.perf_counter()
    failure: BaseException | None = None
    with log_context(command=name):
        if sampler is not None:
            sampler.start()
        info(
            "run",
            logger_name=settings.logger_name,
            **{"phase": "start", **dict(run_fields or {})},
        )
        try:
            startup_event(
                command=name,
                config=config,
                log_file=log_file,
                git_cwd=_git_cwd_path(git_cwd),
                logger_name=settings.logger_name,
            )
            yield log_file
        except BaseException as exception:
            # Remember the failure for the end event, then let it propagate.
            failure = exception
            raise
        finally:
            error_type = _run_error_type(failure)
            summary: dict[str, Any] = {}
            if sampler is not None:
                summary.update(sampler.stop_and_summary())
            summary.update(observability_summary())
            summary["exit_code"] = _run_exit_code(failure)
            if run_summary is not None:
                summary.update(dict(run_summary()))
            if error_type:
                end_level, end_status = "error", "error"
            else:
                end_level, end_status = "info", "ok"
            # Later entries win: run fields, then summary values, may override
            # the base keys.
            end_fields = {
                "phase": "end",
                "duration_ms": round((time.perf_counter() - started) * 1000.0),
                "status": end_status,
                "error_type": error_type,
                **dict(run_fields or {}),
                **summary,
            }
            log(
                end_level,
                "run",
                logger_name=settings.logger_name,
                **end_fields,
            )
532
+
533
+
534
def default_log_file(logs_dir: Path = DEFAULT_LOGS_DIR, *, run: str = RUN) -> Path:
    """Build `<logs_dir>/<UTC timestamp>-<run>.json` for the current moment."""
    now_utc = _datetime.datetime.now(_datetime.UTC)
    # Drop the first "+" of the formatted UTC offset from the file name.
    stamp = now_utc.strftime("%Y-%m-%d-%H-%M-%S-%z").replace("+", "", 1)
    return logs_dir / f"{stamp}-{run}.json"
539
+
540
+
541
def log(level: str, key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit one structured event named `key` at `level`.

    The current log-context and span fields are merged with `fields`
    (explicit fields win) and attached to the record. Nothing happens when
    the target logger is not enabled for the level.
    """
    severity = _log_level(level)
    target = logging.getLogger(logger_name)
    if target.isEnabledFor(severity):
        event_fields = {**_current_log_fields(), **fields}
        target.log(
            severity,
            "event=%s",
            key,
            extra={
                _STRUCTURED_EVENT_ATTR: key,
                _STRUCTURED_FIELDS_ATTR: event_fields,
            },
        )
556
+
557
+
558
def debug(key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit the structured event `key` at DEBUG level."""
    return log("debug", key, logger_name=logger_name, **fields)
561
+
562
+
563
def info(key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit the structured event `key` at INFO level."""
    return log("info", key, logger_name=logger_name, **fields)
566
+
567
+
568
def warning(key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit the structured event `key` at WARNING level."""
    return log("warning", key, logger_name=logger_name, **fields)
571
+
572
+
573
def error(key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit the structured event `key` at ERROR level."""
    return log("error", key, logger_name=logger_name, **fields)
576
+
577
+
578
def critical(key: str, *, logger_name: str = "", **fields: Any) -> None:
    """Emit the structured event `key` at CRITICAL level."""
    return log("critical", key, logger_name=logger_name, **fields)
581
+
582
+
583
@contextlib.contextmanager
def log_context(**fields: Any) -> Generator[None]:
    """Layer `fields` over the inherited log context for the enclosed block.

    The previous context is restored on exit, including when the block raises.
    """
    inherited = _CONTEXT.get({})
    restore_token = _CONTEXT.set({**inherited, **fields})
    try:
        yield
    finally:
        _CONTEXT.reset(restore_token)
591
+
592
+
593
@contextlib.contextmanager
def stage(name: str, **fields: Any) -> Generator[None]:
    """Tag nested logs and structured events with `stage=name` plus `fields`."""
    stage_scope = log_context(stage=name, **fields)
    with stage_scope:
        yield
598
+
599
+
600
@contextlib.contextmanager
def event(
    key: str,
    *,
    level: str = "info",
    start_level: str | None = None,
    omit_success_status: bool = False,
    logger_name: str = "",
    **fields: Any,
) -> Generator[dict[str, Any]]:
    """Emit start/end structured events around a block of work.

    A new span is opened for the block: it reuses the enclosing span's trace
    (or starts a new trace) and records the enclosing span as its parent.

    Args:
        key: Event name used for both the start and the end event.
        level: Level of the end event on success, and of the start event
            unless `start_level` is given.
        start_level: Optional separate level for the start event.
        omit_success_status: When true, a successful end event carries no
            `status` / `error_type` fields.
        logger_name: Logger the events are sent through.
        **fields: Fields attached to both events.

    Yields:
        A mutable dict; entries added by the block are merged into the end
        event.

    Any exception raised by the block is logged on the end event at ERROR
    level with `status="error"` and then re-raised.
    """
    parent = _SPAN_CONTEXT.get()
    span = _SpanContext(
        trace=parent.trace if parent else secrets.token_hex(TRACE_SPAN_BYTES),
        span=secrets.token_hex(TRACE_SPAN_BYTES),
        parent_span=parent.span if parent else None,
    )
    reset_token = _SPAN_CONTEXT.set(span)
    try:
        log(start_level or level, key, logger_name=logger_name, phase="start", **fields)
        started = time.perf_counter()
        extra: dict[str, Any] = {}
        failure: BaseException | None = None
        try:
            yield extra
        except BaseException as exception:
            failure = exception
            raise
        finally:
            # Identity check: an exception type may define __bool__/__len__
            # and evaluate as falsy even though the block failed.
            failed = failure is not None
            end_fields = {
                **fields,
                **extra,
                "phase": "end",
                "duration_ms": round((time.perf_counter() - started) * 1000.0),
            }
            if failed:
                end_fields["status"] = "error"
                end_fields["error_type"] = type(failure).__name__
            elif not omit_success_status:
                end_fields["status"] = "ok"
                end_fields["error_type"] = None
            log(
                "error" if failed else level,
                key,
                logger_name=logger_name,
                **end_fields,
            )
    finally:
        # Always restore the enclosing span, even if logging itself failed.
        _SPAN_CONTEXT.reset(reset_token)
649
+
650
+
651
def submit_with_log_context(
    executor: Executor,
    function: Callable[..., Any],
    *args: Any,
    **kwargs: Any,
) -> Future[Any]:
    """Submit `function(*args, **kwargs)` so it runs in a copy of this context.

    The copy is taken at submit time, so the worker sees the ContextVar
    values (log context, active span) the caller had at that moment.
    """
    caller_context = contextvars.copy_context()
    future = executor.submit(caller_context.run, function, *args, **kwargs)
    return future
660
+
661
+
662
def sanitized_config_snapshot(config: object) -> dict[str, Any]:
    """Return a log-safe snapshot of dataclass/object/dict config values.

    Mappings are read through `.items()`; any other object through its public
    (non-underscore) attributes. Callable values are skipped. For each entry:

    * a key containing a `SECRET_FIELD_FRAGMENTS` fragment (case-insensitive)
      is replaced by its secret state, never its value;
    * a nested mapping is sanitised recursively, so secrets stored one level
      down are redacted too instead of being stringified in clear text;
    * `Path` values become strings; str/int/float/bool/None are kept as-is;
    * anything else is converted with `str()`.

    NOTE(review): secrets inside lists or custom objects are still rendered
    through `str()`; only mapping nesting is redacted.
    """
    if isinstance(config, Mapping):
        items: Iterable[tuple[object, object]] = cast(Mapping[object, object], config).items()
    else:
        items = [
            (name, getattr(config, name)) for name in dir(config) if not name.startswith("_")
        ]
    snapshot: dict[str, Any] = {}
    for key, value in items:
        if callable(value):
            continue
        key_text = str(key)
        if any(fragment in key_text.lower() for fragment in SECRET_FIELD_FRAGMENTS):
            # The secret check runs first so a secret-named mapping is
            # redacted as a whole.
            snapshot[key_text] = _secret_state(value)
        elif isinstance(value, Mapping):
            snapshot[key_text] = sanitized_config_snapshot(value)
        elif isinstance(value, Path):
            snapshot[key_text] = str(value)
        elif isinstance(value, str | int | float | bool) or value is None:
            snapshot[key_text] = value
        else:
            snapshot[key_text] = str(value)
    return snapshot
687
+
688
+
689
def _current_log_fields(protected: Mapping[str, Any] | None = None) -> dict[str, Any]:
    """Collect inherited context and span fields, skipping keys in `protected`.

    Span identifiers are added only while a span is active, and
    `parent_span` only when the active span has a parent.
    """
    blocked = set(protected or {})
    fields: dict[str, Any] = {}
    for key, value in _CONTEXT.get({}).items():
        if key not in blocked:
            fields[key] = value
    active_span = _SPAN_CONTEXT.get()
    if active_span is not None:
        span_values = (
            ("parent_span", active_span.parent_span),
            ("span", active_span.span),
            ("trace", active_span.trace),
        )
        for key, value in span_values:
            if key in blocked:
                continue
            if key == "parent_span" and value is None:
                continue
            fields[key] = value
    return fields
702
+
703
+
704
def startup_event(
    *,
    command: str,
    config: object | None = None,
    log_file: Path | None = None,
    git_commit: str | None = None,
    git_cwd: Path | None = None,
    logger_name: str = "",
) -> None:
    """Emit the INFO `startup` event describing this run.

    The event carries the command name, the log-file path (or None), the git
    commit when one is given or can be discovered from `git_cwd`, and a
    sanitised snapshot of `config` when a config is supplied.
    """
    log_file_text: str | None = None
    if log_file:
        log_file_text = str(log_file)
    fields: dict[str, Any] = {"command": command, "log_file": log_file_text}

    # Only ask git when the caller did not supply a commit.
    commit = git_commit or git_short_hash(git_cwd)
    if commit:
        fields["git_commit"] = commit

    if config is not None:
        if isinstance(config, Config):
            snapshot_source: object = config_snapshot(config)
        else:
            snapshot_source = config
        fields["config"] = sanitized_config_snapshot(snapshot_source)

    info("startup", logger_name=logger_name, **fields)
725
+
726
+
727
def git_short_hash(cwd: Path | None = None) -> str | None:
    """Return `git rev-parse --short HEAD` for `cwd`, or None when unavailable.

    None is returned when git cannot be started, the call times out (two
    seconds), git exits non-zero, or it prints nothing.
    """
    try:
        completed = subprocess.run(
            ["git", "rev-parse", "--short", "HEAD"],
            cwd=cwd,
            capture_output=True,
            text=True,
            timeout=2,
            check=False,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if completed.returncode != 0:
        return None
    short_hash = completed.stdout.strip()
    return short_hash or None
744
+
745
+
746
def _ordered_payload(payload: Mapping[str, Any]) -> dict[str, Any]:
    """Copy `payload` with the well-known keys first and the rest sorted."""
    ordered: dict[str, Any] = {
        key: payload[key] for key in LOG_FIELD_ORDER if key in payload
    }
    remaining_keys = sorted(key for key in payload if key not in ordered)
    ordered.update((key, payload[key]) for key in remaining_keys)
    return ordered
754
+
755
+
756
def _log_file_level(configured_level: str | None) -> int:
    """Resolve the log-file level: explicit value, then environment, then default."""
    if configured_level is None:
        env_level = os.environ.get(SRC_LOG_LEVEL)
        # An unset or empty environment variable falls through to the default.
        configured_level = env_level if env_level else DEFAULT_LOG_FILE_LEVEL
    return _log_level(configured_level)
763
+
764
+
765
def _src_log_level_from_config(config: object | None) -> str | None:
    """Return `config.src_log_level` when it exists and is a string, else None."""
    candidate = getattr(config, "src_log_level", None)
    if isinstance(candidate, str):
        return candidate
    return None
768
+
769
+
770
def _git_cwd_path(value: Path | str | None) -> Path | None:
    """Turn `value` into a directory path: an existing file maps to its parent."""
    if value is None:
        return None
    candidate = Path(value)
    if candidate.is_file():
        return candidate.parent
    return candidate
775
+
776
+
777
+ def _log_level(value: int | str) -> int:
778
+ if isinstance(value, int):
779
+ return value
780
+ normalized = value.strip().upper()
781
+ if not normalized:
782
+ return logging.INFO
783
+ if normalized.isdecimal():
784
+ return int(normalized)
785
+ levels = logging.getLevelNamesMapping()
786
+ level = levels.get(normalized)
787
+ if level is None:
788
+ return logging.INFO
789
+ return level
790
+
791
+
792
def _structured_log_fields(record: logging.LogRecord) -> tuple[str, dict[str, Any]]:
    """Derive the message and extra JSON fields for a plain log record.

    * httpx "HTTP Request: ..." records get a `level` field of DEBUG.
    * httpcore "receive_response_headers.complete return_value=..." records
      are parsed into `headers`, `http_version`, `status_code` and
      `reason_phrase`, and the message is shortened to the event name.

    A message that does not match, or whose payload cannot be parsed into the
    expected 4-tuple with at least one usable header, is returned unchanged.
    """
    message = record.getMessage()
    fields: dict[str, Any] = (
        {"level": "DEBUG"}
        if record.name == "httpx" and message.startswith(_HTTPX_REQUEST_PREFIX)
        else {}
    )
    if not message.startswith(_HTTPCORE_RESPONSE_HEADERS_PREFIX):
        return message, fields
    try:
        # literal_eval only accepts Python literals, so it never executes the text.
        literal_value = cast(
            object,
            ast.literal_eval(message.removeprefix(_HTTPCORE_RESPONSE_HEADERS_PREFIX)),
        )
    except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
        # Every exception literal_eval is documented to raise on malformed
        # input falls back to the raw message rather than losing the record.
        return message, fields
    if not isinstance(literal_value, tuple):
        return message, fields

    return_value = cast(tuple[object, ...], literal_value)
    if len(return_value) != 4:
        return message, fields
    http_version, status_code, reason_phrase, raw_headers = return_value
    headers = _http_headers(raw_headers)
    if not headers:
        return message, fields

    fields["headers"] = headers
    decoded_version = _decode_http_bytes(http_version)
    if decoded_version is not None:
        fields["http_version"] = decoded_version
    if isinstance(status_code, int):
        fields["status_code"] = status_code
    decoded_reason = _decode_http_bytes(reason_phrase)
    if decoded_reason is not None:
        fields["reason_phrase"] = decoded_reason
    return "receive_response_headers.complete", fields
829
+
830
+
831
def _http_headers(raw_headers: object) -> dict[str, str | list[str]]:
    """Normalise a sequence of `(name, value)` pairs into a sorted header dict.

    Names are lower-cased. A header seen once maps to its value; a header
    seen several times maps to the list of its values in arrival order.
    Entries that are not 2-tuples of bytes/str are ignored. Anything that is
    not a list or tuple yields an empty dict.
    """
    if not isinstance(raw_headers, list | tuple):
        return {}
    collected: dict[str, list[str]] = {}
    for item in cast(Iterable[object], raw_headers):
        if not isinstance(item, tuple):
            continue
        pair = cast(tuple[object, ...], item)
        if len(pair) != 2:
            continue
        name = _decode_http_bytes(pair[0])
        value = _decode_http_bytes(pair[1])
        if name is None or value is None:
            continue
        collected.setdefault(name.lower(), []).append(value)
    return {
        key: (values[0] if len(values) == 1 else values)
        for key, values in sorted(collected.items())
    }
855
+
856
+
857
def _decode_http_bytes(value: object) -> str | None:
    """Return text for a str or bytes value (bytes decoded as latin-1), else None."""
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("latin-1", errors="replace")
    return None
863
+
864
+
865
def _secret_state(value: object) -> str:
    """Describe a secret without revealing it.

    Returns "missing" for None or the empty string, "reference" for a string
    starting with "op://", and "provided" for anything else.
    """
    if value is None or value == "":
        return "missing"
    if isinstance(value, str) and value.startswith("op://"):
        return "reference"
    return "provided"
869
+
870
+
871
def _resource_sampler(config: LoggingSettings) -> ResourceSampler | None:
    """Create a sampler when the settings specify a sample interval, else None."""
    interval = config.resource_sample_interval_seconds
    if interval is None:
        return None
    return ResourceSampler(interval)
874
+
875
+
876
def _run_error_type(exception: BaseException | None) -> str | None:
    """Return the exception's class name, or None when the run succeeded.

    No exception, and a `SystemExit` whose code is None or 0, count as success.
    """
    if exception is None:
        return None
    clean_exit = isinstance(exception, SystemExit) and exception.code in (None, 0)
    return None if clean_exit else type(exception).__name__
882
+
883
+
884
def _run_exit_code(exception: BaseException | None) -> int:
    """Return the process exit code implied by how the run ended.

    * no exception -> 0
    * `SystemExit` with code None (plain `sys.exit()`) -> 0, matching both the
      interpreter's exit status and `_run_error_type`, which treats it as success
    * `SystemExit` with an integer code -> that code
    * `SystemExit` with any other code (e.g. a message string) -> 1
    * any other exception -> 1
    """
    if exception is None:
        return 0
    if not isinstance(exception, SystemExit):
        return 1
    code = exception.code
    if code is None:
        return 0
    if isinstance(code, int):
        # int() also normalises bool codes to a plain integer.
        return int(code)
    return 1
890
+
891
+
892
def _resource_usage() -> Any | None:
    """Return `getrusage(RUSAGE_SELF)` for this process, or None on Windows."""
    # The `resource` module is only imported on non-Windows platforms.
    if sys.platform != "win32":
        return resource.getrusage(resource.RUSAGE_SELF)
    return None
896
+
897
+
898
def _cpu_seconds(usage: Any) -> float:
    """Return user plus system CPU seconds from a resource-usage record."""
    user_seconds = float(usage.ru_utime)
    system_seconds = float(usage.ru_stime)
    return user_seconds + system_seconds
900
+
901
+
902
def _rss_bytes(usage: Any | None) -> int | None:
    """Return resident memory in bytes, preferring the live `/proc` reading.

    Falls back to `usage.ru_maxrss` when `/proc/self/statm` is unavailable,
    and to None when there is no usage record either.
    """
    live_rss = _linux_current_rss_bytes()
    if live_rss is not None:
        return live_rss
    if usage is None:
        return None
    max_rss = int(usage.ru_maxrss)
    # macOS reports ru_maxrss in bytes; Linux reports KiB.
    if sys.platform != "darwin":
        max_rss *= 1024
    return max_rss
911
+
912
+
913
def _linux_current_rss_bytes() -> int | None:
    """Return resident pages from `/proc/self/statm` times the page size.

    Returns None when the file is missing, unreadable, too short, or not
    numeric.
    """
    try:
        # A missing file raises FileNotFoundError, an OSError handled below.
        columns = Path("/proc/self/statm").read_text(encoding="utf-8").split()
        if len(columns) >= 2:
            # The second column is used as the resident page count.
            return int(columns[1]) * os.sysconf("SC_PAGE_SIZE")
    except (OSError, ValueError):
        pass
    return None
924
+
925
+
926
def _num_file_descriptors() -> int | None:
    """Count entries in the first readable descriptor directory, else None.

    `/proc/self/fd` is tried first, then `/dev/fd`.
    """
    for candidate in map(Path, ("/proc/self/fd", "/dev/fd")):
        if candidate.exists():
            try:
                return sum(1 for _ in candidate.iterdir())
            except OSError:
                # Unreadable directory: fall through to the next candidate.
                pass
    return None
935
+
936
+
937
def _bytes_to_mib(byte_count: int) -> float:
    """Convert a byte count to mebibytes, rounded to two decimals."""
    mebibytes = byte_count / MEBIBYTE
    return round(mebibytes, 2)
939
+
940
+
941
def _prune_old_log_files(logs_dir: Path, retain_files: int) -> None:
    """Delete the oldest log files, keeping the `retain_files` newest.

    Only files named like generated logs (`YYYY-MM-DD-HH-MM-SS-*.json`) or
    `events-*.json` are considered; age is the modification time. A
    non-positive `retain_files` or a missing directory disables pruning.

    Pruning is best-effort: a file that disappears or cannot be inspected or
    removed (for example because another process is pruning the same
    directory) is skipped instead of aborting logging setup.
    """
    if retain_files <= 0 or not logs_dir.exists():
        return
    dated_files: list[tuple[float, Path]] = []
    for pattern in ("????-??-??-??-??-??-*.json", "events-*.json"):
        for candidate in logs_dir.glob(pattern):
            # stat() can fail if the file vanished after the glob listed it.
            with contextlib.suppress(OSError):
                dated_files.append((candidate.stat().st_mtime, candidate))
    # Sort on mtime only; the stable sort keeps glob order for equal times.
    dated_files.sort(key=lambda entry: entry[0])
    for _, old_file in dated_files[:-retain_files]:
        with contextlib.suppress(OSError):
            old_file.unlink()