xmi-logger 0.0.7 → 0.0.9 (py3-none-any.whl)

This diff compares publicly released versions of the package as published to the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registries.
@@ -1,380 +1,501 @@
1
- #!/usr/bin/env python
2
- # -*- coding:utf-8 -*-
3
-
4
1
  """
5
- XmiLogger 高级功能模块
6
- 包含智能日志过滤、聚合、监控、分布式支持等功能
2
+ XmiLogger 高级功能模块(实用版)
3
+
4
+ 该模块聚焦可直接集成到业务中的能力:
5
+ 脱敏与可选加密、安全备份恢复、日志压缩归档、SQLite 结构化存储、日志处理管道、健康检查与轻量性能指标。
7
6
  """
8
7
 
9
- import asyncio
8
+ from __future__ import annotations
9
+
10
10
  import json
11
- import time
12
- import os
13
- import sys
14
- import threading
15
- from datetime import datetime, timedelta
16
- from typing import Dict, Any, List, Optional, Union, Callable
17
- from functools import wraps
18
- from collections import defaultdict, deque
19
11
  import logging
20
- import hashlib
21
- import pickle
22
- import zlib
23
- import socket
24
- import struct
25
- from concurrent.futures import ThreadPoolExecutor, as_completed
12
+ import os
26
13
  import queue
27
- import weakref
28
- import gc
29
- import psutil
30
- import signal
31
- from contextlib import contextmanager
32
- import uuid
33
- import inspect
34
- import traceback
35
- from dataclasses import dataclass, field
36
- from enum import Enum
37
14
  import re
38
- import sqlite3
39
- from pathlib import Path
40
- import tempfile
41
15
  import shutil
42
- import gzip
16
+ import sqlite3
43
17
  import tarfile
18
+ import threading
19
+ import time
44
20
  import zipfile
45
- import base64
46
- import hmac
47
- import secrets
48
- import ssl
49
- import certifi
50
- import urllib3
51
- from urllib3.util.retry import Retry
52
- from urllib3.util import Timeout
53
-
54
- # 新增:智能日志过滤和聚合功能
21
+ from collections import defaultdict, deque
22
+ from datetime import datetime
23
+ from enum import Enum
24
+ from pathlib import Path
25
+ from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Sequence, Tuple, Union
26
+
27
+ try:
28
+ import psutil
29
+ except Exception:
30
+ psutil = None
31
+
32
+ _logger = logging.getLogger(__name__)
33
+
34
+
55
35
  class LogFilter(Enum):
56
- """日志过滤器类型"""
57
36
  NONE = "none"
58
37
  REGEX = "regex"
59
38
  KEYWORD = "keyword"
60
- PATTERN = "pattern"
61
39
  CUSTOM = "custom"
62
40
 
63
- class LogAggregator:
64
- """日志聚合器"""
65
- def __init__(self, window_size: int = 100, flush_interval: float = 5.0):
66
- self.window_size = window_size
67
- self.flush_interval = flush_interval
68
- self.buffer = deque(maxlen=window_size)
69
- self.last_flush = time.time()
70
- self.lock = threading.Lock()
71
- self._running = True
72
- self._flush_thread = threading.Thread(target=self._flush_worker, daemon=True)
73
- self._flush_thread.start()
74
-
75
- def add_log(self, log_entry: Dict[str, Any]) -> None:
76
- """添加日志到缓冲区"""
77
- with self.lock:
78
- self.buffer.append(log_entry)
79
- if len(self.buffer) >= self.window_size:
80
- self._flush_buffer()
81
-
82
- def _flush_buffer(self) -> None:
83
- """刷新缓冲区"""
84
- if not self.buffer:
85
- return
86
-
87
- # 聚合日志
88
- aggregated = self._aggregate_logs()
89
- # 这里可以发送到外部系统或存储
90
- print(f"聚合日志: {len(self.buffer)} 条 -> {len(aggregated)} 条")
91
- self.buffer.clear()
92
- self.last_flush = time.time()
93
-
94
- def _aggregate_logs(self) -> List[Dict[str, Any]]:
95
- """聚合日志"""
96
- if not self.buffer:
41
+
42
+ def _now_iso() -> str:
43
+ return datetime.now().isoformat(timespec="seconds")
44
+
45
+
46
+ def _ensure_dir(path: Union[str, Path]) -> str:
47
+ p = Path(path)
48
+ p.mkdir(parents=True, exist_ok=True)
49
+ return str(p)
50
+
51
+
52
+ def _is_within_directory(base_dir: Path, target_path: Path) -> bool:
53
+ try:
54
+ base = base_dir.resolve()
55
+ target = target_path.resolve()
56
+ return str(target).startswith(str(base) + os.sep) or target == base
57
+ except Exception:
58
+ return False
59
+
60
+
61
+ class LogSecurity:
62
+ def __init__(
63
+ self,
64
+ sensitive_keys: Optional[Sequence[str]] = None,
65
+ replacement: str = "***",
66
+ enable_encryption: bool = False,
67
+ encryption_key: Optional[Union[str, bytes]] = None,
68
+ ):
69
+ self.replacement = replacement
70
+ self.sensitive_keys = {k.lower() for k in (sensitive_keys or self._default_sensitive_keys())}
71
+ self._patterns = self._compile_patterns()
72
+
73
+ self._cipher = None
74
+ self._encryption_key: Optional[bytes] = None
75
+ if enable_encryption or encryption_key is not None:
76
+ self._init_cipher(encryption_key)
77
+
78
+ @staticmethod
79
+ def _default_sensitive_keys() -> List[str]:
80
+ return [
81
+ "password",
82
+ "passwd",
83
+ "pwd",
84
+ "密码",
85
+ "口令",
86
+ "secret",
87
+ "token",
88
+ "api_key",
89
+ "apikey",
90
+ "密钥",
91
+ "access_token",
92
+ "refresh_token",
93
+ "private_key",
94
+ ]
95
+
96
+ def _compile_patterns(self) -> List[re.Pattern[str]]:
97
+ keys = sorted({re.escape(k) for k in self.sensitive_keys}, key=len, reverse=True)
98
+ if not keys:
97
99
  return []
98
-
99
- # 按级别和消息模式聚合
100
- groups = defaultdict(list)
101
- for log in self.buffer:
102
- key = f"{log.get('level', 'INFO')}:{log.get('message', '')[:50]}"
103
- groups[key].append(log)
104
-
105
- aggregated = []
106
- for key, logs in groups.items():
107
- if len(logs) == 1:
108
- aggregated.append(logs[0])
109
- else:
110
- # 创建聚合日志
111
- first_log = logs[0]
112
- aggregated_log = {
113
- 'level': first_log.get('level', 'INFO'),
114
- 'message': f"[聚合] {first_log.get('message', '')} (重复 {len(logs)} 次)",
115
- 'timestamp': first_log.get('timestamp'),
116
- 'count': len(logs),
117
- 'original_logs': logs
118
- }
119
- aggregated.append(aggregated_log)
120
-
121
- return aggregated
122
-
123
- def _flush_worker(self) -> None:
124
- """后台刷新工作线程"""
125
- while self._running:
126
- time.sleep(self.flush_interval)
127
- with self.lock:
128
- if self.buffer and time.time() - self.last_flush > self.flush_interval:
129
- self._flush_buffer()
130
-
131
- def stop(self) -> None:
132
- """停止聚合器"""
133
- self._running = False
134
- self._flush_buffer()
100
+ key_alt = "|".join(keys)
101
+ return [
102
+ re.compile(
103
+ rf'(?i)((?:{key_alt}))(\s*[:=:]\s*)(["\']?)([^"\',\s\}}\]\n\r]+)(\3)',
104
+ re.IGNORECASE,
105
+ ),
106
+ re.compile(
107
+ rf'(?i)("?(?:{key_alt})"?)\s*:\s*(["\'])(.*?)\2',
108
+ re.IGNORECASE,
109
+ ),
110
+ ]
111
+
112
+ def sanitize_message(self, message: str) -> str:
113
+ sanitized = message
114
+
115
+ for pattern in self._patterns:
116
+ def _repl(m: re.Match[str]) -> str:
117
+ if len(m.groups()) >= 5:
118
+ key = m.group(1)
119
+ sep = m.group(2)
120
+ quote = m.group(3) or ""
121
+ end_quote = m.group(5) or quote
122
+ return f"{key}{sep}{quote}{self.replacement}{end_quote}"
123
+ key = m.group(1)
124
+ quote = m.group(2)
125
+ return f"{key}: {quote}{self.replacement}{quote}"
126
+
127
+ sanitized = pattern.sub(_repl, sanitized)
128
+
129
+ return sanitized
130
+
131
+ def sanitize_mapping(self, data: Any) -> Any:
132
+ if isinstance(data, Mapping):
133
+ out: Dict[str, Any] = {}
134
+ for k, v in data.items():
135
+ key_str = str(k)
136
+ if key_str.lower() in self.sensitive_keys:
137
+ out[key_str] = self.replacement
138
+ else:
139
+ out[key_str] = self.sanitize_mapping(v)
140
+ return out
141
+ if isinstance(data, list):
142
+ return [self.sanitize_mapping(x) for x in data]
143
+ if isinstance(data, tuple):
144
+ return tuple(self.sanitize_mapping(x) for x in data)
145
+ return data
146
+
147
+ def _init_cipher(self, encryption_key: Optional[Union[str, bytes]]) -> None:
148
+ try:
149
+ from cryptography.fernet import Fernet
150
+ except Exception as e:
151
+ raise RuntimeError("cryptography 未安装,无法启用加密功能") from e
152
+
153
+ if encryption_key is None:
154
+ self._encryption_key = Fernet.generate_key()
155
+ elif isinstance(encryption_key, bytes):
156
+ self._encryption_key = encryption_key
157
+ else:
158
+ self._encryption_key = encryption_key.encode("utf-8")
159
+
160
+ self._cipher = Fernet(self._encryption_key)
161
+
162
+ def get_encryption_key(self) -> Optional[bytes]:
163
+ return self._encryption_key
164
+
165
+ def encrypt_bytes(self, data: bytes) -> bytes:
166
+ if self._cipher is None:
167
+ raise RuntimeError("加密未启用")
168
+ return self._cipher.encrypt(data)
169
+
170
+ def decrypt_bytes(self, data: bytes) -> bytes:
171
+ if self._cipher is None:
172
+ raise RuntimeError("加密未启用")
173
+ return self._cipher.decrypt(data)
135
174
 
136
- # 新增:实时监控和性能分析
137
- class PerformanceMonitor:
138
- """性能监控器"""
139
- def __init__(self):
140
- self.metrics = {
141
- 'log_count': 0,
142
- 'error_count': 0,
143
- 'avg_processing_time': 0.0,
144
- 'memory_usage': 0.0,
145
- 'cpu_usage': 0.0,
146
- 'throughput': 0.0
147
- }
148
- self.processing_times = deque(maxlen=1000)
149
- self.start_time = time.time()
150
- self.lock = threading.Lock()
151
- self._monitor_thread = threading.Thread(target=self._monitor_worker, daemon=True)
152
- self._monitor_thread.start()
153
-
154
- def record_log(self, level: str, processing_time: float) -> None:
155
- """记录日志处理"""
156
- with self.lock:
157
- self.metrics['log_count'] += 1
158
- if level.upper() == 'ERROR':
159
- self.metrics['error_count'] += 1
160
-
161
- self.processing_times.append(processing_time)
162
- if self.processing_times:
163
- self.metrics['avg_processing_time'] = sum(self.processing_times) / len(self.processing_times)
164
-
165
- def _monitor_worker(self) -> None:
166
- """监控工作线程"""
167
- while True:
168
- try:
169
- # 监控系统资源
170
- process = psutil.Process()
171
- self.metrics['memory_usage'] = process.memory_info().rss / 1024 / 1024 # MB
172
- self.metrics['cpu_usage'] = process.cpu_percent()
173
-
174
- # 计算吞吐量
175
- elapsed = time.time() - self.start_time
176
- if elapsed > 0:
177
- self.metrics['throughput'] = self.metrics['log_count'] / elapsed
178
-
179
- time.sleep(5) # 每5秒更新一次
180
- except Exception:
181
- time.sleep(5)
182
-
183
- def get_metrics(self) -> Dict[str, Any]:
184
- """获取性能指标"""
185
- with self.lock:
186
- return self.metrics.copy()
187
175
 
188
- # 新增:分布式日志支持
189
176
  class DistributedLogger:
190
- """分布式日志记录器"""
191
- def __init__(self, node_id: str, cluster_nodes: List[str] = None):
177
+ def __init__(
178
+ self,
179
+ node_id: str,
180
+ sequence_dir: Optional[str] = None,
181
+ persist_every: int = 100,
182
+ ):
192
183
  self.node_id = node_id
193
- self.cluster_nodes = cluster_nodes or []
194
- self.sequence_number = 0
195
- self.lock = threading.Lock()
196
- self._sequence_file = f"sequence_{node_id}.dat"
184
+ self.persist_every = max(1, int(persist_every))
185
+ self._lock = threading.Lock()
186
+ self._sequence_number = 0
187
+
188
+ base_dir = Path(sequence_dir) if sequence_dir else Path(os.getenv("XMI_LOGGER_SEQ_DIR", ""))
189
+ if not str(base_dir):
190
+ base_dir = Path(os.path.expanduser("~")) / ".xmi_logger"
191
+ _ensure_dir(base_dir)
192
+ self._sequence_file = str(base_dir / f"sequence_{self.node_id}.txt")
197
193
  self._load_sequence()
198
-
194
+
199
195
  def _load_sequence(self) -> None:
200
- """加载序列号"""
201
196
  try:
202
197
  if os.path.exists(self._sequence_file):
203
- with open(self._sequence_file, 'r') as f:
204
- self.sequence_number = int(f.read().strip())
198
+ with open(self._sequence_file, "r", encoding="utf-8") as f:
199
+ value = f.read().strip()
200
+ self._sequence_number = int(value) if value else 0
205
201
  except Exception:
206
- self.sequence_number = 0
207
-
202
+ self._sequence_number = 0
203
+
208
204
  def _save_sequence(self) -> None:
209
- """保存序列号"""
205
+ tmp = f"{self._sequence_file}.tmp"
210
206
  try:
211
- with open(self._sequence_file, 'w') as f:
212
- f.write(str(self.sequence_number))
207
+ with open(tmp, "w", encoding="utf-8") as f:
208
+ f.write(str(self._sequence_number))
209
+ os.replace(tmp, self._sequence_file)
213
210
  except Exception:
214
- pass
215
-
211
+ try:
212
+ if os.path.exists(tmp):
213
+ os.remove(tmp)
214
+ except Exception:
215
+ pass
216
+
216
217
  def get_log_id(self) -> str:
217
- """获取唯一日志ID"""
218
- with self.lock:
219
- self.sequence_number += 1
218
+ with self._lock:
219
+ self._sequence_number += 1
220
+ if self._sequence_number % self.persist_every == 0:
221
+ self._save_sequence()
222
+ ts_ms = int(time.time() * 1000)
223
+ return f"{self.node_id}_{ts_ms}_{self._sequence_number}"
224
+
225
+ def flush(self) -> None:
226
+ with self._lock:
220
227
  self._save_sequence()
221
- timestamp = int(time.time() * 1000)
222
- return f"{self.node_id}_{timestamp}_{self.sequence_number}"
223
-
224
- # 新增:内存优化和垃圾回收
225
- class MemoryOptimizer:
226
- """内存优化器"""
227
- def __init__(self, max_memory_mb: int = 512):
228
- self.max_memory_mb = max_memory_mb
229
- self.last_gc_time = time.time()
230
- self.gc_interval = 60 # 60秒执行一次GC
231
- self._gc_thread = threading.Thread(target=self._gc_worker, daemon=True)
232
- self._gc_thread.start()
233
-
234
- def check_memory(self) -> bool:
235
- """检查内存使用情况"""
236
- process = psutil.Process()
237
- memory_mb = process.memory_info().rss / 1024 / 1024
238
- return memory_mb > self.max_memory_mb
239
-
240
- def optimize_memory(self) -> None:
241
- """优化内存使用"""
242
- if self.check_memory():
243
- # 强制垃圾回收
244
- collected = gc.collect()
245
- print(f"内存优化: 回收了 {collected} 个对象")
246
-
247
- # 清理缓存
248
- if hasattr(self, '_clear_caches'):
249
- self._clear_caches()
250
-
251
- def _gc_worker(self) -> None:
252
- """垃圾回收工作线程"""
253
- while True:
254
- time.sleep(self.gc_interval)
255
- self.optimize_memory()
256
-
257
- # 新增:智能日志路由
258
- class LogRouter:
259
- """智能日志路由器"""
260
- def __init__(self):
261
- self.routes = {}
262
- self.default_route = None
263
- self.lock = threading.Lock()
264
-
265
- def add_route(self, condition: Callable, handler: Callable) -> None:
266
- """添加路由规则"""
267
- with self.lock:
268
- route_id = len(self.routes)
269
- self.routes[route_id] = (condition, handler)
270
-
271
- def set_default_route(self, handler: Callable) -> None:
272
- """设置默认路由"""
273
- self.default_route = handler
274
-
275
- def route_log(self, log_entry: Dict[str, Any]) -> None:
276
- """路由日志"""
277
- with self.lock:
278
- for route_id, (condition, handler) in self.routes.items():
279
- if condition(log_entry):
280
- handler(log_entry)
281
- return
282
-
283
- if self.default_route:
284
- self.default_route(log_entry)
285
-
286
- # 新增:日志加密和安全
287
- class LogSecurity:
288
- """日志安全模块"""
289
- def __init__(self, encryption_key: str = None):
228
+
229
+
230
+ class LogAggregator:
231
+ def __init__(
232
+ self,
233
+ window_size: int = 200,
234
+ flush_interval: float = 5.0,
235
+ key_fn: Optional[Callable[[Dict[str, Any]], str]] = None,
236
+ on_flush: Optional[Callable[[List[Dict[str, Any]]], None]] = None,
237
+ include_samples: bool = True,
238
+ ):
239
+ self.window_size = max(1, int(window_size))
240
+ self.flush_interval = max(0.1, float(flush_interval))
241
+ self._key_fn = key_fn or (lambda e: f"{e.get('level', 'INFO')}:{str(e.get('message', ''))[:80]}")
242
+ self._on_flush = on_flush
243
+ self._include_samples = include_samples
244
+
245
+ self._buffer: deque[Dict[str, Any]] = deque(maxlen=self.window_size)
246
+ self._lock = threading.Lock()
247
+ self._stop = threading.Event()
248
+ self._output: "queue.Queue[List[Dict[str, Any]]]" = queue.Queue(maxsize=10)
249
+ self._last_flush_at = time.time()
250
+
251
+ self._thread = threading.Thread(target=self._worker, daemon=True)
252
+ self._thread.start()
253
+
254
+ def add_log(self, log_entry: Dict[str, Any]) -> None:
255
+ with self._lock:
256
+ self._buffer.append(log_entry)
257
+ if len(self._buffer) >= self.window_size:
258
+ self._flush_locked()
259
+
260
+ def flush(self) -> List[Dict[str, Any]]:
261
+ with self._lock:
262
+ return self._flush_locked()
263
+
264
+ def get_aggregated(self, timeout: float = 0.0) -> Optional[List[Dict[str, Any]]]:
290
265
  try:
291
- from cryptography.fernet import Fernet
292
- self.encryption_key = encryption_key or Fernet.generate_key()
293
- self.cipher = Fernet(self.encryption_key)
294
- except ImportError:
295
- print("警告: cryptography 未安装,加密功能将不可用")
296
- self.cipher = None
297
-
298
- self.sensitive_patterns = [
299
- r'(password["\']?\s*[:=]\s*["\'][^"\']*["\'])',
300
- r'(api_key["\']?\s*[:=]\s*["\'][^"\']*["\'])',
301
- r'(token["\']?\s*[:=]\s*["\'][^"\']*["\'])',
302
- r'(secret["\']?\s*[:=]\s*["\'][^"\']*["\'])'
303
- ]
304
- self.compiled_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.sensitive_patterns]
305
-
306
- def sanitize_message(self, message: str) -> str:
307
- """清理敏感信息"""
308
- sanitized = message
309
- for pattern in self.compiled_patterns:
310
- sanitized = pattern.sub(r'\1=***', sanitized)
311
- return sanitized
312
-
313
- def encrypt_log(self, log_data: bytes) -> bytes:
314
- """加密日志数据"""
315
- if self.cipher is None:
316
- return log_data
317
- return self.cipher.encrypt(log_data)
318
-
319
- def decrypt_log(self, encrypted_data: bytes) -> bytes:
320
- """解密日志数据"""
321
- if self.cipher is None:
322
- return encrypted_data
323
- return self.cipher.decrypt(encrypted_data)
324
-
325
- # 新增:日志压缩和归档
266
+ return self._output.get(timeout=timeout)
267
+ except queue.Empty:
268
+ return None
269
+
270
+ def stop(self) -> None:
271
+ self._stop.set()
272
+ self._thread.join(timeout=2)
273
+ try:
274
+ self.flush()
275
+ except Exception:
276
+ pass
277
+
278
+ def _flush_locked(self) -> List[Dict[str, Any]]:
279
+ if not self._buffer:
280
+ return []
281
+
282
+ groups: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
283
+ for entry in self._buffer:
284
+ groups[self._key_fn(entry)].append(entry)
285
+
286
+ aggregated: List[Dict[str, Any]] = []
287
+ for _, entries in groups.items():
288
+ if len(entries) == 1:
289
+ aggregated.append(entries[0])
290
+ continue
291
+ first = entries[0]
292
+ item: Dict[str, Any] = dict(first)
293
+ item["count"] = len(entries)
294
+ item["message"] = f"[聚合] {first.get('message', '')} (重复 {len(entries)} 次)"
295
+ if self._include_samples:
296
+ item["sample"] = {"first": entries[0], "last": entries[-1]}
297
+ aggregated.append(item)
298
+
299
+ self._buffer.clear()
300
+ self._last_flush_at = time.time()
301
+
302
+ if self._on_flush is not None:
303
+ try:
304
+ self._on_flush(aggregated)
305
+ except Exception:
306
+ _logger.exception("LogAggregator on_flush 执行失败")
307
+ else:
308
+ try:
309
+ self._output.put_nowait(aggregated)
310
+ except queue.Full:
311
+ pass
312
+
313
+ return aggregated
314
+
315
+ def _worker(self) -> None:
316
+ while not self._stop.is_set():
317
+ self._stop.wait(self.flush_interval)
318
+ if self._stop.is_set():
319
+ break
320
+ with self._lock:
321
+ if self._buffer and (time.time() - self._last_flush_at) >= self.flush_interval:
322
+ self._flush_locked()
323
+
324
+
325
+ class PerformanceMonitor:
326
+ def __init__(self, sample_interval: float = 5.0):
327
+ self._lock = threading.Lock()
328
+ self._stop = threading.Event()
329
+ self._sample_interval = max(0.5, float(sample_interval))
330
+
331
+ self._start_time = time.time()
332
+ self._processing_times: deque[float] = deque(maxlen=1000)
333
+ self._metrics: Dict[str, Any] = {
334
+ "log_count": 0,
335
+ "error_count": 0,
336
+ "avg_processing_time_ms": 0.0,
337
+ "memory_usage_mb": None,
338
+ "cpu_usage_percent": None,
339
+ "throughput_per_sec": 0.0,
340
+ "updated_at": _now_iso(),
341
+ }
342
+
343
+ self._thread = threading.Thread(target=self._worker, daemon=True)
344
+ self._thread.start()
345
+
346
+ def record_log(self, level: str, processing_time_sec: float) -> None:
347
+ with self._lock:
348
+ self._metrics["log_count"] += 1
349
+ if str(level).upper() == "ERROR":
350
+ self._metrics["error_count"] += 1
351
+ self._processing_times.append(float(processing_time_sec))
352
+ if self._processing_times:
353
+ avg_ms = (sum(self._processing_times) / len(self._processing_times)) * 1000.0
354
+ self._metrics["avg_processing_time_ms"] = avg_ms
355
+
356
+ def get_metrics(self) -> Dict[str, Any]:
357
+ with self._lock:
358
+ return dict(self._metrics)
359
+
360
+ def stop(self) -> None:
361
+ self._stop.set()
362
+ self._thread.join(timeout=2)
363
+
364
+ def _worker(self) -> None:
365
+ process = None
366
+ if psutil is not None:
367
+ try:
368
+ process = psutil.Process()
369
+ process.cpu_percent(interval=None)
370
+ except Exception:
371
+ process = None
372
+
373
+ while not self._stop.is_set():
374
+ self._stop.wait(self._sample_interval)
375
+ if self._stop.is_set():
376
+ break
377
+ with self._lock:
378
+ if process is not None:
379
+ try:
380
+ self._metrics["memory_usage_mb"] = process.memory_info().rss / 1024 / 1024
381
+ self._metrics["cpu_usage_percent"] = process.cpu_percent(interval=None)
382
+ except Exception:
383
+ self._metrics["memory_usage_mb"] = None
384
+ self._metrics["cpu_usage_percent"] = None
385
+
386
+ elapsed = time.time() - self._start_time
387
+ if elapsed > 0:
388
+ self._metrics["throughput_per_sec"] = self._metrics["log_count"] / elapsed
389
+ self._metrics["updated_at"] = _now_iso()
390
+
391
+
326
392
  class LogArchiver:
327
- """日志归档器"""
328
393
  def __init__(self, archive_dir: str = "archives"):
329
- self.archive_dir = archive_dir
330
- os.makedirs(archive_dir, exist_ok=True)
331
-
332
- def compress_file(self, file_path: str, compression_type: str = "gzip") -> str:
333
- """压缩文件"""
394
+ self.archive_dir = _ensure_dir(archive_dir)
395
+
396
+ def compress_file(
397
+ self,
398
+ file_path: str,
399
+ compression_type: str = "gzip",
400
+ output_name: Optional[str] = None,
401
+ ) -> str:
402
+ src = Path(file_path)
403
+ if not src.exists() or not src.is_file():
404
+ raise FileNotFoundError(str(src))
405
+
406
+ compression_type = str(compression_type).lower()
407
+ if compression_type not in {"gzip", "zip"}:
408
+ raise ValueError("compression_type 仅支持 gzip/zip")
409
+
410
+ base_name = output_name or src.name
411
+ out_path = Path(self.archive_dir) / base_name
334
412
  if compression_type == "gzip":
335
- archive_path = f"{file_path}.gz"
336
- with open(file_path, 'rb') as f_in:
337
- with gzip.open(archive_path, 'wb') as f_out:
338
- shutil.copyfileobj(f_in, f_out)
339
- elif compression_type == "zip":
340
- archive_path = f"{file_path}.zip"
341
- with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
342
- zipf.write(file_path, os.path.basename(file_path))
343
- elif compression_type == "tar":
344
- archive_path = f"{file_path}.tar.gz"
345
- with tarfile.open(archive_path, 'w:gz') as tar:
346
- tar.add(file_path, arcname=os.path.basename(file_path))
347
-
348
- return archive_path
349
-
350
- def archive_logs(self, log_dir: str, days_old: int = 7) -> List[str]:
351
- """归档旧日志"""
352
- archived_files = []
353
- current_time = datetime.now()
354
-
355
- for file_path in Path(log_dir).glob("*.log"):
356
- file_time = datetime.fromtimestamp(file_path.stat().st_mtime)
357
- if (current_time - file_time).days >= days_old:
358
- try:
359
- archive_path = self.compress_file(str(file_path))
360
- os.remove(file_path)
361
- archived_files.append(archive_path)
362
- except Exception as e:
363
- print(f"归档文件失败 {file_path}: {e}")
364
-
365
- return archived_files
366
-
367
- # 新增:日志数据库支持
413
+ if not str(out_path).endswith(".gz"):
414
+ out_path = out_path.with_suffix(out_path.suffix + ".gz")
415
+ import gzip
416
+
417
+ with open(src, "rb") as f_in, gzip.open(out_path, "wb") as f_out:
418
+ shutil.copyfileobj(f_in, f_out)
419
+ return str(out_path)
420
+
421
+ if not str(out_path).endswith(".zip"):
422
+ out_path = out_path.with_suffix(out_path.suffix + ".zip")
423
+ with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as zf:
424
+ zf.write(str(src), arcname=src.name)
425
+ return str(out_path)
426
+
427
+ def archive_logs(
428
+ self,
429
+ log_dir: str,
430
+ days_old: int = 7,
431
+ compression_type: str = "gzip",
432
+ delete_original: bool = True,
433
+ ) -> List[str]:
434
+ log_path = Path(log_dir)
435
+ if not log_path.exists():
436
+ return []
437
+
438
+ archived: List[str] = []
439
+ cutoff = time.time() - (max(0, int(days_old)) * 86400)
440
+ for fp in log_path.glob("*.log"):
441
+ try:
442
+ if fp.stat().st_mtime > cutoff:
443
+ continue
444
+ out = self.compress_file(str(fp), compression_type=compression_type)
445
+ archived.append(out)
446
+ if delete_original:
447
+ fp.unlink(missing_ok=True)
448
+ except Exception:
449
+ _logger.exception("归档失败: %s", fp)
450
+ return archived
451
+
452
+
368
453
  class LogDatabase:
369
- """日志数据库支持"""
370
- def __init__(self, db_path: str = "logs.db"):
454
+ _allowed_columns = {
455
+ "id",
456
+ "timestamp",
457
+ "level",
458
+ "message",
459
+ "file",
460
+ "line",
461
+ "function",
462
+ "process_id",
463
+ "thread_id",
464
+ "extra_data",
465
+ }
466
+
467
+ def __init__(
468
+ self,
469
+ db_path: str = "logs.db",
470
+ enable_wal: bool = True,
471
+ busy_timeout_ms: int = 5000,
472
+ ):
371
473
  self.db_path = db_path
474
+ self._lock = threading.Lock()
475
+ self._conn = sqlite3.connect(self.db_path, check_same_thread=False)
476
+ self._conn.row_factory = sqlite3.Row
477
+ if enable_wal:
478
+ try:
479
+ self._conn.execute("PRAGMA journal_mode=WAL;")
480
+ except Exception:
481
+ pass
482
+ try:
483
+ self._conn.execute(f"PRAGMA busy_timeout={int(busy_timeout_ms)};")
484
+ except Exception:
485
+ pass
372
486
  self._init_database()
373
-
487
+
488
+ def close(self) -> None:
489
+ with self._lock:
490
+ try:
491
+ self._conn.close()
492
+ except Exception:
493
+ pass
494
+
374
495
  def _init_database(self) -> None:
375
- """初始化数据库"""
376
- with sqlite3.connect(self.db_path) as conn:
377
- conn.execute("""
496
+ with self._lock:
497
+ self._conn.execute(
498
+ """
378
499
  CREATE TABLE IF NOT EXISTS logs (
379
500
  id INTEGER PRIMARY KEY AUTOINCREMENT,
380
501
  timestamp TEXT NOT NULL,
@@ -387,281 +508,324 @@ class LogDatabase:
387
508
  thread_id INTEGER,
388
509
  extra_data TEXT
389
510
  )
390
- """)
391
-
392
- conn.execute("""
393
- CREATE INDEX IF NOT EXISTS idx_timestamp ON logs(timestamp)
394
- """)
395
-
396
- conn.execute("""
397
- CREATE INDEX IF NOT EXISTS idx_level ON logs(level)
398
- """)
399
-
400
- def insert_log(self, log_entry: Dict[str, Any]) -> None:
401
- """插入日志记录"""
402
- with sqlite3.connect(self.db_path) as conn:
403
- conn.execute("""
511
+ """
512
+ )
513
+ self._conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs(timestamp)")
514
+ self._conn.execute("CREATE INDEX IF NOT EXISTS idx_logs_level ON logs(level)")
515
+ self._conn.commit()
516
+
517
+ def insert_log(self, log_entry: Mapping[str, Any]) -> None:
518
+ self.insert_many([log_entry])
519
+
520
+ def insert_many(self, log_entries: Sequence[Mapping[str, Any]]) -> None:
521
+ rows = []
522
+ for e in log_entries:
523
+ rows.append(
524
+ (
525
+ e.get("timestamp") or _now_iso(),
526
+ e.get("level") or "INFO",
527
+ e.get("message") or "",
528
+ e.get("file"),
529
+ e.get("line"),
530
+ e.get("function"),
531
+ e.get("process_id"),
532
+ e.get("thread_id"),
533
+ json.dumps(e.get("extra_data") or {}, ensure_ascii=False),
534
+ )
535
+ )
536
+ with self._lock:
537
+ self._conn.executemany(
538
+ """
404
539
  INSERT INTO logs (timestamp, level, message, file, line, function, process_id, thread_id, extra_data)
405
540
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
406
- """, (
407
- log_entry.get('timestamp'),
408
- log_entry.get('level'),
409
- log_entry.get('message'),
410
- log_entry.get('file'),
411
- log_entry.get('line'),
412
- log_entry.get('function'),
413
- log_entry.get('process_id'),
414
- log_entry.get('thread_id'),
415
- json.dumps(log_entry.get('extra_data', {}))
416
- ))
417
-
418
- def query_logs(self, conditions: Dict[str, Any] = None, limit: int = 1000) -> List[Dict[str, Any]]:
419
- """查询日志"""
420
- query = "SELECT * FROM logs"
421
- params = []
422
-
541
+ """,
542
+ rows,
543
+ )
544
+ self._conn.commit()
545
+
546
+ def query_logs(
547
+ self,
548
+ conditions: Optional[Mapping[str, Any]] = None,
549
+ limit: int = 1000,
550
+ order_desc: bool = True,
551
+ ) -> List[Dict[str, Any]]:
552
+ where_parts: List[str] = []
553
+ params: List[Any] = []
423
554
  if conditions:
424
- where_clauses = []
425
- for key, value in conditions.items():
426
- where_clauses.append(f"{key} = ?")
427
- params.append(value)
428
- query += " WHERE " + " AND ".join(where_clauses)
429
-
430
- query += " ORDER BY timestamp DESC LIMIT ?"
431
- params.append(limit)
432
-
433
- with sqlite3.connect(self.db_path) as conn:
434
- cursor = conn.execute(query, params)
435
- columns = [description[0] for description in cursor.description]
436
- return [dict(zip(columns, row)) for row in cursor.fetchall()]
437
-
438
- # 新增:日志流处理
555
+ for k, v in conditions.items():
556
+ if k not in self._allowed_columns:
557
+ raise ValueError(f"不支持的查询字段: {k}")
558
+ where_parts.append(f"{k} = ?")
559
+ params.append(v)
560
+
561
+ sql = "SELECT * FROM logs"
562
+ if where_parts:
563
+ sql += " WHERE " + " AND ".join(where_parts)
564
+ sql += " ORDER BY timestamp " + ("DESC" if order_desc else "ASC")
565
+ sql += " LIMIT ?"
566
+ params.append(int(limit))
567
+
568
+ with self._lock:
569
+ cur = self._conn.execute(sql, params)
570
+ result = []
571
+ for row in cur.fetchall():
572
+ d = dict(row)
573
+ try:
574
+ d["extra_data"] = json.loads(d.get("extra_data") or "{}")
575
+ except Exception:
576
+ pass
577
+ result.append(d)
578
+ return result
579
+
580
+ def purge_older_than(self, days: int) -> int:
581
+ days = int(days)
582
+ if days <= 0:
583
+ return 0
584
+ cutoff = datetime.fromtimestamp(time.time() - days * 86400).isoformat(timespec="seconds")
585
+ with self._lock:
586
+ cur = self._conn.execute("DELETE FROM logs WHERE timestamp < ?", (cutoff,))
587
+ self._conn.commit()
588
+ return int(cur.rowcount or 0)
589
+
590
+
439
591
  class LogStreamProcessor:
440
- """日志流处理器"""
441
- def __init__(self, processors: List[Callable] = None):
442
- self.processors = processors or []
443
- self.input_queue = queue.Queue()
444
- self.output_queue = queue.Queue()
445
- self._running = True
446
- self._processor_thread = threading.Thread(target=self._process_worker, daemon=True)
447
- self._processor_thread.start()
448
-
449
- def add_processor(self, processor: Callable) -> None:
450
- """添加处理器"""
592
+ def __init__(
593
+ self,
594
+ processors: Optional[List[Callable[[Dict[str, Any]], Dict[str, Any]]]] = None,
595
+ max_queue_size: int = 10000,
596
+ error_handler: Optional[Callable[[Exception, Dict[str, Any]], None]] = None,
597
+ ):
598
+ self.processors = list(processors or [])
599
+ self._error_handler = error_handler
600
+
601
+ self._input: "queue.Queue[Optional[Dict[str, Any]]]" = queue.Queue(maxsize=max(1, int(max_queue_size)))
602
+ self._output: "queue.Queue[Dict[str, Any]]" = queue.Queue(maxsize=max(1, int(max_queue_size)))
603
+ self._stop = threading.Event()
604
+ self._thread = threading.Thread(target=self._worker, daemon=True)
605
+ self._thread.start()
606
+
607
+ def add_processor(self, processor: Callable[[Dict[str, Any]], Dict[str, Any]]) -> None:
451
608
  self.processors.append(processor)
452
-
453
- def process_log(self, log_entry: Dict[str, Any]) -> None:
454
- """处理日志"""
455
- self.input_queue.put(log_entry)
456
-
457
- def _process_worker(self) -> None:
458
- """处理工作线程"""
459
- while self._running:
609
+
610
+ def process_log(self, log_entry: Dict[str, Any], block: bool = True, timeout: Optional[float] = None) -> bool:
611
+ try:
612
+ self._input.put(log_entry, block=block, timeout=timeout)
613
+ return True
614
+ except queue.Full:
615
+ return False
616
+
617
+ def get_processed_log(self, timeout: float = 0.0) -> Optional[Dict[str, Any]]:
618
+ try:
619
+ return self._output.get(timeout=timeout)
620
+ except queue.Empty:
621
+ return None
622
+
623
+ def stop(self, timeout: float = 2.0) -> None:
624
+ self._stop.set()
625
+ try:
626
+ self._input.put_nowait(None)
627
+ except Exception:
628
+ pass
629
+ self._thread.join(timeout=timeout)
630
+
631
+ def _handle_error(self, exc: Exception, entry: Dict[str, Any]) -> None:
632
+ if self._error_handler is not None:
460
633
  try:
461
- log_entry = self.input_queue.get(timeout=1)
462
- processed_entry = log_entry
463
-
464
- for processor in self.processors:
465
- processed_entry = processor(processed_entry)
466
-
467
- self.output_queue.put(processed_entry)
634
+ self._error_handler(exc, entry)
635
+ except Exception:
636
+ _logger.exception("LogStreamProcessor error_handler 执行失败")
637
+ return
638
+ _logger.exception("日志处理失败")
639
+
640
+ def _worker(self) -> None:
641
+ while not self._stop.is_set():
642
+ try:
643
+ item = self._input.get(timeout=0.5)
468
644
  except queue.Empty:
469
645
  continue
646
+ if item is None:
647
+ break
648
+ processed = item
649
+ try:
650
+ for p in self.processors:
651
+ processed = p(processed)
652
+ try:
653
+ self._output.put_nowait(processed)
654
+ except queue.Full:
655
+ pass
470
656
  except Exception as e:
471
- print(f"日志处理错误: {e}")
472
-
473
- def get_processed_log(self) -> Optional[Dict[str, Any]]:
474
- """获取处理后的日志"""
475
- try:
476
- return self.output_queue.get_nowait()
477
- except queue.Empty:
478
- return None
657
+ self._handle_error(e, item)
658
+
479
659
 
480
- # 新增:智能日志分析
481
660
  class LogAnalyzer:
482
- """智能日志分析器"""
483
- def __init__(self):
484
- self.patterns = {
485
- 'error_patterns': [
486
- r'Exception|Error|Failed|Timeout|Connection refused',
487
- r'HTTP \d{3}',
488
- r'ORA-\d{5}',
489
- r'MySQL.*error'
661
+ def __init__(self, patterns: Optional[Mapping[str, Sequence[str]]] = None):
662
+ self.patterns: Dict[str, List[str]] = {
663
+ "error": [
664
+ r"Exception|Error|Failed|Timeout|Connection refused",
665
+ r"HTTP \d{3}",
666
+ r"ORA-\d{5}",
667
+ r"MySQL.*error",
490
668
  ],
491
- 'warning_patterns': [
492
- r'Warning|Deprecated|Deprecation',
493
- r'Slow query|Performance issue',
494
- r'Resource.*low|Memory.*high'
669
+ "warning": [
670
+ r"Warning|Deprecated|Deprecation",
671
+ r"Slow query|Performance issue",
672
+ r"Resource.*low|Memory.*high",
673
+ ],
674
+ "security": [
675
+ r"Unauthorized|Forbidden|Authentication failed",
676
+ r"SQL injection|XSS|CSRF",
677
+ r"Failed login|Invalid credentials",
495
678
  ],
496
- 'security_patterns': [
497
- r'Unauthorized|Forbidden|Authentication failed',
498
- r'SQL injection|XSS|CSRF',
499
- r'Failed login|Invalid credentials'
500
- ]
501
- }
502
- self.compiled_patterns = {}
503
- for category, patterns in self.patterns.items():
504
- self.compiled_patterns[category] = [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
505
-
506
- def analyze_log(self, log_entry: Dict[str, Any]) -> Dict[str, Any]:
507
- """分析日志"""
508
- message = log_entry.get('message', '')
509
- level = log_entry.get('level', 'INFO')
510
-
511
- analysis = {
512
- 'severity': 'normal',
513
- 'categories': [],
514
- 'suggestions': [],
515
- 'patterns_found': []
516
679
  }
517
-
518
- # 检查错误模式
519
- for pattern in self.compiled_patterns['error_patterns']:
680
+ if patterns:
681
+ for k, v in patterns.items():
682
+ self.patterns[str(k)] = list(v)
683
+
684
+ self._compiled: Dict[str, List[re.Pattern[str]]] = {}
685
+ for category, pats in self.patterns.items():
686
+ self._compiled[category] = [re.compile(p, re.IGNORECASE) for p in pats]
687
+
688
+ def analyze_log(self, log_entry: Mapping[str, Any]) -> Dict[str, Any]:
689
+ message = str(log_entry.get("message") or "")
690
+
691
+ categories: List[str] = []
692
+ patterns_found: List[str] = []
693
+ severity = "normal"
694
+
695
+ for pattern in self._compiled.get("error", []):
520
696
  if pattern.search(message):
521
- analysis['severity'] = 'high'
522
- analysis['categories'].append('error')
523
- analysis['patterns_found'].append(pattern.pattern)
524
-
525
- # 检查警告模式
526
- for pattern in self.compiled_patterns['warning_patterns']:
697
+ categories.append("error")
698
+ patterns_found.append(pattern.pattern)
699
+ severity = "high"
700
+ for pattern in self._compiled.get("warning", []):
527
701
  if pattern.search(message):
528
- if analysis['severity'] == 'normal':
529
- analysis['severity'] = 'medium'
530
- analysis['categories'].append('warning')
531
- analysis['patterns_found'].append(pattern.pattern)
532
-
533
- # 检查安全模式
534
- for pattern in self.compiled_patterns['security_patterns']:
702
+ categories.append("warning")
703
+ patterns_found.append(pattern.pattern)
704
+ if severity == "normal":
705
+ severity = "medium"
706
+ for pattern in self._compiled.get("security", []):
535
707
  if pattern.search(message):
536
- analysis['severity'] = 'critical'
537
- analysis['categories'].append('security')
538
- analysis['patterns_found'].append(pattern.pattern)
539
-
540
- # 生成建议
541
- if 'error' in analysis['categories']:
542
- analysis['suggestions'].append('检查相关服务和依赖')
543
- if 'security' in analysis['categories']:
544
- analysis['suggestions'].append('立即检查安全配置')
545
- if 'warning' in analysis['categories']:
546
- analysis['suggestions'].append('监控系统性能')
547
-
548
- return analysis
549
-
550
- # 新增:日志健康检查
551
- class LogHealthChecker:
552
- """日志健康检查器"""
553
- def __init__(self):
554
- self.health_metrics = {
555
- 'total_logs': 0,
556
- 'error_rate': 0.0,
557
- 'avg_response_time': 0.0,
558
- 'memory_usage': 0.0,
559
- 'disk_usage': 0.0,
560
- 'last_check': None
708
+ categories.append("security")
709
+ patterns_found.append(pattern.pattern)
710
+ severity = "critical"
711
+
712
+ suggestions: List[str] = []
713
+ if "error" in categories:
714
+ suggestions.append("检查相关服务和依赖")
715
+ if "security" in categories:
716
+ suggestions.append("立即检查安全配置")
717
+ if "warning" in categories:
718
+ suggestions.append("监控系统性能")
719
+
720
+ return {
721
+ "severity": severity,
722
+ "categories": list(dict.fromkeys(categories)),
723
+ "suggestions": suggestions,
724
+ "patterns_found": patterns_found,
561
725
  }
562
-
726
+
727
+
728
+ class LogHealthChecker:
563
729
  def check_health(self, log_dir: str) -> Dict[str, Any]:
564
- """检查日志系统健康状态"""
565
730
  try:
566
- # 检查磁盘使用情况
567
- total, used, free = shutil.disk_usage(log_dir)
568
- disk_usage_percent = (used / total) * 100
569
-
570
- # 检查内存使用情况
571
- process = psutil.Process()
572
- memory_usage = process.memory_info().rss / 1024 / 1024 # MB
573
-
574
- # 检查日志文件
731
+ total, used, _ = shutil.disk_usage(log_dir)
732
+ disk_usage_percent = (used / total) * 100 if total else 0.0
733
+
734
+ memory_usage_mb = None
735
+ if psutil is not None:
736
+ try:
737
+ process = psutil.Process()
738
+ memory_usage_mb = process.memory_info().rss / 1024 / 1024
739
+ except Exception:
740
+ memory_usage_mb = None
741
+
575
742
  log_files = list(Path(log_dir).glob("*.log"))
576
- total_size = sum(f.stat().st_size for f in log_files)
577
-
578
- health_status = {
579
- 'status': 'healthy',
580
- 'disk_usage_percent': disk_usage_percent,
581
- 'memory_usage_mb': memory_usage,
582
- 'log_files_count': len(log_files),
583
- 'total_log_size_mb': total_size / 1024 / 1024,
584
- 'last_check': datetime.now().isoformat()
585
- }
586
-
587
- # 判断健康状态
743
+ total_size = sum(f.stat().st_size for f in log_files) if log_files else 0
744
+
745
+ status = "healthy"
746
+ warnings: List[str] = []
588
747
  if disk_usage_percent > 90:
589
- health_status['status'] = 'critical'
590
- health_status['warnings'] = ['磁盘使用率过高']
748
+ status = "critical"
749
+ warnings.append("磁盘使用率过高")
591
750
  elif disk_usage_percent > 80:
592
- health_status['status'] = 'warning'
593
- health_status['warnings'] = ['磁盘使用率较高']
594
-
595
- if memory_usage > 1024: # 超过1GB
596
- health_status['status'] = 'warning'
597
- if 'warnings' not in health_status:
598
- health_status['warnings'] = []
599
- health_status['warnings'].append('内存使用量较高')
600
-
601
- return health_status
602
-
603
- except Exception as e:
604
- return {
605
- 'status': 'error',
606
- 'error': str(e),
607
- 'last_check': datetime.now().isoformat()
751
+ status = "warning"
752
+ warnings.append("磁盘使用率较高")
753
+
754
+ if memory_usage_mb is not None and memory_usage_mb > 1024:
755
+ if status == "healthy":
756
+ status = "warning"
757
+ warnings.append("内存使用量较高")
758
+
759
+ result: Dict[str, Any] = {
760
+ "status": status,
761
+ "disk_usage_percent": float(disk_usage_percent),
762
+ "memory_usage_mb": memory_usage_mb,
763
+ "log_files_count": len(log_files),
764
+ "total_log_size_mb": total_size / 1024 / 1024,
765
+ "checked_at": _now_iso(),
608
766
  }
767
+ if warnings:
768
+ result["warnings"] = warnings
769
+ return result
770
+ except Exception as e:
771
+ return {"status": "error", "error": str(e), "checked_at": _now_iso()}
772
+
609
773
 
610
- # 新增:日志备份和恢复
611
774
  class LogBackupManager:
612
- """日志备份管理器"""
613
775
  def __init__(self, backup_dir: str = "backups"):
614
- self.backup_dir = backup_dir
615
- os.makedirs(backup_dir, exist_ok=True)
616
-
617
- def create_backup(self, log_dir: str, backup_name: str = None) -> str:
618
- """创建日志备份"""
619
- if backup_name is None:
620
- backup_name = f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
621
-
622
- backup_path = os.path.join(self.backup_dir, f"{backup_name}.tar.gz")
623
-
624
- with tarfile.open(backup_path, 'w:gz') as tar:
776
+ self.backup_dir = _ensure_dir(backup_dir)
777
+
778
+ def create_backup(self, log_dir: str, backup_name: Optional[str] = None) -> str:
779
+ backup_name = backup_name or f"backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
780
+ backup_path = str(Path(self.backup_dir) / f"{backup_name}.tar.gz")
781
+
782
+ with tarfile.open(backup_path, "w:gz") as tar:
625
783
  for log_file in Path(log_dir).glob("*.log"):
626
- tar.add(log_file, arcname=log_file.name)
627
-
784
+ tar.add(str(log_file), arcname=log_file.name)
628
785
  return backup_path
629
-
786
+
630
787
  def restore_backup(self, backup_path: str, restore_dir: str) -> bool:
631
- """恢复日志备份"""
632
788
  try:
633
- with tarfile.open(backup_path, 'r:gz') as tar:
634
- tar.extractall(restore_dir)
789
+ base = Path(restore_dir)
790
+ _ensure_dir(base)
791
+ with tarfile.open(backup_path, "r:gz") as tar:
792
+ for member in tar.getmembers():
793
+ if not member.name or member.name.startswith("/") or ".." in Path(member.name).parts:
794
+ raise RuntimeError(f"不安全的备份成员路径: {member.name}")
795
+ target = base / member.name
796
+ if not _is_within_directory(base, target):
797
+ raise RuntimeError(f"不安全的备份成员路径: {member.name}")
798
+ tar.extractall(str(base))
635
799
  return True
636
- except Exception as e:
637
- print(f"恢复备份失败: {e}")
800
+ except Exception:
801
+ _logger.exception("恢复备份失败")
638
802
  return False
639
-
803
+
640
804
  def list_backups(self) -> List[Dict[str, Any]]:
641
- """列出所有备份"""
642
- backups = []
805
+ backups: List[Dict[str, Any]] = []
643
806
  for backup_file in Path(self.backup_dir).glob("*.tar.gz"):
644
807
  stat = backup_file.stat()
645
- backups.append({
646
- 'name': backup_file.name,
647
- 'size_mb': stat.st_size / 1024 / 1024,
648
- 'created': datetime.fromtimestamp(stat.st_mtime).isoformat()
649
- })
650
- return sorted(backups, key=lambda x: x['created'], reverse=True)
651
-
652
- # 导出所有类
808
+ backups.append(
809
+ {
810
+ "name": backup_file.name,
811
+ "path": str(backup_file),
812
+ "size_mb": stat.st_size / 1024 / 1024,
813
+ "created": datetime.fromtimestamp(stat.st_mtime).isoformat(timespec="seconds"),
814
+ }
815
+ )
816
+ return sorted(backups, key=lambda x: x["created"], reverse=True)
817
+
818
+
653
819
  __all__ = [
654
- 'LogFilter',
655
- 'LogAggregator',
656
- 'PerformanceMonitor',
657
- 'DistributedLogger',
658
- 'MemoryOptimizer',
659
- 'LogRouter',
660
- 'LogSecurity',
661
- 'LogArchiver',
662
- 'LogDatabase',
663
- 'LogStreamProcessor',
664
- 'LogAnalyzer',
665
- 'LogHealthChecker',
666
- 'LogBackupManager'
667
- ]
820
+ "LogFilter",
821
+ "LogSecurity",
822
+ "DistributedLogger",
823
+ "LogAggregator",
824
+ "PerformanceMonitor",
825
+ "LogArchiver",
826
+ "LogDatabase",
827
+ "LogStreamProcessor",
828
+ "LogAnalyzer",
829
+ "LogHealthChecker",
830
+ "LogBackupManager",
831
+ ]
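
Below is a minimal usage sketch of the sanitization and optional encryption that 0.0.9 adds via LogSecurity. The import path is an assumption (the diff does not show the module's file name inside the package), and encryption needs the optional cryptography dependency, as the new code itself notes.

    # Hypothetical import path -- the diff does not reveal the module's actual name.
    from xmi_logger.advanced import LogSecurity

    security = LogSecurity(sensitive_keys=["password", "token"], replacement="***")

    # Masks values attached to sensitive keys in free-form messages and nested mappings.
    print(security.sanitize_message('login ok, password="hunter2"'))
    print(security.sanitize_mapping({"user": "alice", "token": "abc123"}))

    # Encryption is opt-in and raises RuntimeError when cryptography is not installed.
    try:
        enc = LogSecurity(enable_encryption=True)
        blob = enc.encrypt_bytes(b"sensitive log line")
        assert enc.decrypt_bytes(blob) == b"sensitive log line"
    except RuntimeError:
        pass  # cryptography missing; encryption stays disabled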
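
A similar sketch for the new SQLite-backed structured storage; the same import-path caveat applies. insert_log, query_logs with whitelisted columns, and purge_older_than follow the signatures introduced in this version.

    from xmi_logger.advanced import LogDatabase  # hypothetical import path

    db = LogDatabase(db_path="logs.db")  # WAL mode and a busy timeout are enabled by default
    db.insert_log({
        "level": "ERROR",
        "message": "connection refused",
        "extra_data": {"service": "billing"},  # stored as JSON in the extra_data column
    })

    # Only whitelisted columns are accepted in conditions; unknown keys raise ValueError.
    recent_errors = db.query_logs(conditions={"level": "ERROR"}, limit=50)
    removed = db.purge_older_than(30)  # delete rows older than 30 days
    db.close()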
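
The stream processor, aggregator and health checker can be wired into a small pipeline. This is a sketch under the same import-path assumption; the add_hostname processor and the "logs" directory name are illustrative.

    import time

    from xmi_logger.advanced import (  # hypothetical import path
        LogAggregator,
        LogHealthChecker,
        LogStreamProcessor,
    )

    def add_hostname(entry):
        # Processors take a log dict and return the (possibly modified) dict.
        entry.setdefault("extra_data", {})["host"] = "app-01"
        return entry

    stream = LogStreamProcessor(processors=[add_hostname])
    aggregator = LogAggregator(
        window_size=100,
        flush_interval=2.0,
        on_flush=lambda batch: print(f"aggregated into {len(batch)} entries"),
    )

    for _ in range(300):
        stream.process_log({"level": "INFO", "message": "heartbeat"}, block=False)
        processed = stream.get_processed_log(timeout=0.1)
        if processed:
            aggregator.add_log(processed)

    aggregator.stop()
    stream.stop()
    print(LogHealthChecker().check_health("logs"))  # "logs" assumed to be the log directory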
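
Compression/archiving and backup/restore, again as a sketch with an assumed import path and illustrative directory names. restore_backup validates member paths before extracting, per the new code.

    from xmi_logger.advanced import LogArchiver, LogBackupManager  # hypothetical import path

    archiver = LogArchiver(archive_dir="archives")
    archived = archiver.archive_logs("logs", days_old=7)  # gzip *.log files older than 7 days

    backups = LogBackupManager(backup_dir="backups")
    backup_path = backups.create_backup("logs")                 # tar.gz of the current *.log files
    restored = backups.restore_backup(backup_path, "restored")  # False (with a logged traceback) on failure
    print(archived, backups.list_backups(), restored)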
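
Finally, the lightweight performance metrics and node-scoped log IDs; node_id, persist_every and the timing below are illustrative values.

    import time

    from xmi_logger.advanced import DistributedLogger, PerformanceMonitor  # hypothetical import path

    monitor = PerformanceMonitor(sample_interval=1.0)
    ids = DistributedLogger(node_id="node-1", persist_every=10)

    start = time.time()
    log_id = ids.get_log_id()  # format: "<node_id>_<epoch_ms>_<sequence>"
    monitor.record_log("INFO", time.time() - start)

    print(log_id, monitor.get_metrics())
    ids.flush()     # persist the sequence counter immediately
    monitor.stop()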