unrealon 0.1.19__tar.gz → 0.1.20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unrealon-0.1.19 → unrealon-0.1.20}/PKG-INFO +5 -1
- {unrealon-0.1.19 → unrealon-0.1.20}/pyproject.toml +11 -1
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/__init__.py +3 -1
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_client.py +18 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_version.py +1 -1
- unrealon-0.1.20/src/unrealon/parsers/README.md +400 -0
- unrealon-0.1.20/src/unrealon/parsers/__init__.py +51 -0
- unrealon-0.1.20/src/unrealon/parsers/api_parser.py +281 -0
- unrealon-0.1.20/src/unrealon/parsers/base.py +230 -0
- unrealon-0.1.20/src/unrealon/parsers/browser_parser.py +313 -0
- unrealon-0.1.20/src/unrealon/parsers/cli.py +388 -0
- unrealon-0.1.20/src/unrealon/parsers/monitor.py +147 -0
- unrealon-0.1.20/src/unrealon/parsers/storage.py +104 -0
- unrealon-0.1.20/src/unrealon/parsers/upload.py +311 -0
- unrealon-0.1.20/src/unrealon/parsers/utils/__init__.py +18 -0
- unrealon-0.1.20/src/unrealon/parsers/utils/cleaner.py +93 -0
- unrealon-0.1.20/src/unrealon/parsers/utils/notify.py +135 -0
- unrealon-0.1.20/src/unrealon/parsers/utils/ocr.py +186 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/.gitignore +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/README.md +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/examples/README.md +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/github/README.md +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/github/pyproject.toml +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/github/unrealon/_api/generated/services/pyproject.toml +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/enums.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/logger.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/retry.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/pyproject.toml +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/sync_client.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_config.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_constants.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/lifecycle.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/signals.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/state.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/handlers.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/types.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_config.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_connection.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_constants.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_handlers.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_logging.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_messaging.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_metrics.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_reconnect.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_registration.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_types.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/circuit_breaker.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2.pyi +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2_grpc.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/stream_service.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_config.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_formatters.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_handlers.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_logger.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_project.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/models/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/runner.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/_manager.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/_models.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/services/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/__init__.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/metrics.py +0 -0
- {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/system.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unrealon
|
|
3
|
-
Version: 0.1.19
|
|
3
|
+
Version: 0.1.20
|
|
4
4
|
Summary: Unrealon SDK - Service management for Django backend (registration, heartbeat, logging, commands)
|
|
5
5
|
Project-URL: Homepage, https://github.com/markolofsen/unrealon-sdk
|
|
6
6
|
Project-URL: Documentation, https://unrealon.com
|
|
@@ -17,6 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
19
|
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: click>=8.1.0
|
|
21
|
+
Requires-Dist: cmdop
|
|
20
22
|
Requires-Dist: croniter<7.0.0,>=6.0.0
|
|
21
23
|
Requires-Dist: grpcio-tools<2.0.0,>=1.76.0
|
|
22
24
|
Requires-Dist: grpcio<2.0.0,>=1.76.0
|
|
@@ -26,6 +28,8 @@ Requires-Dist: psutil>=6.0.0
|
|
|
26
28
|
Requires-Dist: pydantic-settings>=2.7.0
|
|
27
29
|
Requires-Dist: pydantic<3.0.0,>=2.10.0
|
|
28
30
|
Requires-Dist: rich<15.0.0,>=14.3.1
|
|
31
|
+
Requires-Dist: sdkrouter
|
|
32
|
+
Requires-Dist: sdkrouter-tools
|
|
29
33
|
Requires-Dist: tenacity>=9.1.0
|
|
30
34
|
Provides-Extra: dev
|
|
31
35
|
Requires-Dist: build>=1.2.0; extra == 'dev'
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "unrealon"
|
|
7
|
-
version = "0.1.19"
|
|
7
|
+
version = "0.1.20"
|
|
8
8
|
description = "Unrealon SDK - Service management for Django backend (registration, heartbeat, logging, commands)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -35,6 +35,10 @@ dependencies = [
|
|
|
35
35
|
"protobuf (>=6.33.5,<7.0.0)",
|
|
36
36
|
"rich (>=14.3.1,<15.0.0)",
|
|
37
37
|
"croniter (>=6.0.0,<7.0.0)",
|
|
38
|
+
"click>=8.1.0",
|
|
39
|
+
"cmdop",
|
|
40
|
+
"sdkrouter",
|
|
41
|
+
"sdkrouter-tools",
|
|
38
42
|
]
|
|
39
43
|
|
|
40
44
|
[project.optional-dependencies]
|
|
@@ -93,3 +97,9 @@ strict = true
|
|
|
93
97
|
asyncio_mode = "auto"
|
|
94
98
|
testpaths = ["tests"]
|
|
95
99
|
pythonpath = ["src"]
|
|
100
|
+
|
|
101
|
+
# Local development dependencies (for parsers module)
|
|
102
|
+
[tool.uv.sources]
|
|
103
|
+
cmdop = { path = "../../../../../@projects/cmdop/projects/software/cmdop_sdk/libs/sdk_python", editable = true }
|
|
104
|
+
sdkrouter = { path = "../../../../../@projects/sdkrouter/solution/packages/sdkrouter_py", editable = true }
|
|
105
|
+
sdkrouter-tools = { path = "../../../../../@projects/sdkrouter/solution/packages/sdkrouter_tools_py", editable = true }
|
|
@@ -27,7 +27,6 @@ Example:
|
|
|
27
27
|
|
|
28
28
|
from ._client import AsyncServiceClient, ServiceClient
|
|
29
29
|
from ._config import UnrealonConfig, configure, get_config, reset_config
|
|
30
|
-
from .runner import TaskRunner
|
|
31
30
|
from ._version import __version__
|
|
32
31
|
from .core import (
|
|
33
32
|
LifecycleConfig,
|
|
@@ -54,6 +53,7 @@ from .exceptions import (
|
|
|
54
53
|
from .grpc import GRPCStreamService
|
|
55
54
|
from .logging import get_logger
|
|
56
55
|
from .models import ServiceStatus
|
|
56
|
+
from .runner import TaskRunner
|
|
57
57
|
from .scheduling import Schedule, ScheduleResult, ScheduleRunStatus
|
|
58
58
|
|
|
59
59
|
__all__ = [
|
|
@@ -99,4 +99,6 @@ __all__ = [
|
|
|
99
99
|
"ScheduleRunStatus",
|
|
100
100
|
# Runner
|
|
101
101
|
"TaskRunner",
|
|
102
|
+
# Parsers submodule (import as: from unrealon.parsers import ...)
|
|
103
|
+
# Note: parsers module requires optional dependencies: pip install unrealon[parsers]
|
|
102
104
|
]
|
|
@@ -79,6 +79,7 @@ class ServiceClient:
|
|
|
79
79
|
"_logger",
|
|
80
80
|
"_cloud_handler",
|
|
81
81
|
"_resume_event",
|
|
82
|
+
"_log_level",
|
|
82
83
|
)
|
|
83
84
|
|
|
84
85
|
def __init__(
|
|
@@ -94,6 +95,7 @@ class ServiceClient:
|
|
|
94
95
|
heartbeat_interval: int | None = None,
|
|
95
96
|
log_batch_size: int | None = None,
|
|
96
97
|
log_flush_interval: float | None = None,
|
|
98
|
+
log_level: str = "INFO",
|
|
97
99
|
) -> None:
|
|
98
100
|
"""
|
|
99
101
|
Initialize service client.
|
|
@@ -109,6 +111,7 @@ class ServiceClient:
|
|
|
109
111
|
heartbeat_interval: Heartbeat interval in seconds
|
|
110
112
|
log_batch_size: Number of logs to batch before sending
|
|
111
113
|
log_flush_interval: Max seconds to wait before flushing logs
|
|
114
|
+
log_level: Minimum log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
|
|
112
115
|
"""
|
|
113
116
|
config_kwargs: dict[str, object] = {}
|
|
114
117
|
if api_key:
|
|
@@ -151,11 +154,14 @@ class ServiceClient:
|
|
|
151
154
|
self._resume_event.set() # Start as "not paused" (event is set)
|
|
152
155
|
|
|
153
156
|
# Initialize logger with Rich console + file, cloud handler added on start
|
|
157
|
+
self._log_level = log_level.upper()
|
|
154
158
|
self._logger: UnrealonLogger = get_logger(
|
|
155
159
|
name=self._config.service_name,
|
|
160
|
+
level=self._log_level, # type: ignore[arg-type]
|
|
156
161
|
log_to_cloud=False, # Will be connected after gRPC start
|
|
157
162
|
)
|
|
158
163
|
self._cloud_handler: CloudHandler = CloudHandler()
|
|
164
|
+
self._cloud_handler.setLevel(getattr(logging, self._log_level))
|
|
159
165
|
|
|
160
166
|
@property
|
|
161
167
|
def grpc(self) -> GRPCStreamService:
|
|
@@ -603,10 +609,17 @@ class ServiceClient:
|
|
|
603
609
|
|
|
604
610
|
def _setup_signal_handlers(self) -> None:
|
|
605
611
|
"""Setup graceful shutdown signal handlers."""
|
|
612
|
+
import os
|
|
606
613
|
|
|
607
614
|
def signal_handler(signum: int, _frame: FrameType | None) -> None:
|
|
615
|
+
if self._shutdown_requested:
|
|
616
|
+
# Second signal - force exit immediately
|
|
617
|
+
logger.info("Received signal %d again, forcing exit...", signum)
|
|
618
|
+
os._exit(1)
|
|
608
619
|
logger.info("Received signal %d, requesting shutdown...", signum)
|
|
609
620
|
self._shutdown_requested = True
|
|
621
|
+
# Unblock any waiting threads
|
|
622
|
+
self._resume_event.set()
|
|
610
623
|
|
|
611
624
|
try:
|
|
612
625
|
self._original_sigint = signal.signal(signal.SIGINT, signal_handler)
|
|
@@ -631,6 +644,7 @@ class AsyncServiceClient:
|
|
|
631
644
|
"_grpc",
|
|
632
645
|
"_logger",
|
|
633
646
|
"_cloud_handler",
|
|
647
|
+
"_log_level",
|
|
634
648
|
)
|
|
635
649
|
|
|
636
650
|
def __init__(
|
|
@@ -643,6 +657,7 @@ class AsyncServiceClient:
|
|
|
643
657
|
dev_mode: bool = False,
|
|
644
658
|
source_code: str | None = None,
|
|
645
659
|
description: str | None = None,
|
|
660
|
+
log_level: str = "INFO",
|
|
646
661
|
) -> None:
|
|
647
662
|
"""Initialize async service client."""
|
|
648
663
|
config_kwargs: dict[str, object] = {}
|
|
@@ -674,11 +689,14 @@ class AsyncServiceClient:
|
|
|
674
689
|
self._grpc: GRPCStreamService | None = None
|
|
675
690
|
|
|
676
691
|
# Initialize logger with Rich console + file, cloud handler added on start
|
|
692
|
+
self._log_level = log_level.upper()
|
|
677
693
|
self._logger: UnrealonLogger = get_logger(
|
|
678
694
|
name=self._config.service_name,
|
|
695
|
+
level=self._log_level, # type: ignore[arg-type]
|
|
679
696
|
log_to_cloud=False,
|
|
680
697
|
)
|
|
681
698
|
self._cloud_handler: CloudHandler = CloudHandler()
|
|
699
|
+
self._cloud_handler.setLevel(getattr(logging, self._log_level))
|
|
682
700
|
|
|
683
701
|
@property
|
|
684
702
|
def grpc(self) -> GRPCStreamService:
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
# Unrealon Parsers
|
|
2
|
+
|
|
3
|
+
A comprehensive framework for building data parsers with built-in monitoring, streaming upload, and CLI support.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install unrealon
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
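For browser-based parsing, you'll also need CMDOP (it is declared as a dependency of `unrealon`; install it explicitly if it is missing from your environment):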
|
|
12
|
+
```bash
|
|
13
|
+
pip install cmdop
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Quick Start
|
|
17
|
+
|
|
18
|
+
### API-based Parser
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from unrealon.parsers import BaseAPIParser, Monitor
|
|
22
|
+
|
|
23
|
+
class MyAPIParser(BaseAPIParser):
|
|
24
|
+
SOURCE_CODE = "myparser"
|
|
25
|
+
CURRENCY = "USD"
|
|
26
|
+
|
|
27
|
+
async def fetch_listing_page(self, page: int, limit: int = 0) -> tuple[list[dict], int]:
|
|
28
|
+
"""Fetch one page of listings."""
|
|
29
|
+
url = f"https://api.example.com/items?page={page}"
|
|
30
|
+
data = await self._get_json(url)
|
|
31
|
+
return data.get("items", []), data.get("total", 0)
|
|
32
|
+
|
|
33
|
+
def transform_item(self, item: dict, detail: dict | None = None) -> dict:
|
|
34
|
+
"""Transform raw item to upload format."""
|
|
35
|
+
return {
|
|
36
|
+
"id": item["id"],
|
|
37
|
+
"url": f"https://example.com/item/{item['id']}",
|
|
38
|
+
"text": item["description"],
|
|
39
|
+
"photos": item.get("images", []),
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
if __name__ == "__main__":
|
|
43
|
+
MyAPIParser.main(
|
|
44
|
+
api_key="pk_your_production_key",
|
|
45
|
+
dev_api_key="dk_your_development_key",
|
|
46
|
+
)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Browser-based Parser
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from unrealon.parsers import BaseBrowserParser
|
|
53
|
+
|
|
54
|
+
class MyBrowserParser(BaseBrowserParser):
|
|
55
|
+
SOURCE_CODE = "myparser"
|
|
56
|
+
CURRENCY = "USD"
|
|
57
|
+
|
|
58
|
+
def fetch_listing(self, browser, pages: int = 3, limit: int = 0) -> list[dict]:
|
|
59
|
+
"""Fetch listing pages using browser."""
|
|
60
|
+
items = []
|
|
61
|
+
for page in range(1, pages + 1):
|
|
62
|
+
browser.navigate(f"https://example.com/items?page={page}")
|
|
63
|
+
# Extract items from page...
|
|
64
|
+
items.extend(extracted_items)
|
|
65
|
+
return items
|
|
66
|
+
|
|
67
|
+
def fetch_detail(self, browser, url: str) -> dict:
|
|
68
|
+
"""Fetch detail page."""
|
|
69
|
+
browser.navigate(url)
|
|
70
|
+
return {
|
|
71
|
+
"text": browser.get_text("main"),
|
|
72
|
+
"images": browser.get_images("img.gallery"),
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
if __name__ == "__main__":
|
|
76
|
+
MyBrowserParser.main(api_key="pk_...")
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Core Components
|
|
80
|
+
|
|
81
|
+
### BaseParser (Abstract)
|
|
82
|
+
|
|
83
|
+
Base class for all parsers. Provides:
|
|
84
|
+
- CLI integration with interactive menu
|
|
85
|
+
- Streaming upload support
|
|
86
|
+
- Local storage backup
|
|
87
|
+
- Monitoring integration
|
|
88
|
+
|
|
89
|
+
**Required attributes:**
|
|
90
|
+
- `SOURCE_CODE: str` - Parser identifier (e.g., "encar", "myparser")
|
|
91
|
+
- `CURRENCY: str` - Currency code (e.g., "USD", "KRW", "EUR")
|
|
92
|
+
|
|
93
|
+
**Required methods:**
|
|
94
|
+
- `run(pages, limit, skip_details)` - Main parsing logic
|
|
95
|
+
- `transform_item(item, detail)` - Convert raw item to upload format
|
|
96
|
+
|
|
97
|
+
### BaseAPIParser
|
|
98
|
+
|
|
99
|
+
For parsers using direct HTTP/API calls. Extends BaseParser with:
|
|
100
|
+
- Built-in `httpx.AsyncClient`
|
|
101
|
+
- Helper methods: `_get_json()`, `_post_json()`
|
|
102
|
+
- Async `run_async()` method
|
|
103
|
+
|
|
104
|
+
**Required methods:**
|
|
105
|
+
- `fetch_listing_page(page, limit)` - Fetch one page, return `(items, total_count)`
|
|
106
|
+
- `transform_item(item, detail)` - Transform item
|
|
107
|
+
|
|
108
|
+
**Optional methods:**
|
|
109
|
+
- `fetch_detail(item)` - Fetch additional details
|
|
110
|
+
- `get_http_headers()` - Custom HTTP headers
|
|
111
|
+
|
|
112
|
+
**Configuration:**
|
|
113
|
+
```python
|
|
114
|
+
class MyParser(BaseAPIParser):
|
|
115
|
+
PAGE_SIZE = 50 # Items per page
|
|
116
|
+
REQUEST_TIMEOUT = 30.0 # HTTP timeout in seconds
|
|
117
|
+
DELAY_BETWEEN_PAGES = 0.5 # Delay between page requests
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### BaseBrowserParser
|
|
121
|
+
|
|
122
|
+
For parsers using browser automation via CMDOP. Extends BaseParser with:
|
|
123
|
+
- CMDOP browser integration
|
|
124
|
+
- Automatic session management
|
|
125
|
+
|
|
126
|
+
**Required methods:**
|
|
127
|
+
- `fetch_listing(browser, pages, limit)` - Fetch listings
|
|
128
|
+
- `transform_item(item, detail)` - Transform item
|
|
129
|
+
|
|
130
|
+
**Optional methods:**
|
|
131
|
+
- `fetch_detail(browser, url)` - Fetch detail page
|
|
132
|
+
|
|
133
|
+
## CLI
|
|
134
|
+
|
|
135
|
+
All parsers get automatic CLI support:
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
# Interactive menu (no arguments)
|
|
139
|
+
python my_parser.py
|
|
140
|
+
|
|
141
|
+
# Production mode
|
|
142
|
+
python my_parser.py --prod --pages 10
|
|
143
|
+
|
|
144
|
+
# Development mode
|
|
145
|
+
python my_parser.py --dev --limit 5
|
|
146
|
+
|
|
147
|
+
# Skip detail fetching
|
|
148
|
+
python my_parser.py --prod --pages 3 --skip-details
|
|
149
|
+
|
|
150
|
+
# Continuous mode (wait for commands)
|
|
151
|
+
python my_parser.py --prod --continuous
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**CLI Options:**
|
|
155
|
+
| Option | Description |
|
|
156
|
+
|--------|-------------|
|
|
157
|
+
| `--pages, -p` | Number of pages to parse (default: 3) |
|
|
158
|
+
| `--limit, -l` | Max items (0 = no limit) |
|
|
159
|
+
| `--skip-details` | Skip fetching detail pages |
|
|
160
|
+
| `--dev` | Use development server |
|
|
161
|
+
| `--prod` | Use production server |
|
|
162
|
+
| `--continuous` | Wait for commands from Unrealon |
|
|
163
|
+
| `--headless/--no-headless` | Browser headless mode |
|
|
164
|
+
|
|
165
|
+
## Monitoring
|
|
166
|
+
|
|
167
|
+
Built-in integration with Unrealon monitoring service.
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from unrealon.parsers import get_monitor, Monitor
|
|
171
|
+
|
|
172
|
+
with get_monitor("myparser", api_key="pk_...", dev_mode=False) as m:
|
|
173
|
+
m.log.info("Starting parser")
|
|
174
|
+
|
|
175
|
+
# Track progress
|
|
176
|
+
m.increment_processed(10)
|
|
177
|
+
m.increment_errors(1)
|
|
178
|
+
|
|
179
|
+
# Status control
|
|
180
|
+
m.set_busy() # Processing
|
|
181
|
+
m.set_idle() # Waiting
|
|
182
|
+
|
|
183
|
+
# Check for interrupts (pause/stop commands)
|
|
184
|
+
m.check_interrupt()
|
|
185
|
+
|
|
186
|
+
# Use runner for automatic interrupt handling
|
|
187
|
+
for item in m.runner.iterate(items):
|
|
188
|
+
process(item)
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
## Streaming Upload
|
|
192
|
+
|
|
193
|
+
Non-blocking upload that runs in a background thread.
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from unrealon.parsers import StreamingUploader
|
|
197
|
+
|
|
198
|
+
def my_upload_func(item: dict) -> tuple[bool, int, int, str | None]:
|
|
199
|
+
"""Upload single item. Returns (success, photos_added, photos_failed, error)."""
|
|
200
|
+
# Your upload logic here
|
|
201
|
+
return (True, 5, 0, None)
|
|
202
|
+
|
|
203
|
+
uploader = StreamingUploader(
|
|
204
|
+
source_code="myparser",
|
|
205
|
+
currency="USD",
|
|
206
|
+
upload_func=my_upload_func,
|
|
207
|
+
logger=monitor.log,
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
# Queue items for upload (non-blocking)
|
|
211
|
+
uploader.upload_batch(items, page_num=1)
|
|
212
|
+
uploader.upload_batch(more_items, page_num=2)
|
|
213
|
+
|
|
214
|
+
# Wait for completion and get stats
|
|
215
|
+
stats = uploader.finish()
|
|
216
|
+
print(f"Uploaded: {stats.success}, Failed: {stats.failed}")
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Local Storage
|
|
220
|
+
|
|
221
|
+
Backup parsed data to local JSON files.
|
|
222
|
+
|
|
223
|
+
```python
|
|
224
|
+
from unrealon.parsers import ResultStorage
|
|
225
|
+
|
|
226
|
+
storage = ResultStorage("myparser", root_dir="results")
|
|
227
|
+
|
|
228
|
+
# Save item
|
|
229
|
+
storage.save("item-123", {"id": "123", "text": "...", "photos": [...]})
|
|
230
|
+
|
|
231
|
+
# Load item
|
|
232
|
+
data = storage.load("item-123")
|
|
233
|
+
|
|
234
|
+
# Check existence
|
|
235
|
+
if storage.exists("item-123"):
|
|
236
|
+
...
|
|
237
|
+
|
|
238
|
+
# List all IDs
|
|
239
|
+
ids = storage.list_ids()
|
|
240
|
+
|
|
241
|
+
# Get statistics
|
|
242
|
+
stats = storage.get_stats() # {"root": "results/myparser", "count": 150, "size_mb": 2.5}
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
## Utilities
|
|
246
|
+
|
|
247
|
+
### HTML Cleaner
|
|
248
|
+
|
|
249
|
+
Clean HTML and save in multiple formats for analysis.
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
from unrealon.parsers.utils import clean_and_save, ALL_FORMATS
|
|
253
|
+
from pathlib import Path
|
|
254
|
+
|
|
255
|
+
clean_and_save(html, "listing", out_dir=Path("cleaned"))
|
|
256
|
+
|
|
257
|
+
# Saves:
|
|
258
|
+
# - listing_raw.html (original)
|
|
259
|
+
# - listing.html (cleaned DOM)
|
|
260
|
+
# - listing.md (markdown)
|
|
261
|
+
# - listing.aom.yaml (accessibility tree)
|
|
262
|
+
# - listing.xtree.txt (tree structure)
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### OCR Tool
|
|
266
|
+
|
|
267
|
+
Screenshot pages and extract text via OCR.
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
from unrealon.parsers.utils import OCRTool, OCRResult
|
|
271
|
+
|
|
272
|
+
ocr = OCRTool(language_hint="en")
|
|
273
|
+
|
|
274
|
+
# With existing browser session
|
|
275
|
+
result = ocr.extract("https://example.com", browser=browser)
|
|
276
|
+
print(result.text)
|
|
277
|
+
print(result.cost)
|
|
278
|
+
|
|
279
|
+
# Standalone (creates its own browser)
|
|
280
|
+
result = ocr.extract("https://example.com")
|
|
281
|
+
|
|
282
|
+
# From existing image
|
|
283
|
+
result = ocr.extract_from_file(Path("screenshot.png"))
|
|
284
|
+
result = ocr.extract_from_bytes(png_bytes)
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### Telegram Notifications
|
|
288
|
+
|
|
289
|
+
Send parser status updates to Telegram.
|
|
290
|
+
|
|
291
|
+
```python
|
|
292
|
+
from unrealon.parsers.utils import ParserNotifier
|
|
293
|
+
|
|
294
|
+
notifier = ParserNotifier(
|
|
295
|
+
source_code="myparser",
|
|
296
|
+
bot_token="123:ABC...",
|
|
297
|
+
chat_id="-123456",
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
notifier.started(pages=10)
|
|
301
|
+
notifier.progress(50, 100, photos=250)
|
|
302
|
+
notifier.completed(items=100, success=98, failed=2, duration="00:05:23")
|
|
303
|
+
notifier.warning("Rate limited, slowing down")
|
|
304
|
+
notifier.failed("Connection timeout", url="https://...")
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Custom Uploader
|
|
308
|
+
|
|
309
|
+
Create your own uploader for specific APIs:
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
from unrealon.parsers import StreamingUploader, Monitor
|
|
313
|
+
|
|
314
|
+
def create_my_uploader(monitor: Monitor, mode: str) -> StreamingUploader:
|
|
315
|
+
"""Create uploader for my API."""
|
|
316
|
+
|
|
317
|
+
def upload_item(item: dict) -> tuple[bool, int, int, str | None]:
|
|
318
|
+
# Call your API here
|
|
319
|
+
response = my_api.upload(item)
|
|
320
|
+
if response.ok:
|
|
321
|
+
return (True, response.photos_added, 0, None)
|
|
322
|
+
else:
|
|
323
|
+
return (False, 0, 0, response.error)
|
|
324
|
+
|
|
325
|
+
return StreamingUploader(
|
|
326
|
+
source_code="myparser",
|
|
327
|
+
currency="USD",
|
|
328
|
+
upload_func=upload_item,
|
|
329
|
+
logger=monitor.log,
|
|
330
|
+
)
|
|
331
|
+
|
|
332
|
+
# Use in parser
|
|
333
|
+
if __name__ == "__main__":
|
|
334
|
+
MyParser.main(
|
|
335
|
+
api_key="pk_...",
|
|
336
|
+
create_uploader=create_my_uploader,
|
|
337
|
+
)
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
## Configuration
|
|
341
|
+
|
|
342
|
+
### Parser Class Attributes
|
|
343
|
+
|
|
344
|
+
```python
|
|
345
|
+
class MyParser(BaseAPIParser):
|
|
346
|
+
# Required
|
|
347
|
+
SOURCE_CODE = "myparser"
|
|
348
|
+
CURRENCY = "USD"
|
|
349
|
+
|
|
350
|
+
# Optional (API parser)
|
|
351
|
+
PAGE_SIZE = 20
|
|
352
|
+
REQUEST_TIMEOUT = 30.0
|
|
353
|
+
DELAY_BETWEEN_PAGES = 0.3
|
|
354
|
+
|
|
355
|
+
# Optional (base parser)
|
|
356
|
+
UPLOAD_BATCH_SIZE = 20
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
### main() Arguments
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
MyParser.main(
|
|
363
|
+
description="My Parser", # CLI description
|
|
364
|
+
api_key="pk_...", # Production Unrealon API key
|
|
365
|
+
dev_api_key="dk_...", # Development Unrealon API key
|
|
366
|
+
service_name_prefix="myproject-", # Prefix for service registration
|
|
367
|
+
create_uploader=my_uploader_factory, # Custom uploader factory
|
|
368
|
+
)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
## Error Handling
|
|
372
|
+
|
|
373
|
+
Parsers support graceful interruption:
|
|
374
|
+
|
|
375
|
+
```python
|
|
376
|
+
from unrealon.exceptions import StopInterrupt, PauseInterrupt
|
|
377
|
+
|
|
378
|
+
try:
|
|
379
|
+
for item in items:
|
|
380
|
+
monitor.check_interrupt() # Raises if stop/pause requested
|
|
381
|
+
process(item)
|
|
382
|
+
except StopInterrupt:
|
|
383
|
+
print("Parser stopped by command")
|
|
384
|
+
except PauseInterrupt:
|
|
385
|
+
print("Parser paused")
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
On Ctrl+C, parsers abort immediately without waiting for pending uploads.
|
|
389
|
+
|
|
390
|
+
## Dependencies
|
|
391
|
+
|
|
392
|
+
Core:
|
|
393
|
+
- `httpx` - HTTP client
|
|
394
|
+
- `rich` - Console output
|
|
395
|
+
- `click` - CLI framework
|
|
396
|
+
|
|
397
|
+
Optional:
|
|
398
|
+
- `cmdop` - Browser automation (for BaseBrowserParser)
|
|
399
|
+
- `sdkrouter` - OCR and other tools
|
|
400
|
+
- `sdkrouter-tools` - HTML cleaner, Telegram sender
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unrealon Parsers - base classes for building data parsers.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from unrealon.parsers import BaseAPIParser, BaseBrowserParser
|
|
6
|
+
|
|
7
|
+
class MyParser(BaseAPIParser):
|
|
8
|
+
SOURCE_CODE = "myparser"
|
|
9
|
+
CURRENCY = "USD"
|
|
10
|
+
|
|
11
|
+
async def fetch_listing_page(self, page: int, limit: int = 0):
|
|
12
|
+
...
|
|
13
|
+
|
|
14
|
+
def transform_item(self, item: dict, detail: dict | None = None):
|
|
15
|
+
...
|
|
16
|
+
|
|
17
|
+
if __name__ == "__main__":
|
|
18
|
+
MyParser.main()
|
|
19
|
+
"""
|
|
20
|
+
from .api_parser import BaseAPIParser
|
|
21
|
+
from .base import BaseParser
|
|
22
|
+
from .browser_parser import BaseBrowserParser
|
|
23
|
+
from .cli import CLIConfig, cli_options, create_parser_cli
|
|
24
|
+
from .monitor import Monitor, get_monitor
|
|
25
|
+
from .storage import ResultStorage
|
|
26
|
+
from .upload import StreamingStats, StreamingUploader
|
|
27
|
+
from .utils import ALL_FORMATS, OCRResult, OCRTool, ParserNotifier, clean_and_save
|
|
28
|
+
|
|
29
|
+
__all__ = [
|
|
30
|
+
# Base classes
|
|
31
|
+
"BaseParser",
|
|
32
|
+
"BaseAPIParser",
|
|
33
|
+
"BaseBrowserParser",
|
|
34
|
+
# CLI
|
|
35
|
+
"CLIConfig",
|
|
36
|
+
"cli_options",
|
|
37
|
+
"create_parser_cli",
|
|
38
|
+
# Upload & Storage
|
|
39
|
+
"StreamingUploader",
|
|
40
|
+
"StreamingStats",
|
|
41
|
+
"ResultStorage",
|
|
42
|
+
# Monitoring
|
|
43
|
+
"Monitor",
|
|
44
|
+
"get_monitor",
|
|
45
|
+
# Utils
|
|
46
|
+
"clean_and_save",
|
|
47
|
+
"ALL_FORMATS",
|
|
48
|
+
"ParserNotifier",
|
|
49
|
+
"OCRTool",
|
|
50
|
+
"OCRResult",
|
|
51
|
+
]
|