unrealon 0.1.19__tar.gz → 0.1.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. {unrealon-0.1.19 → unrealon-0.1.20}/PKG-INFO +5 -1
  2. {unrealon-0.1.19 → unrealon-0.1.20}/pyproject.toml +11 -1
  3. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/__init__.py +3 -1
  4. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_client.py +18 -0
  5. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_version.py +1 -1
  6. unrealon-0.1.20/src/unrealon/parsers/README.md +400 -0
  7. unrealon-0.1.20/src/unrealon/parsers/__init__.py +51 -0
  8. unrealon-0.1.20/src/unrealon/parsers/api_parser.py +281 -0
  9. unrealon-0.1.20/src/unrealon/parsers/base.py +230 -0
  10. unrealon-0.1.20/src/unrealon/parsers/browser_parser.py +313 -0
  11. unrealon-0.1.20/src/unrealon/parsers/cli.py +388 -0
  12. unrealon-0.1.20/src/unrealon/parsers/monitor.py +147 -0
  13. unrealon-0.1.20/src/unrealon/parsers/storage.py +104 -0
  14. unrealon-0.1.20/src/unrealon/parsers/upload.py +311 -0
  15. unrealon-0.1.20/src/unrealon/parsers/utils/__init__.py +18 -0
  16. unrealon-0.1.20/src/unrealon/parsers/utils/cleaner.py +93 -0
  17. unrealon-0.1.20/src/unrealon/parsers/utils/notify.py +135 -0
  18. unrealon-0.1.20/src/unrealon/parsers/utils/ocr.py +186 -0
  19. {unrealon-0.1.19 → unrealon-0.1.20}/.gitignore +0 -0
  20. {unrealon-0.1.19 → unrealon-0.1.20}/README.md +0 -0
  21. {unrealon-0.1.19 → unrealon-0.1.20}/examples/README.md +0 -0
  22. {unrealon-0.1.19 → unrealon-0.1.20}/github/README.md +0 -0
  23. {unrealon-0.1.19 → unrealon-0.1.20}/github/pyproject.toml +0 -0
  24. {unrealon-0.1.19 → unrealon-0.1.20}/github/unrealon/_api/generated/services/pyproject.toml +0 -0
  25. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/__init__.py +0 -0
  26. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/client.py +0 -0
  27. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/__init__.py +0 -0
  28. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/__init__.py +0 -0
  29. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/client.py +0 -0
  30. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/enums.py +0 -0
  31. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/__init__.py +0 -0
  32. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/logger.py +0 -0
  33. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/helpers/retry.py +0 -0
  34. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/pyproject.toml +0 -0
  35. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/__init__.py +0 -0
  36. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/client.py +0 -0
  37. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/models.py +0 -0
  38. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__api_keys/sync_client.py +0 -0
  39. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/__init__.py +0 -0
  40. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/client.py +0 -0
  41. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/models.py +0 -0
  42. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_control/sync_client.py +0 -0
  43. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/__init__.py +0 -0
  44. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/client.py +0 -0
  45. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/models.py +0 -0
  46. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__process_jobs/sync_client.py +0 -0
  47. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/__init__.py +0 -0
  48. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/client.py +0 -0
  49. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/models.py +0 -0
  50. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_events/sync_client.py +0 -0
  51. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/__init__.py +0 -0
  52. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/client.py +0 -0
  53. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/models.py +0 -0
  54. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedule_runs/sync_client.py +0 -0
  55. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/__init__.py +0 -0
  56. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/client.py +0 -0
  57. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/models.py +0 -0
  58. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__schedules/sync_client.py +0 -0
  59. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/__init__.py +0 -0
  60. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/client.py +0 -0
  61. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/models.py +0 -0
  62. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_commands/sync_client.py +0 -0
  63. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/__init__.py +0 -0
  64. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/client.py +0 -0
  65. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/models.py +0 -0
  66. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_control/sync_client.py +0 -0
  67. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/__init__.py +0 -0
  68. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/client.py +0 -0
  69. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/models.py +0 -0
  70. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_logs/sync_client.py +0 -0
  71. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/__init__.py +0 -0
  72. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/client.py +0 -0
  73. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/models.py +0 -0
  74. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__service_sdk/sync_client.py +0 -0
  75. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/__init__.py +0 -0
  76. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/client.py +0 -0
  77. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/models.py +0 -0
  78. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/services__api__services/sync_client.py +0 -0
  79. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_api/generated/services/sync_client.py +0 -0
  80. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_config.py +0 -0
  81. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/_constants.py +0 -0
  82. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/__init__.py +0 -0
  83. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/lifecycle.py +0 -0
  84. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/signals.py +0 -0
  85. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/core/state.py +0 -0
  86. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/__init__.py +0 -0
  87. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/handlers.py +0 -0
  88. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/exceptions/types.py +0 -0
  89. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/__init__.py +0 -0
  90. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_config.py +0 -0
  91. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_connection.py +0 -0
  92. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_constants.py +0 -0
  93. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_handlers.py +0 -0
  94. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_logging.py +0 -0
  95. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_messaging.py +0 -0
  96. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_metrics.py +0 -0
  97. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_reconnect.py +0 -0
  98. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_registration.py +0 -0
  99. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/_types.py +0 -0
  100. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/circuit_breaker.py +0 -0
  101. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/__init__.py +0 -0
  102. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2.py +0 -0
  103. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2.pyi +0 -0
  104. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/generated/unrealon_pb2_grpc.py +0 -0
  105. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/grpc/stream_service.py +0 -0
  106. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/__init__.py +0 -0
  107. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_config.py +0 -0
  108. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_formatters.py +0 -0
  109. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_handlers.py +0 -0
  110. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_logger.py +0 -0
  111. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/logging/_project.py +0 -0
  112. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/models/__init__.py +0 -0
  113. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/runner.py +0 -0
  114. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/__init__.py +0 -0
  115. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/_manager.py +0 -0
  116. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/scheduling/_models.py +0 -0
  117. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/services/__init__.py +0 -0
  118. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/__init__.py +0 -0
  119. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/metrics.py +0 -0
  120. {unrealon-0.1.19 → unrealon-0.1.20}/src/unrealon/utils/system.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unrealon
3
- Version: 0.1.19
3
+ Version: 0.1.20
4
4
  Summary: Unrealon SDK - Service management for Django backend (registration, heartbeat, logging, commands)
5
5
  Project-URL: Homepage, https://github.com/markolofsen/unrealon-sdk
6
6
  Project-URL: Documentation, https://unrealon.com
@@ -17,6 +17,8 @@ Classifier: Programming Language :: Python :: 3.11
17
17
  Classifier: Programming Language :: Python :: 3.12
18
18
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
19
  Requires-Python: >=3.10
20
+ Requires-Dist: click>=8.1.0
21
+ Requires-Dist: cmdop
20
22
  Requires-Dist: croniter<7.0.0,>=6.0.0
21
23
  Requires-Dist: grpcio-tools<2.0.0,>=1.76.0
22
24
  Requires-Dist: grpcio<2.0.0,>=1.76.0
@@ -26,6 +28,8 @@ Requires-Dist: psutil>=6.0.0
26
28
  Requires-Dist: pydantic-settings>=2.7.0
27
29
  Requires-Dist: pydantic<3.0.0,>=2.10.0
28
30
  Requires-Dist: rich<15.0.0,>=14.3.1
31
+ Requires-Dist: sdkrouter
32
+ Requires-Dist: sdkrouter-tools
29
33
  Requires-Dist: tenacity>=9.1.0
30
34
  Provides-Extra: dev
31
35
  Requires-Dist: build>=1.2.0; extra == 'dev'
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "unrealon"
7
- version = "0.1.19"
7
+ version = "0.1.20"
8
8
  description = "Unrealon SDK - Service management for Django backend (registration, heartbeat, logging, commands)"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -35,6 +35,10 @@ dependencies = [
35
35
  "protobuf (>=6.33.5,<7.0.0)",
36
36
  "rich (>=14.3.1,<15.0.0)",
37
37
  "croniter (>=6.0.0,<7.0.0)",
38
+ "click>=8.1.0",
39
+ "cmdop",
40
+ "sdkrouter",
41
+ "sdkrouter-tools",
38
42
  ]
39
43
 
40
44
  [project.optional-dependencies]
@@ -93,3 +97,9 @@ strict = true
93
97
  asyncio_mode = "auto"
94
98
  testpaths = ["tests"]
95
99
  pythonpath = ["src"]
100
+
101
+ # Local development dependencies (for parsers module)
102
+ [tool.uv.sources]
103
+ cmdop = { path = "../../../../../@projects/cmdop/projects/software/cmdop_sdk/libs/sdk_python", editable = true }
104
+ sdkrouter = { path = "../../../../../@projects/sdkrouter/solution/packages/sdkrouter_py", editable = true }
105
+ sdkrouter-tools = { path = "../../../../../@projects/sdkrouter/solution/packages/sdkrouter_tools_py", editable = true }
@@ -27,7 +27,6 @@ Example:
27
27
 
28
28
  from ._client import AsyncServiceClient, ServiceClient
29
29
  from ._config import UnrealonConfig, configure, get_config, reset_config
30
- from .runner import TaskRunner
31
30
  from ._version import __version__
32
31
  from .core import (
33
32
  LifecycleConfig,
@@ -54,6 +53,7 @@ from .exceptions import (
54
53
  from .grpc import GRPCStreamService
55
54
  from .logging import get_logger
56
55
  from .models import ServiceStatus
56
+ from .runner import TaskRunner
57
57
  from .scheduling import Schedule, ScheduleResult, ScheduleRunStatus
58
58
 
59
59
  __all__ = [
@@ -99,4 +99,6 @@ __all__ = [
99
99
  "ScheduleRunStatus",
100
100
  # Runner
101
101
  "TaskRunner",
102
+ # Parsers submodule (import as: from unrealon.parsers import ...)
103
+ # Note: the parsers module relies on click, cmdop, sdkrouter and sdkrouter-tools, which pyproject.toml lists as regular (not optional) dependencies
102
104
  ]
@@ -79,6 +79,7 @@ class ServiceClient:
79
79
  "_logger",
80
80
  "_cloud_handler",
81
81
  "_resume_event",
82
+ "_log_level",
82
83
  )
83
84
 
84
85
  def __init__(
@@ -94,6 +95,7 @@ class ServiceClient:
94
95
  heartbeat_interval: int | None = None,
95
96
  log_batch_size: int | None = None,
96
97
  log_flush_interval: float | None = None,
98
+ log_level: str = "INFO",
97
99
  ) -> None:
98
100
  """
99
101
  Initialize service client.
@@ -109,6 +111,7 @@ class ServiceClient:
109
111
  heartbeat_interval: Heartbeat interval in seconds
110
112
  log_batch_size: Number of logs to batch before sending
111
113
  log_flush_interval: Max seconds to wait before flushing logs
114
+ log_level: Minimum log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
112
115
  """
113
116
  config_kwargs: dict[str, object] = {}
114
117
  if api_key:
@@ -151,11 +154,14 @@ class ServiceClient:
151
154
  self._resume_event.set() # Start as "not paused" (event is set)
152
155
 
153
156
  # Initialize logger with Rich console + file, cloud handler added on start
157
+ self._log_level = log_level.upper()
154
158
  self._logger: UnrealonLogger = get_logger(
155
159
  name=self._config.service_name,
160
+ level=self._log_level, # type: ignore[arg-type]
156
161
  log_to_cloud=False, # Will be connected after gRPC start
157
162
  )
158
163
  self._cloud_handler: CloudHandler = CloudHandler()
164
+ self._cloud_handler.setLevel(getattr(logging, self._log_level))
159
165
 
160
166
  @property
161
167
  def grpc(self) -> GRPCStreamService:
@@ -603,10 +609,17 @@ class ServiceClient:
603
609
 
604
610
  def _setup_signal_handlers(self) -> None:
605
611
  """Setup graceful shutdown signal handlers."""
612
+ import os
606
613
 
607
614
  def signal_handler(signum: int, _frame: FrameType | None) -> None:
615
+ if self._shutdown_requested:
616
+ # Second signal - force exit immediately
617
+ logger.info("Received signal %d again, forcing exit...", signum)
618
+ os._exit(1)
608
619
  logger.info("Received signal %d, requesting shutdown...", signum)
609
620
  self._shutdown_requested = True
621
+ # Unblock any waiting threads
622
+ self._resume_event.set()
610
623
 
611
624
  try:
612
625
  self._original_sigint = signal.signal(signal.SIGINT, signal_handler)
@@ -631,6 +644,7 @@ class AsyncServiceClient:
631
644
  "_grpc",
632
645
  "_logger",
633
646
  "_cloud_handler",
647
+ "_log_level",
634
648
  )
635
649
 
636
650
  def __init__(
@@ -643,6 +657,7 @@ class AsyncServiceClient:
643
657
  dev_mode: bool = False,
644
658
  source_code: str | None = None,
645
659
  description: str | None = None,
660
+ log_level: str = "INFO",
646
661
  ) -> None:
647
662
  """Initialize async service client."""
648
663
  config_kwargs: dict[str, object] = {}
@@ -674,11 +689,14 @@ class AsyncServiceClient:
674
689
  self._grpc: GRPCStreamService | None = None
675
690
 
676
691
  # Initialize logger with Rich console + file, cloud handler added on start
692
+ self._log_level = log_level.upper()
677
693
  self._logger: UnrealonLogger = get_logger(
678
694
  name=self._config.service_name,
695
+ level=self._log_level, # type: ignore[arg-type]
679
696
  log_to_cloud=False,
680
697
  )
681
698
  self._cloud_handler: CloudHandler = CloudHandler()
699
+ self._cloud_handler.setLevel(getattr(logging, self._log_level))
682
700
 
683
701
  @property
684
702
  def grpc(self) -> GRPCStreamService:
@@ -1,3 +1,3 @@
1
1
  """Version information."""
2
2
 
3
- __version__ = "0.1.19"
3
+ __version__ = "0.1.20"
@@ -0,0 +1,400 @@
1
+ # Unrealon Parsers
2
+
3
+ A comprehensive framework for building data parsers with built-in monitoring, streaming upload, and CLI support.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install unrealon
9
+ ```
10
+
11
+ Browser-based parsing uses CMDOP. It is declared as a dependency of `unrealon`, but you can install or upgrade it explicitly:
12
+ ```bash
13
+ pip install cmdop
14
+ ```
15
+
16
+ ## Quick Start
17
+
18
+ ### API-based Parser
19
+
20
+ ```python
21
+ from unrealon.parsers import BaseAPIParser, Monitor
22
+
23
+ class MyAPIParser(BaseAPIParser):
24
+ SOURCE_CODE = "myparser"
25
+ CURRENCY = "USD"
26
+
27
+ async def fetch_listing_page(self, page: int, limit: int = 0) -> tuple[list[dict], int]:
28
+ """Fetch one page of listings."""
29
+ url = f"https://api.example.com/items?page={page}"
30
+ data = await self._get_json(url)
31
+ return data.get("items", []), data.get("total", 0)
32
+
33
+ def transform_item(self, item: dict, detail: dict | None = None) -> dict:
34
+ """Transform raw item to upload format."""
35
+ return {
36
+ "id": item["id"],
37
+ "url": f"https://example.com/item/{item['id']}",
38
+ "text": item["description"],
39
+ "photos": item.get("images", []),
40
+ }
41
+
42
+ if __name__ == "__main__":
43
+ MyAPIParser.main(
44
+ api_key="pk_your_production_key",
45
+ dev_api_key="dk_your_development_key",
46
+ )
47
+ ```
48
+
49
+ ### Browser-based Parser
50
+
51
+ ```python
52
+ from unrealon.parsers import BaseBrowserParser
53
+
54
+ class MyBrowserParser(BaseBrowserParser):
55
+ SOURCE_CODE = "myparser"
56
+ CURRENCY = "USD"
57
+
58
+ def fetch_listing(self, browser, pages: int = 3, limit: int = 0) -> list[dict]:
59
+ """Fetch listing pages using browser."""
60
+ items = []
61
+ for page in range(1, pages + 1):
62
+ browser.navigate(f"https://example.com/items?page={page}")
63
+ # Extract items from page...
64
+ items.extend(extracted_items)
65
+ return items
66
+
67
+ def fetch_detail(self, browser, url: str) -> dict:
68
+ """Fetch detail page."""
69
+ browser.navigate(url)
70
+ return {
71
+ "text": browser.get_text("main"),
72
+ "images": browser.get_images("img.gallery"),
73
+ }
74
+
75
+ if __name__ == "__main__":
76
+ MyBrowserParser.main(api_key="pk_...")
77
+ ```
78
+
79
+ ## Core Components
80
+
81
+ ### BaseParser (Abstract)
82
+
83
+ Base class for all parsers. Provides:
84
+ - CLI integration with interactive menu
85
+ - Streaming upload support
86
+ - Local storage backup
87
+ - Monitoring integration
88
+
89
+ **Required attributes:**
90
+ - `SOURCE_CODE: str` - Parser identifier (e.g., "encar", "myparser")
91
+ - `CURRENCY: str` - Currency code (e.g., "USD", "KRW", "EUR")
92
+
93
+ **Required methods:**
94
+ - `run(pages, limit, skip_details)` - Main parsing logic
95
+ - `transform_item(item, detail)` - Convert raw item to upload format
96
+
97
+ ### BaseAPIParser
98
+
99
+ For parsers using direct HTTP/API calls. Extends BaseParser with:
100
+ - Built-in `httpx.AsyncClient`
101
+ - Helper methods: `_get_json()`, `_post_json()`
102
+ - Async `run_async()` method
103
+
104
+ **Required methods:**
105
+ - `fetch_listing_page(page, limit)` - Fetch one page, return `(items, total_count)`
106
+ - `transform_item(item, detail)` - Transform item
107
+
108
+ **Optional methods:**
109
+ - `fetch_detail(item)` - Fetch additional details
110
+ - `get_http_headers()` - Custom HTTP headers
111
+
112
+ **Configuration:**
113
+ ```python
114
+ class MyParser(BaseAPIParser):
115
+ PAGE_SIZE = 50 # Items per page
116
+ REQUEST_TIMEOUT = 30.0 # HTTP timeout in seconds
117
+ DELAY_BETWEEN_PAGES = 0.5 # Delay between page requests
118
+ ```
119
+
120
+ ### BaseBrowserParser
121
+
122
+ For parsers using browser automation via CMDOP. Extends BaseParser with:
123
+ - CMDOP browser integration
124
+ - Automatic session management
125
+
126
+ **Required methods:**
127
+ - `fetch_listing(browser, pages, limit)` - Fetch listings
128
+ - `transform_item(item, detail)` - Transform item
129
+
130
+ **Optional methods:**
131
+ - `fetch_detail(browser, url)` - Fetch detail page
132
+
133
+ ## CLI
134
+
135
+ All parsers get automatic CLI support:
136
+
137
+ ```bash
138
+ # Interactive menu (no arguments)
139
+ python my_parser.py
140
+
141
+ # Production mode
142
+ python my_parser.py --prod --pages 10
143
+
144
+ # Development mode
145
+ python my_parser.py --dev --limit 5
146
+
147
+ # Skip detail fetching
148
+ python my_parser.py --prod --pages 3 --skip-details
149
+
150
+ # Continuous mode (wait for commands)
151
+ python my_parser.py --prod --continuous
152
+ ```
153
+
154
+ **CLI Options:**
155
+ | Option | Description |
156
+ |--------|-------------|
157
+ | `--pages, -p` | Number of pages to parse (default: 3) |
158
+ | `--limit, -l` | Max items (0 = no limit) |
159
+ | `--skip-details` | Skip fetching detail pages |
160
+ | `--dev` | Use development server |
161
+ | `--prod` | Use production server |
162
+ | `--continuous` | Wait for commands from Unrealon |
163
+ | `--headless/--no-headless` | Browser headless mode |
164
+
165
+ ## Monitoring
166
+
167
+ Built-in integration with Unrealon monitoring service.
168
+
169
+ ```python
170
+ from unrealon.parsers import get_monitor, Monitor
171
+
172
+ with get_monitor("myparser", api_key="pk_...", dev_mode=False) as m:
173
+ m.log.info("Starting parser")
174
+
175
+ # Track progress
176
+ m.increment_processed(10)
177
+ m.increment_errors(1)
178
+
179
+ # Status control
180
+ m.set_busy() # Processing
181
+ m.set_idle() # Waiting
182
+
183
+ # Check for interrupts (pause/stop commands)
184
+ m.check_interrupt()
185
+
186
+ # Use runner for automatic interrupt handling
187
+ for item in m.runner.iterate(items):
188
+ process(item)
189
+ ```
190
+
191
+ ## Streaming Upload
192
+
193
+ Non-blocking upload that runs in a background thread.
194
+
195
+ ```python
196
+ from unrealon.parsers import StreamingUploader
197
+
198
+ def my_upload_func(item: dict) -> tuple[bool, int, int, str | None]:
199
+ """Upload single item. Returns (success, photos_added, photos_failed, error)."""
200
+ # Your upload logic here
201
+ return (True, 5, 0, None)
202
+
203
+ uploader = StreamingUploader(
204
+ source_code="myparser",
205
+ currency="USD",
206
+ upload_func=my_upload_func,
207
+ logger=monitor.log,
208
+ )
209
+
210
+ # Queue items for upload (non-blocking)
211
+ uploader.upload_batch(items, page_num=1)
212
+ uploader.upload_batch(more_items, page_num=2)
213
+
214
+ # Wait for completion and get stats
215
+ stats = uploader.finish()
216
+ print(f"Uploaded: {stats.success}, Failed: {stats.failed}")
217
+ ```
218
+
219
+ ## Local Storage
220
+
221
+ Backup parsed data to local JSON files.
222
+
223
+ ```python
224
+ from unrealon.parsers import ResultStorage
225
+
226
+ storage = ResultStorage("myparser", root_dir="results")
227
+
228
+ # Save item
229
+ storage.save("item-123", {"id": "123", "text": "...", "photos": [...]})
230
+
231
+ # Load item
232
+ data = storage.load("item-123")
233
+
234
+ # Check existence
235
+ if storage.exists("item-123"):
236
+ ...
237
+
238
+ # List all IDs
239
+ ids = storage.list_ids()
240
+
241
+ # Get statistics
242
+ stats = storage.get_stats() # {"root": "results/myparser", "count": 150, "size_mb": 2.5}
243
+ ```
244
+
245
+ ## Utilities
246
+
247
+ ### HTML Cleaner
248
+
249
+ Clean HTML and save in multiple formats for analysis.
250
+
251
+ ```python
252
+ from unrealon.parsers.utils import clean_and_save, ALL_FORMATS
253
+ from pathlib import Path
254
+
255
+ clean_and_save(html, "listing", out_dir=Path("cleaned"))
256
+
257
+ # Saves:
258
+ # - listing_raw.html (original)
259
+ # - listing.html (cleaned DOM)
260
+ # - listing.md (markdown)
261
+ # - listing.aom.yaml (accessibility tree)
262
+ # - listing.xtree.txt (tree structure)
263
+ ```
264
+
265
+ ### OCR Tool
266
+
267
+ Screenshot pages and extract text via OCR.
268
+
269
+ ```python
270
+ from unrealon.parsers.utils import OCRTool, OCRResult
271
+
272
+ ocr = OCRTool(language_hint="en")
273
+
274
+ # With existing browser session
275
+ result = ocr.extract("https://example.com", browser=browser)
276
+ print(result.text)
277
+ print(result.cost)
278
+
279
+ # Standalone (creates its own browser)
280
+ result = ocr.extract("https://example.com")
281
+
282
+ # From existing image
283
+ result = ocr.extract_from_file(Path("screenshot.png"))
284
+ result = ocr.extract_from_bytes(png_bytes)
285
+ ```
286
+
287
+ ### Telegram Notifications
288
+
289
+ Send parser status updates to Telegram.
290
+
291
+ ```python
292
+ from unrealon.parsers.utils import ParserNotifier
293
+
294
+ notifier = ParserNotifier(
295
+ source_code="myparser",
296
+ bot_token="123:ABC...",
297
+ chat_id="-123456",
298
+ )
299
+
300
+ notifier.started(pages=10)
301
+ notifier.progress(50, 100, photos=250)
302
+ notifier.completed(items=100, success=98, failed=2, duration="00:05:23")
303
+ notifier.warning("Rate limited, slowing down")
304
+ notifier.failed("Connection timeout", url="https://...")
305
+ ```
306
+
307
+ ## Custom Uploader
308
+
309
+ Create your own uploader for specific APIs:
310
+
311
+ ```python
312
+ from unrealon.parsers import StreamingUploader, Monitor
313
+
314
+ def create_my_uploader(monitor: Monitor, mode: str) -> StreamingUploader:
315
+ """Create uploader for my API."""
316
+
317
+ def upload_item(item: dict) -> tuple[bool, int, int, str | None]:
318
+ # Call your API here
319
+ response = my_api.upload(item)
320
+ if response.ok:
321
+ return (True, response.photos_added, 0, None)
322
+ else:
323
+ return (False, 0, 0, response.error)
324
+
325
+ return StreamingUploader(
326
+ source_code="myparser",
327
+ currency="USD",
328
+ upload_func=upload_item,
329
+ logger=monitor.log,
330
+ )
331
+
332
+ # Use in parser
333
+ if __name__ == "__main__":
334
+ MyParser.main(
335
+ api_key="pk_...",
336
+ create_uploader=create_my_uploader,
337
+ )
338
+ ```
339
+
340
+ ## Configuration
341
+
342
+ ### Parser Class Attributes
343
+
344
+ ```python
345
+ class MyParser(BaseAPIParser):
346
+ # Required
347
+ SOURCE_CODE = "myparser"
348
+ CURRENCY = "USD"
349
+
350
+ # Optional (API parser)
351
+ PAGE_SIZE = 20
352
+ REQUEST_TIMEOUT = 30.0
353
+ DELAY_BETWEEN_PAGES = 0.3
354
+
355
+ # Optional (base parser)
356
+ UPLOAD_BATCH_SIZE = 20
357
+ ```
358
+
359
+ ### main() Arguments
360
+
361
+ ```python
362
+ MyParser.main(
363
+ description="My Parser", # CLI description
364
+ api_key="pk_...", # Production Unrealon API key
365
+ dev_api_key="dk_...", # Development Unrealon API key
366
+ service_name_prefix="myproject-", # Prefix for service registration
367
+ create_uploader=my_uploader_factory, # Custom uploader factory
368
+ )
369
+ ```
370
+
371
+ ## Error Handling
372
+
373
+ Parsers support graceful interruption:
374
+
375
+ ```python
376
+ from unrealon.exceptions import StopInterrupt, PauseInterrupt
377
+
378
+ try:
379
+ for item in items:
380
+ monitor.check_interrupt() # Raises if stop/pause requested
381
+ process(item)
382
+ except StopInterrupt:
383
+ print("Parser stopped by command")
384
+ except PauseInterrupt:
385
+ print("Parser paused")
386
+ ```
387
+
388
+ On Ctrl+C, parsers abort immediately without waiting for pending uploads.
389
+
390
+ ## Dependencies
391
+
392
+ Core:
393
+ - `httpx` - HTTP client
394
+ - `rich` - Console output
395
+ - `click` - CLI framework
396
+
397
+ Feature-specific (declared as package dependencies, used only by the features noted):
398
+ - `cmdop` - Browser automation (for BaseBrowserParser)
399
+ - `sdkrouter` - OCR and other tools
400
+ - `sdkrouter-tools` - HTML cleaner, Telegram sender
@@ -0,0 +1,51 @@
1
+ """
2
+ Unrealon Parsers - base classes for building data parsers.
3
+
4
+ Usage:
5
+ from unrealon.parsers import BaseAPIParser, BaseBrowserParser
6
+
7
+ class MyParser(BaseAPIParser):
8
+ SOURCE_CODE = "myparser"
9
+ CURRENCY = "USD"
10
+
11
+ async def fetch_listing_page(self, page: int, limit: int = 0):
12
+ ...
13
+
14
+ def transform_item(self, item: dict, detail: dict | None = None):
15
+ ...
16
+
17
+ if __name__ == "__main__":
18
+ MyParser.main()
19
+ """
20
+ from .api_parser import BaseAPIParser
21
+ from .base import BaseParser
22
+ from .browser_parser import BaseBrowserParser
23
+ from .cli import CLIConfig, cli_options, create_parser_cli
24
+ from .monitor import Monitor, get_monitor
25
+ from .storage import ResultStorage
26
+ from .upload import StreamingStats, StreamingUploader
27
+ from .utils import ALL_FORMATS, OCRResult, OCRTool, ParserNotifier, clean_and_save
28
+
29
+ __all__ = [
30
+ # Base classes
31
+ "BaseParser",
32
+ "BaseAPIParser",
33
+ "BaseBrowserParser",
34
+ # CLI
35
+ "CLIConfig",
36
+ "cli_options",
37
+ "create_parser_cli",
38
+ # Upload & Storage
39
+ "StreamingUploader",
40
+ "StreamingStats",
41
+ "ResultStorage",
42
+ # Monitoring
43
+ "Monitor",
44
+ "get_monitor",
45
+ # Utils
46
+ "clean_and_save",
47
+ "ALL_FORMATS",
48
+ "ParserNotifier",
49
+ "OCRTool",
50
+ "OCRResult",
51
+ ]