watchmen-collector-kernel 16.4.6__py3-none-any.whl → 16.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- watchmen_collector_kernel/common/__init__.py +2 -2
- watchmen_collector_kernel/common/constants.py +16 -0
- watchmen_collector_kernel/model/__init__.py +15 -1
- watchmen_collector_kernel/model/change_data_json.py +28 -0
- watchmen_collector_kernel/model/change_data_record.py +19 -0
- watchmen_collector_kernel/model/collector_model_config.py +12 -0
- watchmen_collector_kernel/model/collector_table_config.py +78 -0
- watchmen_collector_kernel/model/competitive_lock.py +11 -0
- watchmen_collector_kernel/model/condition.py +73 -0
- watchmen_collector_kernel/model/scheduled_task.py +36 -0
- watchmen_collector_kernel/model/trigger_event.py +11 -0
- watchmen_collector_kernel/model/trigger_model.py +8 -0
- watchmen_collector_kernel/model/trigger_table.py +11 -0
- watchmen_collector_kernel/service/__init__.py +8 -0
- watchmen_collector_kernel/service/criteria_builder.py +53 -0
- watchmen_collector_kernel/service/data_capture.py +60 -0
- watchmen_collector_kernel/service/extract_source.py +71 -0
- watchmen_collector_kernel/service/extract_utils.py +70 -0
- watchmen_collector_kernel/service/lock_clean.py +39 -0
- watchmen_collector_kernel/service/lock_helper.py +28 -0
- watchmen_collector_kernel/service/task_housekeeping.py +61 -0
- watchmen_collector_kernel/service/task_service.py +72 -0
- watchmen_collector_kernel/service/trigger_collector.py +147 -0
- watchmen_collector_kernel/storage/__init__.py +17 -0
- watchmen_collector_kernel/storage/change_data_json_service.py +171 -0
- watchmen_collector_kernel/storage/change_data_record_service.py +185 -0
- watchmen_collector_kernel/storage/collector_model_config_service.py +107 -0
- watchmen_collector_kernel/storage/collector_table_config_service.py +168 -0
- watchmen_collector_kernel/storage/competitive_lock_service.py +83 -0
- watchmen_collector_kernel/storage/scheduled_task_service.py +155 -0
- watchmen_collector_kernel/storage/trigger_event_service.py +86 -0
- watchmen_collector_kernel/storage/trigger_model_service.py +96 -0
- watchmen_collector_kernel/storage/trigger_table_service.py +113 -0
- {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/LICENSE +0 -0
- {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/METADATA +9 -9
- watchmen_collector_kernel-16.4.8.dist-info/RECORD +38 -0
- {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/WHEEL +0 -0
- watchmen_collector_kernel/common/settings.py +0 -14
- watchmen_collector_kernel/connector/__init__.py +0 -1
- watchmen_collector_kernel/connector/handler.py +0 -42
- watchmen_collector_kernel/connector/housekeeping.py +0 -58
- watchmen_collector_kernel/connector/s3_connector.py +0 -210
- watchmen_collector_kernel/lock/__init__.py +0 -3
- watchmen_collector_kernel/lock/distributed_lock.py +0 -23
- watchmen_collector_kernel/lock/oss_collector_lock_service.py +0 -127
- watchmen_collector_kernel/lock/unique_key_distributed_lock.py +0 -39
- watchmen_collector_kernel/model/oss_collector_competitive_lock.py +0 -15
- watchmen_collector_kernel-16.4.6.dist-info/RECORD +0 -17
@@ -0,0 +1,113 @@
|
|
1
|
+
from typing import Optional, List
|
2
|
+
|
3
|
+
from watchmen_auth import PrincipalService
|
4
|
+
from watchmen_collector_kernel.model import TriggerTable
|
5
|
+
from watchmen_meta.common import TupleShaper, TupleService
|
6
|
+
from watchmen_meta.common.storage_service import StorableId
|
7
|
+
from watchmen_model.common import Storable, TableTriggerId
|
8
|
+
from watchmen_storage import EntityName, EntityRow, EntityShaper, TransactionalStorageSPI, SnowflakeGenerator, \
|
9
|
+
EntityCriteriaJoint, EntityCriteriaExpression, ColumnNameLiteral, EntitySortColumn
|
10
|
+
|
11
|
+
|
12
|
+
class TriggerTableShaper(EntityShaper):
    """Maps TriggerTable entities to storage rows and back."""

    def serialize(self, entity: TriggerTable) -> EntityRow:
        """Build the storage row for a table trigger.

        The trigger-specific columns are collected here and handed to
        TupleShaper.serialize_tenant_based together with the entity.
        """
        own_columns = {
            'table_trigger_id': entity.tableTriggerId,
            'table_name': entity.tableName,
            'model_name': entity.modelName,
            'is_extracted': entity.isExtracted,
            'data_count': entity.dataCount,
            'model_trigger_id': entity.modelTriggerId,
            'event_trigger_id': entity.eventTriggerId
        }
        return TupleShaper.serialize_tenant_based(entity, own_columns)

    def deserialize(self, row: EntityRow) -> TriggerTable:
        """Rebuild a TriggerTable from a storage row.

        The trigger-specific columns are read here; the instance is then
        passed through TupleShaper.deserialize_tenant_based with the row.
        """
        trigger = TriggerTable(
            tableTriggerId=row.get('table_trigger_id'),
            tableName=row.get('table_name'),
            modelName=row.get('model_name'),
            isExtracted=row.get('is_extracted'),
            dataCount=row.get('data_count'),
            modelTriggerId=row.get('model_trigger_id'),
            eventTriggerId=row.get('event_trigger_id')
        )
        # noinspection PyTypeChecker
        return TupleShaper.deserialize_tenant_based(row, trigger)
|
36
|
+
|
37
|
+
|
38
|
+
TRIGGER_TABLE_TABLE = 'trigger_table'
|
39
|
+
TRIGGER_TABLE_ENTITY_SHAPER = TriggerTableShaper()
|
40
|
+
|
41
|
+
|
42
|
+
class TriggerTableService(TupleService):
    """Storage service for TriggerTable tuples."""

    def should_record_operation(self) -> bool:
        """Always False for table triggers."""
        return False

    def get_entity_name(self) -> EntityName:
        """Name of the entity (table) the triggers are stored in."""
        return TRIGGER_TABLE_TABLE

    def get_entity_shaper(self) -> EntityShaper:
        """Shared shaper used to (de)serialize table triggers."""
        return TRIGGER_TABLE_ENTITY_SHAPER

    def get_storable_id_column_name(self) -> EntityName:
        """Column that holds the table trigger id."""
        return 'table_trigger_id'

    def get_storable_id(self, storable: TriggerTable) -> StorableId:
        """Read the id from a table trigger."""
        # noinspection PyTypeChecker
        return storable.tableTriggerId

    def set_storable_id(
            self, storable: TriggerTable, storable_id: TableTriggerId) -> Storable:
        """Write the id onto a table trigger and return the same object."""
        storable.tableTriggerId = storable_id
        return storable

    def update_table_trigger(self, trigger: TriggerTable):
        """Update the trigger inside its own transaction.

        Commits on success and returns whatever ``update`` returned; on any
        exception (including one raised by the commit) the transaction is
        rolled back and the exception is re-raised.
        """
        self.begin_transaction()
        try:
            updated = self.update(trigger)
            self.commit_transaction()
        except Exception as e:
            self.rollback_transaction()
            raise e
        else:
            return updated

    def find_unfinished(self) -> Optional[List[TriggerTable]]:
        """Find the distinct (table_trigger_id, tenant_id) values of triggers
        whose ``is_extracted`` column matches False.

        The transaction is always closed, whether or not the query succeeds.
        """
        self.begin_transaction()
        try:
            not_extracted = EntityCriteriaExpression(left=ColumnNameLiteral(columnName='is_extracted'), right=False)
            finder = self.get_entity_finder_for_columns(
                criteria=[not_extracted],
                distinctColumnNames=['table_trigger_id', 'tenant_id'],
                distinctValueOnSingleColumn=False
            )
            # noinspection PyTypeChecker
            return self.storage.find_distinct_values(finder)
        finally:
            self.close_transaction()

    def find_by_id(self, trigger_id: TableTriggerId) -> Optional[TriggerTable]:
        """Load a single table trigger by id; the transaction is always closed."""
        self.begin_transaction()
        try:
            id_helper = self.get_entity_id_helper()
            return self.storage.find_by_id(trigger_id, id_helper)
        finally:
            self.close_transaction()

    def find_by_model_trigger_id(self, model_trigger_id: int) -> List[TriggerTable]:
        """Find the table triggers whose ``model_trigger_id`` column matches
        the given id; the transaction is always closed."""
        self.begin_transaction()
        try:
            by_model_trigger = EntityCriteriaExpression(
                left=ColumnNameLiteral(columnName='model_trigger_id'), right=model_trigger_id)
            finder = self.get_entity_finder(criteria=[by_model_trigger])
            # noinspection PyTypeChecker
            return self.storage.find(finder)
        finally:
            self.close_transaction()
|
107
|
+
|
108
|
+
|
109
|
+
def get_trigger_table_service(storage: TransactionalStorageSPI,
                              snowflake_generator: SnowflakeGenerator,
                              principal_service: PrincipalService
                              ) -> TriggerTableService:
    """Create a TriggerTableService from the given storage, id generator and principal."""
    service = TriggerTableService(storage, snowflake_generator, principal_service)
    return service
|
{watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/LICENSE
RENAMED
File without changes
|
{watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: watchmen-collector-kernel
|
3
|
-
Version: 16.4.6
|
3
|
+
Version: 16.4.8
|
4
4
|
Summary:
|
5
5
|
License: MIT
|
6
6
|
Author: botlikes
|
@@ -18,11 +18,11 @@ Provides-Extra: oracle
|
|
18
18
|
Provides-Extra: oss
|
19
19
|
Provides-Extra: postgresql
|
20
20
|
Provides-Extra: s3
|
21
|
-
Requires-Dist: watchmen-
|
22
|
-
Requires-Dist: watchmen-storage-mongodb (==16.4.
|
23
|
-
Requires-Dist: watchmen-storage-mssql (==16.4.
|
24
|
-
Requires-Dist: watchmen-storage-mysql (==16.4.
|
25
|
-
Requires-Dist: watchmen-storage-oracle (==16.4.
|
26
|
-
Requires-Dist: watchmen-storage-oss (==16.4.
|
27
|
-
Requires-Dist: watchmen-storage-postgresql (==16.4.
|
28
|
-
Requires-Dist: watchmen-storage-s3 (==16.4.
|
21
|
+
Requires-Dist: watchmen-data-kernel (==16.4.8)
|
22
|
+
Requires-Dist: watchmen-storage-mongodb (==16.4.8) ; extra == "mongodb"
|
23
|
+
Requires-Dist: watchmen-storage-mssql (==16.4.8) ; extra == "mssql"
|
24
|
+
Requires-Dist: watchmen-storage-mysql (==16.4.8) ; extra == "mysql"
|
25
|
+
Requires-Dist: watchmen-storage-oracle (==16.4.8) ; extra == "oracle"
|
26
|
+
Requires-Dist: watchmen-storage-oss (==16.4.8) ; extra == "oss"
|
27
|
+
Requires-Dist: watchmen-storage-postgresql (==16.4.8) ; extra == "postgresql"
|
28
|
+
Requires-Dist: watchmen-storage-s3 (==16.4.8) ; extra == "s3"
|
@@ -0,0 +1,38 @@
|
|
1
|
+
watchmen_collector_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
+
watchmen_collector_kernel/common/__init__.py,sha256=QJHylPJAMoSbSa6EfgiQTLNtkQKSCUJucf7ZG3Nqswg,175
|
3
|
+
watchmen_collector_kernel/common/constants.py,sha256=mIsGBhAjHbmeuVeNYDOKLhiBqG2sSX-0vs8ipEEIo9E,302
|
4
|
+
watchmen_collector_kernel/model/__init__.py,sha256=ZhsKn7a2KW7LaVj87coIRC4Z8wsysE2RXn6v8AmoUsg,554
|
5
|
+
watchmen_collector_kernel/model/change_data_json.py,sha256=-oNIGsvYo8KJJsSFusbGa9kIuFfE0otLnWe-zz9O42A,495
|
6
|
+
watchmen_collector_kernel/model/change_data_record.py,sha256=vadzugcX4nl6G3F-jt7DtuVqjE8vtlasu86DDjpaGGM,330
|
7
|
+
watchmen_collector_kernel/model/collector_model_config.py,sha256=oE0T9I_9uGppttoytvzl7-35WgwlWu7s6awNtsK4Q20,288
|
8
|
+
watchmen_collector_kernel/model/collector_table_config.py,sha256=R80d2rqfVRj_rZ6-mognaLYe26R7r3g4nFR3Wj-4YLY,2143
|
9
|
+
watchmen_collector_kernel/model/competitive_lock.py,sha256=fxDRuzZpsgrZjGeuADS3zRYqQioqWhg72Pr592U_Yvw,264
|
10
|
+
watchmen_collector_kernel/model/condition.py,sha256=jGsjvb5z9XRFW0B55k-iC2_Zle4rl-A5J_6PB6EV6mA,1942
|
11
|
+
watchmen_collector_kernel/model/scheduled_task.py,sha256=Zb6blDfDmxX90vZwAODQLQtyfI_0JTb1aWGi80voAyY,752
|
12
|
+
watchmen_collector_kernel/model/trigger_event.py,sha256=ZjVgWQS_qqILchw4fGFzxn1tJUMx77s2_UpyiBd7azA,251
|
13
|
+
watchmen_collector_kernel/model/trigger_model.py,sha256=NpnIbdax8vQeGYeGcOMpWpjYUhkeZUil--6-yTNpv3k,175
|
14
|
+
watchmen_collector_kernel/model/trigger_table.py,sha256=aPYJlIb-8aX_-O6vZQp3DU58zYtyosYLGFjpL81ijds,229
|
15
|
+
watchmen_collector_kernel/service/__init__.py,sha256=hsg-38u7V3QizNmmuEtOw8gC7QEOopVInLbY97MiwuM,426
|
16
|
+
watchmen_collector_kernel/service/criteria_builder.py,sha256=NAk0yQBn4KARttqEOwWozxIlqkVuGPTQ8n3Zk6gjwcQ,2360
|
17
|
+
watchmen_collector_kernel/service/data_capture.py,sha256=45asoQQlv016NKsPoiUVgaSCOJzUBHDCmWr7XPt-8VI,2873
|
18
|
+
watchmen_collector_kernel/service/extract_source.py,sha256=MYldG4gR6t-2JeVJ6nL5aAb59AjxkQWj3dVwzJKXN9Q,2928
|
19
|
+
watchmen_collector_kernel/service/extract_utils.py,sha256=PIyxFG7U37ZXLYlOdIK-Dh-kBptXPFqk-9-b1LuPL1A,2452
|
20
|
+
watchmen_collector_kernel/service/lock_clean.py,sha256=qeNc8vZtfxXTNpGhGc_ZnUD9iRGK2VXNEpv1VglVWCA,965
|
21
|
+
watchmen_collector_kernel/service/lock_helper.py,sha256=GxqjIsCD-jn5kDICVWS1LlTFCuMpnRSpcrQtPLv5bxs,842
|
22
|
+
watchmen_collector_kernel/service/task_housekeeping.py,sha256=i1lnZ4jo-RLjLQD0Z7HSLQqjXuSqdL3zqHDfFVaNbe8,1673
|
23
|
+
watchmen_collector_kernel/service/task_service.py,sha256=XNc-nudTpk569ibqd5wLpzLGlcHUAf71Acn9_iKuS2Y,3091
|
24
|
+
watchmen_collector_kernel/service/trigger_collector.py,sha256=IP_ILJNMhahBogtvfS1iHDOJAxKCDBDJFXTDi98R11M,6790
|
25
|
+
watchmen_collector_kernel/storage/__init__.py,sha256=sstizR2fcwo4ZhqfiqceTxxS49HTtfExCPMeEaYc9bc,832
|
26
|
+
watchmen_collector_kernel/storage/change_data_json_service.py,sha256=JvaIFWPDzq6CHKNbMaUvbIaXs0iAT4IMUFj-zeZFPcw,7393
|
27
|
+
watchmen_collector_kernel/storage/change_data_record_service.py,sha256=MOju5vTl-ZQrpEfL7DytvB_T_JM0dYyXGAzrhNnzb2Y,7463
|
28
|
+
watchmen_collector_kernel/storage/collector_model_config_service.py,sha256=kWLjmppPpRuN0VptiAEGA38BJ20fFD5Q5gxPZJRbQYs,3830
|
29
|
+
watchmen_collector_kernel/storage/collector_table_config_service.py,sha256=aVZeN5AhzUORHaZQUDo_MlKcX6AkgcHcQeI6UwAoAT0,5659
|
30
|
+
watchmen_collector_kernel/storage/competitive_lock_service.py,sha256=jjxxHb4Dr0DIDX_g26wtCa8vGLGLnc4BJQGl0d3J31M,2534
|
31
|
+
watchmen_collector_kernel/storage/scheduled_task_service.py,sha256=ROHYW-pTQaL8kWkLgk9h18XsxfKGjsReMoBY0hoD4SM,5340
|
32
|
+
watchmen_collector_kernel/storage/trigger_event_service.py,sha256=DKSCQ0sYSH3uAtGmokvncD9FIYvjoVsk7QtqdRMQGhM,3099
|
33
|
+
watchmen_collector_kernel/storage/trigger_model_service.py,sha256=0-_YcL5FNGHy2MKn8vTYrWFaYOPW_u4iKQRG00AOfCI,3219
|
34
|
+
watchmen_collector_kernel/storage/trigger_table_service.py,sha256=RHiFJaT3UQmCpz-gxnfsoBPV7lS8JM1i9DCQn3BnfA4,3779
|
35
|
+
watchmen_collector_kernel-16.4.8.dist-info/LICENSE,sha256=iuuG7ErblOdcEZi5u89VXS0VIUiTb4flerGp_PAS9E4,1061
|
36
|
+
watchmen_collector_kernel-16.4.8.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
|
37
|
+
watchmen_collector_kernel-16.4.8.dist-info/METADATA,sha256=7-zGOrH90-yWi6UZGqtvw4qbSvOFs6a934l9YrQoLVM,1135
|
38
|
+
watchmen_collector_kernel-16.4.8.dist-info/RECORD,,
|
{watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/WHEEL
RENAMED
File without changes
|
@@ -1,14 +0,0 @@
|
|
1
|
-
from watchmen_model.common import SettingsModel
|
2
|
-
|
3
|
-
|
4
|
-
class S3CollectorSettings(SettingsModel):
    """Settings consumed by the S3 collector and its lock housekeeping task."""

    # credentials and location of the bucket
    access_key_id: str
    secret_access_key: str
    bucket_name: str
    region: str
    token: str
    # tenant that collected data is attributed to
    tenant_id: int
    # key prefix listed for objects to consume
    consume_prefix: str
    # key prefix used when building dead-queue keys
    dead_prefix: str
    # passed as max_keys when listing objects
    max_keys: int = 10
    # sleep between two housekeeping rounds (passed to time.sleep)
    clean_task_interval: int = 3600
|
@@ -1 +0,0 @@
|
|
1
|
-
from .s3_connector import init_s3_collector
|
@@ -1,42 +0,0 @@
|
|
1
|
-
|
2
|
-
from watchmen_data_kernel.storage import TopicTrigger
|
3
|
-
from watchmen_meta.common import ask_snowflake_generator, ask_super_admin
|
4
|
-
|
5
|
-
from watchmen_model.pipeline_kernel import PipelineTriggerData
|
6
|
-
from watchmen_pipeline_kernel.pipeline import create_monitor_log_pipeline_invoker, PipelineTrigger
|
7
|
-
|
8
|
-
|
9
|
-
from watchmen_data_kernel.meta import TopicService
|
10
|
-
from watchmen_data_kernel.service import ask_topic_data_service, ask_topic_storage
|
11
|
-
|
12
|
-
|
13
|
-
async def handle_trigger_data(trigger_data: PipelineTriggerData, topic_trigger: TopicTrigger) -> None:
    """Start a PipelineTrigger for the given trigger data and topic trigger.

    Runs as the super admin principal with its tenant switched to the tenant
    of the trigger data, under a freshly generated trace id.
    """
    tenant_id = trigger_data.tenantId
    # use super admin, but act on behalf of the data's tenant
    principal_service = ask_super_admin()
    principal_service.tenantId = tenant_id
    schema = TopicService(principal_service).find_schema_by_name(trigger_data.code, tenant_id)
    trace_id = str(ask_snowflake_generator().next_id())
    pipeline_trigger = PipelineTrigger(
        trigger_topic_schema=schema,
        trigger_type=trigger_data.triggerType,
        trigger_data=trigger_data.data,
        trace_id=trace_id,
        principal_service=principal_service,
        asynchronized=False,
        handle_monitor_log=create_monitor_log_pipeline_invoker(trace_id, principal_service)
    )
    await pipeline_trigger.start(topic_trigger)
|
29
|
-
|
30
|
-
|
31
|
-
def save_topic_data(trigger_data: PipelineTriggerData) -> TopicTrigger:
    """Store the trigger data into its topic and return the resulting trigger.

    Runs as the super admin principal with its tenant switched to the tenant
    of the trigger data. The topic schema is looked up by the data's code.

    :raises Exception: when no schema is found for the code and tenant
    """
    # use super admin
    principal_service = ask_super_admin()
    # change the tenant_id
    principal_service.tenantId = trigger_data.tenantId
    schema = TopicService(principal_service).find_schema_by_name(trigger_data.code, trigger_data.tenantId)
    if schema is None:
        # Exception does not apply %-formatting to its arguments, so the
        # message is formatted here before raising.
        raise Exception(f"schema is not found. {trigger_data.code}, {trigger_data.tenantId}")
    data = schema.prepare_data(trigger_data.data, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    return service.trigger_by_insert(data)
|
@@ -1,58 +0,0 @@
|
|
1
|
-
from datetime import datetime, date, timedelta
|
2
|
-
from logging import getLogger
|
3
|
-
from threading import Thread
|
4
|
-
from typing import List
|
5
|
-
|
6
|
-
from watchmen_collector_kernel.common import S3CollectorSettings
|
7
|
-
from watchmen_collector_kernel.lock import get_oss_collector_lock_service
|
8
|
-
from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
|
9
|
-
from time import sleep
|
10
|
-
|
11
|
-
|
12
|
-
class CleanTask:
    """Housekeeping loop that deletes completed collector locks.

    Once per calendar day (tracked in ``processed_date``) it asks the lock
    service for completed tasks for that date and deletes each of them.
    """

    def __init__(self, settings: S3CollectorSettings):
        self.lock_service = get_oss_collector_lock_service()
        # dates ('%Y-%m-%d') already cleaned by this instance
        self.processed_date = []
        self.cleanTaskInterval = settings.clean_task_interval

    def run(self):
        """Run housekeeping forever, sleeping ``cleanTaskInterval`` between rounds.

        A failing round is logged and retried after the regular interval.
        Failures are handled inside the loop rather than by respawning a
        thread, so a persistent error cannot turn into a tight
        restart-and-log spin.
        """
        while True:
            try:
                self.clean_task()
            except Exception as e:
                getLogger(__name__).error(e, exc_info=True, stack_info=True)
            sleep(self.cleanTaskInterval)

    def clean_task(self):
        """Delete the completed tasks reported for today, at most once per day.

        Returns "Done" when today was already processed, otherwise None.
        """
        # subtracting a sub-day timedelta from a date leaves it unchanged,
        # so today's date is used directly
        query_date = datetime.now().date()
        if self.is_processed(query_date):
            return "Done"
        for task in self.get_task_list(query_date):
            self.lock_service.delete_by_id(task.lockId)
        self.processed_date.append(query_date.strftime('%Y-%m-%d'))

    def get_task_list(self, clean_date) -> List[OSSCollectorCompetitiveLock]:
        """Ask the lock service for the completed tasks for the given date."""
        return self.lock_service.find_completed_task(clean_date)

    def is_processed(self, query_date: date) -> bool:
        """Tell whether the given date was already cleaned by this instance."""
        return query_date.strftime('%Y-%m-%d') in self.processed_date

    def restart(self):
        """Start ``run`` on a new daemon thread.

        No longer called by ``run`` itself; kept for existing callers.
        """
        Thread(target=CleanTask.run, args=(self,), daemon=True).start()
|
54
|
-
|
55
|
-
|
56
|
-
def init_task_housekeeping(settings: S3CollectorSettings):
    """Create a CleanTask from the settings and run it on a daemon thread."""
    task = CleanTask(settings)
    worker = Thread(target=CleanTask.run, args=(task,), daemon=True)
    worker.start()
|
@@ -1,210 +0,0 @@
|
|
1
|
-
import asyncio
|
2
|
-
from enum import Enum
|
3
|
-
from logging import getLogger
|
4
|
-
from threading import Thread
|
5
|
-
from time import sleep
|
6
|
-
from typing import Optional, Dict, Any
|
7
|
-
|
8
|
-
from watchmen_collector_kernel.common import S3CollectorSettings
|
9
|
-
from watchmen_collector_kernel.lock import get_oss_collector_lock_service, get_unique_key_distributed_lock, \
|
10
|
-
DistributedLock
|
11
|
-
from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
|
12
|
-
from watchmen_data_kernel.storage import TopicTrigger
|
13
|
-
from watchmen_meta.common import ask_snowflake_generator
|
14
|
-
|
15
|
-
from watchmen_storage_s3 import SimpleStorageService, ObjectContent
|
16
|
-
|
17
|
-
from watchmen_model.pipeline_kernel import PipelineTriggerData
|
18
|
-
from watchmen_model.common import Storable
|
19
|
-
|
20
|
-
from .handler import save_topic_data, handle_trigger_data
|
21
|
-
from .housekeeping import init_task_housekeeping
|
22
|
-
|
23
|
-
logger = getLogger(__name__)
|
24
|
-
|
25
|
-
identifier_delimiter = "~"
|
26
|
-
|
27
|
-
|
28
|
-
class STATUS(str, Enum):
    """Outcome of consuming a single object; each member's value equals its name."""

    # the object key did not match the expected pattern
    CHECK_KEY_FAILED = "CHECK_KEY_FAILED"
    # the dependency check did not pass
    DEPENDENCY_FAILED = "DEPENDENCY_FAILED"
    # the lock for the object could not be obtained
    CREATE_TASK_FAILED = "CREATE_TASK_FAILED"
    # the object had no payload
    EMPTY_PAYLOAD = "EMPTY_PAYLOAD"
    # the object was processed and removed
    COMPLETED_TASK = "COMPLETED_TASK"
    # processing raised an exception
    PROCESS_TASK_FAILED = "PROCESS_TASK_FAILED"
|
35
|
-
|
36
|
-
|
37
|
-
def init_s3_collector(settings: S3CollectorSettings):
    """Start the S3 connector thread, then the lock housekeeping thread."""
    connector = S3Connector(settings)
    connector.create_connector()
    init_task_housekeeping(settings)
|
40
|
-
|
41
|
-
|
42
|
-
class Dependency(Storable):
    """Model name and object id pair parsed from an object key."""

    # model name segment of the key
    model_name: str
    # object id segment of the key
    object_id: str
|
45
|
-
|
46
|
-
|
47
|
-
class S3Connector:
    """Polls a bucket prefix and feeds each object into the topic/pipeline flow.

    Object identifiers (the key without the consume prefix) are split on
    ``identifier_delimiter``. A valid identifier has either 3 or 5 parts.
    Part 1 is the model name and part 2 the object id; in the 5-part form,
    parts 3 and 4 name the model and object the entry depends on.
    """

    def __init__(self, settings: S3CollectorSettings):
        self.simpleStorageService = SimpleStorageService(access_key_id=settings.access_key_id,
                                                         access_key_secret=settings.secret_access_key,
                                                         endpoint=settings.region,
                                                         bucket_name=settings.bucket_name,
                                                         params=None)
        self.lock_service = get_oss_collector_lock_service()
        self.snowflakeGenerator = ask_snowflake_generator()
        self.token = settings.token
        self.tenant_id = settings.tenant_id
        self.consume_prefix = settings.consume_prefix
        self.dead_prefix = settings.dead_prefix
        self.maxKeys = settings.max_keys

    def create_connector(self) -> None:
        """Start ``run`` on a new daemon thread."""
        Thread(target=S3Connector.run, args=(self,), daemon=True).start()

    def run(self):
        """Poll and consume objects forever.

        Sleeps 5 seconds when the listing is empty. Within one listing,
        objects whose lock or dependency check failed are skipped; any other
        outcome ends the batch so the bucket is listed again. On an
        unexpected exception the error is logged and, after 300 seconds, a
        new connector thread is started.
        """
        skip_statuses = (STATUS.CREATE_TASK_FAILED, STATUS.DEPENDENCY_FAILED)
        relist_statuses = (STATUS.CHECK_KEY_FAILED, STATUS.COMPLETED_TASK,
                           STATUS.EMPTY_PAYLOAD, STATUS.PROCESS_TASK_FAILED)
        try:
            while True:
                objects = self.simpleStorageService.list_objects(max_keys=self.maxKeys, prefix=self.consume_prefix)
                logger.info("objects size {}".format(len(objects)))
                if len(objects) == 0:
                    sleep(5)
                else:
                    for object_ in objects:
                        result = self.consume(object_)
                        if result in skip_statuses:
                            continue
                        elif result in relist_statuses:
                            # every status is compared against result; the
                            # former bare STATUS.EMPTY_PAYLOAD operand made
                            # this branch unconditionally true
                            logger.info(
                                "CHECK_KEY_FAILED or COMPLETED_TASK or EMPTY_PAYLOAD or PROCESS_TASK_FAILED, key is {}".format(
                                    object_.key))
                            break
        except Exception as e:
            logger.error(e, exc_info=True, stack_info=True)
            sleep(300)
            self.create_connector()

    def consume(self, object_: ObjectContent) -> str:
        """Consume one object and report the outcome as a STATUS.

        Invalid keys are moved to the dead queue. Valid keys are processed
        only when their dependency check passes and the lock is obtained.
        The lock is always released afterwards via ``ask_unlock``.
        """
        object_key = self.get_identifier(self.consume_prefix, object_.key)
        if not self.validate_key_pattern(object_key):
            distributed_lock = get_unique_key_distributed_lock(self.get_resource_lock(object_.key),
                                                               self.lock_service)
            try:
                if not self.ask_lock(distributed_lock):
                    return STATUS.CREATE_TASK_FAILED
                payload = self.simpleStorageService.get_object(object_.key)
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.CHECK_KEY_FAILED
            finally:
                self.ask_unlock(distributed_lock)

        dependency = self.get_dependency(object_key)
        if not self.check_dependency_finished(dependency):
            return STATUS.DEPENDENCY_FAILED

        distributed_lock = get_unique_key_distributed_lock(self.get_resource_lock(object_.key),
                                                           self.lock_service)
        try:
            if not self.ask_lock(distributed_lock):
                return STATUS.CREATE_TASK_FAILED
            payload = self.get_payload(object_.key)
            if not payload:
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.EMPTY_PAYLOAD
            try:
                trigger_data = PipelineTriggerData(code=self.get_code(object_key), data=payload,
                                                   tenantId=self.tenant_id)
                topic_trigger = self.save_data(trigger_data)
                self.trigger_pipeline(trigger_data, topic_trigger)
                self.simpleStorageService.delete_object(object_.key)
                return STATUS.COMPLETED_TASK
            except Exception as e:
                logger.error(e, exc_info=True, stack_info=True)
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.PROCESS_TASK_FAILED
        finally:
            self.ask_unlock(distributed_lock)

    def get_payload(self, key: str) -> Dict:
        """Read the object stored under the key."""
        return self.simpleStorageService.get_object(key)

    def ask_lock(self, lock: DistributedLock) -> bool:
        """Try to take the lock without waiting."""
        return lock.try_lock_nowait()

    def ask_unlock(self, lock: DistributedLock) -> bool:
        """Release the lock."""
        return lock.unlock()

    def process(self, key: str, code: str, payload: Dict[str, Any] = None):
        """Save the payload as topic data for ``code`` and run its pipelines."""
        logger.info("start to process %s and %s", code, key)
        trigger_data = PipelineTriggerData(code=code, data=payload, tenantId=self.tenant_id)
        result = save_topic_data(trigger_data)
        asyncio.run(handle_trigger_data(trigger_data, result))

    def save_data(self, trigger_data: PipelineTriggerData) -> TopicTrigger:
        """Save the trigger data into its topic."""
        return save_topic_data(trigger_data)

    def trigger_pipeline(self, trigger_data: PipelineTriggerData, topic_trigger: TopicTrigger):
        """Run the pipelines for the saved data; failures are logged, not raised."""
        try:
            asyncio.run(handle_trigger_data(trigger_data, topic_trigger))
        except Exception as e:
            logger.error(e, exc_info=True, stack_info=True)

    def get_resource_lock(self, key: str) -> OSSCollectorCompetitiveLock:
        """Build the lock record for an object key.

        This is also called for keys that failed pattern validation, which
        may have fewer than three parts. Missing parts are recorded as empty
        strings so that such objects can still be locked and dead-lettered
        instead of raising IndexError.
        NOTE(review): assumes the lock model accepts '' for modelName and
        objectId — confirm against OSSCollectorCompetitiveLock.
        """
        object_key = self.get_identifier(self.consume_prefix, key)
        key_parts = object_key.split(identifier_delimiter)
        model_name = key_parts[1] if len(key_parts) > 1 else ''
        object_id = key_parts[2] if len(key_parts) > 2 else ''
        return OSSCollectorCompetitiveLock(lockId=self.snowflakeGenerator.next_id(),
                                           resourceId=key,
                                           modelName=model_name,
                                           objectId=object_id,
                                           tenantId=self.tenant_id,
                                           status=0)

    def get_dependency(self, key: str) -> Optional[Dependency]:
        """Extract the dependency from an identifier.

        5 parts: parts 3 and 4; 3 parts: parts 1 and 2; otherwise None.
        """
        key_parts = key.split(identifier_delimiter)
        if len(key_parts) == 5:
            return Dependency(model_name=key_parts[3], object_id=key_parts[4])
        if len(key_parts) == 3:
            return Dependency(model_name=key_parts[1], object_id=key_parts[2])
        return None

    def check_dependency_finished(self, dependency: Optional[Dependency]) -> bool:
        """True when there is no dependency, or the lock service counts zero
        matching entries for it."""
        if not dependency:
            return True
        return self.lock_service.find_by_dependency(dependency.model_name, dependency.object_id) == 0

    def get_code(self, identifier: str) -> str:
        """Derive the code as 'raw_' plus the lower-cased model name part."""
        key_parts = identifier.split(identifier_delimiter)
        return 'raw_' + key_parts[1].lower()

    def validate_key_pattern(self, identifier: str) -> bool:
        """An identifier is valid when it has exactly 3 or 5 parts."""
        return len(identifier.split(identifier_delimiter)) in (3, 5)

    def move_to_dead_queue(self, key: str, payload: Optional[Dict]):
        """Write the payload under the dead-queue key, then delete the original object."""
        dead_queue_key = self.generate_dead_file_key(key)
        self.simpleStorageService.put_object(dead_queue_key, payload)
        self.simpleStorageService.delete_object(key)

    def generate_dead_file_key(self, key_: str):
        """Dead-queue key: the dead prefix followed by the object's identifier."""
        return self.dead_prefix + self.get_identifier(self.consume_prefix, key_)

    @staticmethod
    def get_identifier(prefix, key) -> str:
        """Strip the prefix from the key when the key starts with it."""
        return key.removeprefix(prefix)
|
@@ -1,23 +0,0 @@
|
|
1
|
-
from abc import ABC, abstractmethod
|
2
|
-
|
3
|
-
|
4
|
-
class DistributedLock(ABC):
    """Abstract interface of a distributed lock.

    Concrete locks must implement all four operations below.
    """

    def __init__(self):
        """Nothing to initialise at this level."""
        return None

    @abstractmethod
    def lock(self):
        """Acquire the lock."""
        return None

    @abstractmethod
    def try_lock(self, timeout: int):
        """Try to acquire the lock, given a timeout."""
        return None

    @abstractmethod
    def try_lock_nowait(self):
        """Try to acquire the lock without waiting."""
        return None

    @abstractmethod
    def unlock(self):
        """Release the lock."""
        return None
|
@@ -1,127 +0,0 @@
|
|
1
|
-
from datetime import datetime
|
2
|
-
from typing import List
|
3
|
-
|
4
|
-
from watchmen_meta.common import EntityService, ask_meta_storage
|
5
|
-
from watchmen_meta.common.storage_service import StorableId
|
6
|
-
from watchmen_model.common import Storable, OssCollectorCompetitiveLockId
|
7
|
-
from watchmen_storage import EntityShaper, EntityRow, EntityName, TransactionalStorageSPI, \
|
8
|
-
EntityHelper, EntityIdHelper, EntityFinder, ColumnNameLiteral, EntityCriteriaExpression, Entity
|
9
|
-
from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
|
10
|
-
|
11
|
-
|
12
|
-
class OSSCollectorCompetitiveLockShaper(EntityShaper):
    """Maps OSSCollectorCompetitiveLock entities to storage rows and back."""

    def serialize(self, entity: OSSCollectorCompetitiveLock) -> EntityRow:
        """Build the storage row for a lock."""
        row = {
            'lock_id': entity.lockId,
            'resource_id': entity.resourceId,
            'model_name': entity.modelName,
            'object_id': entity.objectId,
            'registered_at': entity.registeredAt,
            'tenant_id': entity.tenantId,
            'status': entity.status
        }
        return row

    def deserialize(self, row: EntityRow) -> OSSCollectorCompetitiveLock:
        """Rebuild a lock from a storage row; absent columns become None."""
        lock = OSSCollectorCompetitiveLock(
            lockId=row.get('lock_id'),
            resourceId=row.get('resource_id'),
            modelName=row.get('model_name'),
            objectId=row.get('object_id'),
            registeredAt=row.get('registered_at'),
            tenantId=row.get('tenant_id'),
            status=row.get('status')
        )
        return lock
|
34
|
-
|
35
|
-
|
36
|
-
OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE = 'oss_collector_competitive_lock'
|
37
|
-
OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER = OSSCollectorCompetitiveLockShaper()
|
38
|
-
|
39
|
-
|
40
|
-
class OssCollectorLockService(EntityService):
    """Storage access for collector competitive locks.

    Every operation connects to the storage, performs one call and always
    closes the storage again.
    """

    def __init__(self, storage: TransactionalStorageSPI):
        super().__init__(storage)

    def get_entity_name(self) -> EntityName:
        """Name of the entity (table) the locks are stored in."""
        return OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE

    def get_entity_shaper(self) -> EntityShaper:
        """Shared shaper used to (de)serialize locks."""
        return OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER

    def get_storable_id_column_name(self) -> EntityName:
        """Column that holds the lock id."""
        return 'lock_id'

    def get_storable_id(self, storable: OSSCollectorCompetitiveLock) -> StorableId:
        """Read the id from a lock."""
        return storable.lockId

    def set_storable_id(self, storable: OSSCollectorCompetitiveLock,
                        storable_id: OssCollectorCompetitiveLockId) -> Storable:
        """Write the id onto a lock and return the same object."""
        storable.lockId = storable_id
        return storable

    def insert_one(self, lock: OSSCollectorCompetitiveLock):
        """Insert a lock row."""
        try:
            self.storage.connect()
            self.storage.insert_one(
                lock,
                EntityHelper(name=OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE, shaper=OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER)
            )
        finally:
            self.storage.close()

    def delete_by_id(self, id_: OssCollectorCompetitiveLockId):
        """Delete the lock row with the given id."""
        try:
            self.storage.connect()
            self.storage.delete_by_id(id_,
                                      EntityIdHelper(idColumnName='lock_id',
                                                     name=OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE,
                                                     shaper=OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER)
                                      )
        finally:
            self.storage.close()

    def update_one(self, one: Entity) -> int:
        """Update a lock row by its id.

        Returns the storage's result, where the value was previously dropped
        and None returned despite the ``int`` annotation.
        NOTE(review): presumably the storage returns the updated row count —
        confirm against the storage SPI.
        """
        try:
            self.storage.connect()
            return self.storage.update_one(one,
                                           EntityIdHelper(idColumnName='lock_id',
                                                          name=OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE,
                                                          shaper=OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER)
                                           )
        finally:
            self.storage.close()

    def find_by_dependency(self, model_name: str, object_id: str) -> int:
        """Count the locks matching the model name, the object id and status 0."""
        try:
            self.storage.connect()
            return self.storage.count(EntityFinder(
                name=self.get_entity_name(),
                shaper=self.get_entity_shaper(),
                criteria=[
                    EntityCriteriaExpression(left=ColumnNameLiteral(columnName='model_name'), right=model_name),
                    EntityCriteriaExpression(left=ColumnNameLiteral(columnName='object_id'), right=object_id),
                    EntityCriteriaExpression(left=ColumnNameLiteral(columnName='status'), right=0)
                ]
            ))
        finally:
            self.storage.close()

    def find_completed_task(self, query_date: datetime) -> List:
        """Find the locks with status 1 that were registered before the given date."""
        try:
            self.storage.connect()
            return self.storage.find(EntityFinder(
                name=self.get_entity_name(),
                shaper=self.get_entity_shaper(),
                criteria=[
                    EntityCriteriaExpression(left=ColumnNameLiteral(columnName='registered_at'),
                                             operator="less-than",
                                             right=query_date),
                    EntityCriteriaExpression(left=ColumnNameLiteral(columnName='status'), right=1)
                ]
            ))
        finally:
            self.storage.close()
|
124
|
-
|
125
|
-
|
126
|
-
def get_oss_collector_lock_service() -> OssCollectorLockService:
    """Create a lock service backed by the meta storage."""
    meta_storage = ask_meta_storage()
    return OssCollectorLockService(meta_storage)
|