watchmen-collector-kernel 16.4.6__py3-none-any.whl → 16.4.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. watchmen_collector_kernel/common/__init__.py +2 -2
  2. watchmen_collector_kernel/common/constants.py +16 -0
  3. watchmen_collector_kernel/model/__init__.py +15 -1
  4. watchmen_collector_kernel/model/change_data_json.py +28 -0
  5. watchmen_collector_kernel/model/change_data_record.py +19 -0
  6. watchmen_collector_kernel/model/collector_model_config.py +12 -0
  7. watchmen_collector_kernel/model/collector_table_config.py +78 -0
  8. watchmen_collector_kernel/model/competitive_lock.py +11 -0
  9. watchmen_collector_kernel/model/condition.py +73 -0
  10. watchmen_collector_kernel/model/scheduled_task.py +36 -0
  11. watchmen_collector_kernel/model/trigger_event.py +11 -0
  12. watchmen_collector_kernel/model/trigger_model.py +8 -0
  13. watchmen_collector_kernel/model/trigger_table.py +11 -0
  14. watchmen_collector_kernel/service/__init__.py +8 -0
  15. watchmen_collector_kernel/service/criteria_builder.py +53 -0
  16. watchmen_collector_kernel/service/data_capture.py +60 -0
  17. watchmen_collector_kernel/service/extract_source.py +71 -0
  18. watchmen_collector_kernel/service/extract_utils.py +70 -0
  19. watchmen_collector_kernel/service/lock_clean.py +39 -0
  20. watchmen_collector_kernel/service/lock_helper.py +28 -0
  21. watchmen_collector_kernel/service/task_housekeeping.py +61 -0
  22. watchmen_collector_kernel/service/task_service.py +72 -0
  23. watchmen_collector_kernel/service/trigger_collector.py +147 -0
  24. watchmen_collector_kernel/storage/__init__.py +17 -0
  25. watchmen_collector_kernel/storage/change_data_json_service.py +171 -0
  26. watchmen_collector_kernel/storage/change_data_record_service.py +185 -0
  27. watchmen_collector_kernel/storage/collector_model_config_service.py +107 -0
  28. watchmen_collector_kernel/storage/collector_table_config_service.py +168 -0
  29. watchmen_collector_kernel/storage/competitive_lock_service.py +83 -0
  30. watchmen_collector_kernel/storage/scheduled_task_service.py +155 -0
  31. watchmen_collector_kernel/storage/trigger_event_service.py +86 -0
  32. watchmen_collector_kernel/storage/trigger_model_service.py +96 -0
  33. watchmen_collector_kernel/storage/trigger_table_service.py +113 -0
  34. {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/LICENSE +0 -0
  35. {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/METADATA +9 -9
  36. watchmen_collector_kernel-16.4.8.dist-info/RECORD +38 -0
  37. {watchmen_collector_kernel-16.4.6.dist-info → watchmen_collector_kernel-16.4.8.dist-info}/WHEEL +0 -0
  38. watchmen_collector_kernel/common/settings.py +0 -14
  39. watchmen_collector_kernel/connector/__init__.py +0 -1
  40. watchmen_collector_kernel/connector/handler.py +0 -42
  41. watchmen_collector_kernel/connector/housekeeping.py +0 -58
  42. watchmen_collector_kernel/connector/s3_connector.py +0 -210
  43. watchmen_collector_kernel/lock/__init__.py +0 -3
  44. watchmen_collector_kernel/lock/distributed_lock.py +0 -23
  45. watchmen_collector_kernel/lock/oss_collector_lock_service.py +0 -127
  46. watchmen_collector_kernel/lock/unique_key_distributed_lock.py +0 -39
  47. watchmen_collector_kernel/model/oss_collector_competitive_lock.py +0 -15
  48. watchmen_collector_kernel-16.4.6.dist-info/RECORD +0 -17
@@ -0,0 +1,113 @@
1
+ from typing import Optional, List
2
+
3
+ from watchmen_auth import PrincipalService
4
+ from watchmen_collector_kernel.model import TriggerTable
5
+ from watchmen_meta.common import TupleShaper, TupleService
6
+ from watchmen_meta.common.storage_service import StorableId
7
+ from watchmen_model.common import Storable, TableTriggerId
8
+ from watchmen_storage import EntityName, EntityRow, EntityShaper, TransactionalStorageSPI, SnowflakeGenerator, \
9
+ EntityCriteriaJoint, EntityCriteriaExpression, ColumnNameLiteral, EntitySortColumn
10
+
11
+
12
class TriggerTableShaper(EntityShaper):
    """Translate ``TriggerTable`` entities to storage rows and back."""

    def serialize(self, entity: TriggerTable) -> EntityRow:
        """Build the storage row for ``entity``.

        The trigger-specific columns are collected here and handed to
        ``TupleShaper.serialize_tenant_based`` together with the entity.
        """
        trigger_columns = {
            'table_trigger_id': entity.tableTriggerId,
            'table_name': entity.tableName,
            'model_name': entity.modelName,
            'is_extracted': entity.isExtracted,
            'data_count': entity.dataCount,
            'model_trigger_id': entity.modelTriggerId,
            'event_trigger_id': entity.eventTriggerId
        }
        return TupleShaper.serialize_tenant_based(entity, trigger_columns)

    def deserialize(self, row: EntityRow) -> TriggerTable:
        """Rebuild a ``TriggerTable`` from a storage row.

        The trigger-specific columns are read here; the entity is then passed
        through ``TupleShaper.deserialize_tenant_based`` with the same row.
        """
        trigger_table = TriggerTable(
            tableTriggerId=row.get('table_trigger_id'),
            tableName=row.get('table_name'),
            modelName=row.get('model_name'),
            isExtracted=row.get('is_extracted'),
            dataCount=row.get('data_count'),
            modelTriggerId=row.get('model_trigger_id'),
            eventTriggerId=row.get('event_trigger_id')
        )
        # noinspection PyTypeChecker
        return TupleShaper.deserialize_tenant_based(row, trigger_table)
36
+
37
+
38
# Entity (table) name returned by TriggerTableService.get_entity_name().
TRIGGER_TABLE_TABLE = 'trigger_table'
# Single shared shaper instance returned by TriggerTableService.get_entity_shaper().
TRIGGER_TABLE_ENTITY_SHAPER = TriggerTableShaper()
40
+
41
+
42
class TriggerTableService(TupleService):
    """Storage service for ``TriggerTable`` entities."""

    def should_record_operation(self) -> bool:
        """Always ``False`` for this service."""
        return False

    def get_entity_name(self) -> EntityName:
        """Return the entity name used for trigger tables."""
        return TRIGGER_TABLE_TABLE

    def get_entity_shaper(self) -> EntityShaper:
        """Return the shared ``TriggerTableShaper`` instance."""
        return TRIGGER_TABLE_ENTITY_SHAPER

    def get_storable_id_column_name(self) -> EntityName:
        """Return the name of the id column."""
        return 'table_trigger_id'

    def get_storable_id(self, storable: TriggerTable) -> StorableId:
        """Return the id carried by ``storable``."""
        # noinspection PyTypeChecker
        return storable.tableTriggerId

    def set_storable_id(
            self, storable: TriggerTable, storable_id: TableTriggerId) -> Storable:
        """Assign ``storable_id`` to ``storable`` and return the same object."""
        storable.tableTriggerId = storable_id
        return storable

    def update_table_trigger(self, trigger: TriggerTable):
        """Update ``trigger`` inside its own transaction.

        Commits on success and returns whatever ``self.update`` returned.
        On any exception the transaction is rolled back and the exception
        is re-raised.
        """
        self.begin_transaction()
        try:
            updated = self.update(trigger)
            self.commit_transaction()
            return updated
        except Exception as error:
            self.rollback_transaction()
            raise error

    def find_unfinished(self) -> Optional[List[TriggerTable]]:
        """Find the distinct (table_trigger_id, tenant_id) values where ``is_extracted`` is False."""
        self.begin_transaction()
        try:
            not_extracted = EntityCriteriaExpression(
                left=ColumnNameLiteral(columnName='is_extracted'), right=False)
            finder = self.get_entity_finder_for_columns(
                criteria=[not_extracted],
                distinctColumnNames=['table_trigger_id',
                                     'tenant_id'],
                distinctValueOnSingleColumn=False)
            # noinspection PyTypeChecker
            return self.storage.find_distinct_values(finder)
        finally:
            self.close_transaction()

    def find_by_id(self, trigger_id: TableTriggerId) -> Optional[TriggerTable]:
        """Look up one trigger table by its id."""
        self.begin_transaction()
        try:
            id_helper = self.get_entity_id_helper()
            return self.storage.find_by_id(trigger_id, id_helper)
        finally:
            self.close_transaction()

    def find_by_model_trigger_id(self, model_trigger_id: int) -> List[TriggerTable]:
        """Find all trigger tables whose ``model_trigger_id`` matches the given value."""
        self.begin_transaction()
        try:
            same_model_trigger = EntityCriteriaExpression(
                left=ColumnNameLiteral(columnName='model_trigger_id'), right=model_trigger_id)
            finder = self.get_entity_finder(criteria=[same_model_trigger])
            # noinspection PyTypeChecker
            return self.storage.find(finder)
        finally:
            self.close_transaction()
107
+
108
+
109
def get_trigger_table_service(storage: TransactionalStorageSPI,
                              snowflake_generator: SnowflakeGenerator,
                              principal_service: PrincipalService
                              ) -> TriggerTableService:
    """Create a ``TriggerTableService`` from the given storage, id generator and principal."""
    service = TriggerTableService(storage, snowflake_generator, principal_service)
    return service
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: watchmen-collector-kernel
3
- Version: 16.4.6
3
+ Version: 16.4.8
4
4
  Summary:
5
5
  License: MIT
6
6
  Author: botlikes
@@ -18,11 +18,11 @@ Provides-Extra: oracle
18
18
  Provides-Extra: oss
19
19
  Provides-Extra: postgresql
20
20
  Provides-Extra: s3
21
- Requires-Dist: watchmen-pipeline-kernel (==16.4.6)
22
- Requires-Dist: watchmen-storage-mongodb (==16.4.6) ; extra == "mongodb"
23
- Requires-Dist: watchmen-storage-mssql (==16.4.6) ; extra == "mssql"
24
- Requires-Dist: watchmen-storage-mysql (==16.4.6) ; extra == "mysql"
25
- Requires-Dist: watchmen-storage-oracle (==16.4.6) ; extra == "oracle"
26
- Requires-Dist: watchmen-storage-oss (==16.4.6) ; extra == "oss"
27
- Requires-Dist: watchmen-storage-postgresql (==16.4.6) ; extra == "postgresql"
28
- Requires-Dist: watchmen-storage-s3 (==16.4.6) ; extra == "s3"
21
+ Requires-Dist: watchmen-data-kernel (==16.4.8)
22
+ Requires-Dist: watchmen-storage-mongodb (==16.4.8) ; extra == "mongodb"
23
+ Requires-Dist: watchmen-storage-mssql (==16.4.8) ; extra == "mssql"
24
+ Requires-Dist: watchmen-storage-mysql (==16.4.8) ; extra == "mysql"
25
+ Requires-Dist: watchmen-storage-oracle (==16.4.8) ; extra == "oracle"
26
+ Requires-Dist: watchmen-storage-oss (==16.4.8) ; extra == "oss"
27
+ Requires-Dist: watchmen-storage-postgresql (==16.4.8) ; extra == "postgresql"
28
+ Requires-Dist: watchmen-storage-s3 (==16.4.8) ; extra == "s3"
@@ -0,0 +1,38 @@
1
+ watchmen_collector_kernel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ watchmen_collector_kernel/common/__init__.py,sha256=QJHylPJAMoSbSa6EfgiQTLNtkQKSCUJucf7ZG3Nqswg,175
3
+ watchmen_collector_kernel/common/constants.py,sha256=mIsGBhAjHbmeuVeNYDOKLhiBqG2sSX-0vs8ipEEIo9E,302
4
+ watchmen_collector_kernel/model/__init__.py,sha256=ZhsKn7a2KW7LaVj87coIRC4Z8wsysE2RXn6v8AmoUsg,554
5
+ watchmen_collector_kernel/model/change_data_json.py,sha256=-oNIGsvYo8KJJsSFusbGa9kIuFfE0otLnWe-zz9O42A,495
6
+ watchmen_collector_kernel/model/change_data_record.py,sha256=vadzugcX4nl6G3F-jt7DtuVqjE8vtlasu86DDjpaGGM,330
7
+ watchmen_collector_kernel/model/collector_model_config.py,sha256=oE0T9I_9uGppttoytvzl7-35WgwlWu7s6awNtsK4Q20,288
8
+ watchmen_collector_kernel/model/collector_table_config.py,sha256=R80d2rqfVRj_rZ6-mognaLYe26R7r3g4nFR3Wj-4YLY,2143
9
+ watchmen_collector_kernel/model/competitive_lock.py,sha256=fxDRuzZpsgrZjGeuADS3zRYqQioqWhg72Pr592U_Yvw,264
10
+ watchmen_collector_kernel/model/condition.py,sha256=jGsjvb5z9XRFW0B55k-iC2_Zle4rl-A5J_6PB6EV6mA,1942
11
+ watchmen_collector_kernel/model/scheduled_task.py,sha256=Zb6blDfDmxX90vZwAODQLQtyfI_0JTb1aWGi80voAyY,752
12
+ watchmen_collector_kernel/model/trigger_event.py,sha256=ZjVgWQS_qqILchw4fGFzxn1tJUMx77s2_UpyiBd7azA,251
13
+ watchmen_collector_kernel/model/trigger_model.py,sha256=NpnIbdax8vQeGYeGcOMpWpjYUhkeZUil--6-yTNpv3k,175
14
+ watchmen_collector_kernel/model/trigger_table.py,sha256=aPYJlIb-8aX_-O6vZQp3DU58zYtyosYLGFjpL81ijds,229
15
+ watchmen_collector_kernel/service/__init__.py,sha256=hsg-38u7V3QizNmmuEtOw8gC7QEOopVInLbY97MiwuM,426
16
+ watchmen_collector_kernel/service/criteria_builder.py,sha256=NAk0yQBn4KARttqEOwWozxIlqkVuGPTQ8n3Zk6gjwcQ,2360
17
+ watchmen_collector_kernel/service/data_capture.py,sha256=45asoQQlv016NKsPoiUVgaSCOJzUBHDCmWr7XPt-8VI,2873
18
+ watchmen_collector_kernel/service/extract_source.py,sha256=MYldG4gR6t-2JeVJ6nL5aAb59AjxkQWj3dVwzJKXN9Q,2928
19
+ watchmen_collector_kernel/service/extract_utils.py,sha256=PIyxFG7U37ZXLYlOdIK-Dh-kBptXPFqk-9-b1LuPL1A,2452
20
+ watchmen_collector_kernel/service/lock_clean.py,sha256=qeNc8vZtfxXTNpGhGc_ZnUD9iRGK2VXNEpv1VglVWCA,965
21
+ watchmen_collector_kernel/service/lock_helper.py,sha256=GxqjIsCD-jn5kDICVWS1LlTFCuMpnRSpcrQtPLv5bxs,842
22
+ watchmen_collector_kernel/service/task_housekeeping.py,sha256=i1lnZ4jo-RLjLQD0Z7HSLQqjXuSqdL3zqHDfFVaNbe8,1673
23
+ watchmen_collector_kernel/service/task_service.py,sha256=XNc-nudTpk569ibqd5wLpzLGlcHUAf71Acn9_iKuS2Y,3091
24
+ watchmen_collector_kernel/service/trigger_collector.py,sha256=IP_ILJNMhahBogtvfS1iHDOJAxKCDBDJFXTDi98R11M,6790
25
+ watchmen_collector_kernel/storage/__init__.py,sha256=sstizR2fcwo4ZhqfiqceTxxS49HTtfExCPMeEaYc9bc,832
26
+ watchmen_collector_kernel/storage/change_data_json_service.py,sha256=JvaIFWPDzq6CHKNbMaUvbIaXs0iAT4IMUFj-zeZFPcw,7393
27
+ watchmen_collector_kernel/storage/change_data_record_service.py,sha256=MOju5vTl-ZQrpEfL7DytvB_T_JM0dYyXGAzrhNnzb2Y,7463
28
+ watchmen_collector_kernel/storage/collector_model_config_service.py,sha256=kWLjmppPpRuN0VptiAEGA38BJ20fFD5Q5gxPZJRbQYs,3830
29
+ watchmen_collector_kernel/storage/collector_table_config_service.py,sha256=aVZeN5AhzUORHaZQUDo_MlKcX6AkgcHcQeI6UwAoAT0,5659
30
+ watchmen_collector_kernel/storage/competitive_lock_service.py,sha256=jjxxHb4Dr0DIDX_g26wtCa8vGLGLnc4BJQGl0d3J31M,2534
31
+ watchmen_collector_kernel/storage/scheduled_task_service.py,sha256=ROHYW-pTQaL8kWkLgk9h18XsxfKGjsReMoBY0hoD4SM,5340
32
+ watchmen_collector_kernel/storage/trigger_event_service.py,sha256=DKSCQ0sYSH3uAtGmokvncD9FIYvjoVsk7QtqdRMQGhM,3099
33
+ watchmen_collector_kernel/storage/trigger_model_service.py,sha256=0-_YcL5FNGHy2MKn8vTYrWFaYOPW_u4iKQRG00AOfCI,3219
34
+ watchmen_collector_kernel/storage/trigger_table_service.py,sha256=RHiFJaT3UQmCpz-gxnfsoBPV7lS8JM1i9DCQn3BnfA4,3779
35
+ watchmen_collector_kernel-16.4.8.dist-info/LICENSE,sha256=iuuG7ErblOdcEZi5u89VXS0VIUiTb4flerGp_PAS9E4,1061
36
+ watchmen_collector_kernel-16.4.8.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
37
+ watchmen_collector_kernel-16.4.8.dist-info/METADATA,sha256=7-zGOrH90-yWi6UZGqtvw4qbSvOFs6a934l9YrQoLVM,1135
38
+ watchmen_collector_kernel-16.4.8.dist-info/RECORD,,
@@ -1,14 +0,0 @@
1
- from watchmen_model.common import SettingsModel
2
-
3
-
4
class S3CollectorSettings(SettingsModel):
    """Settings consumed by the S3 collector (connector and housekeeping task)."""
    # credentials and bucket handed to SimpleStorageService by S3Connector
    access_key_id: str
    secret_access_key: str
    bucket_name: str
    # passed to SimpleStorageService as its ``endpoint`` argument
    region: str
    token: str
    # tenant that every collected payload is attributed to
    tenant_id: int
    # key prefix listed for objects to consume
    consume_prefix: str
    # key prefix under which rejected objects are re-written
    dead_prefix: str
    # ``max_keys`` argument of each object listing
    max_keys: int = 10
    # value passed to ``sleep`` between two runs of the clean task
    clean_task_interval: int = 3600
@@ -1 +0,0 @@
1
- from .s3_connector import init_s3_collector
@@ -1,42 +0,0 @@
1
-
2
- from watchmen_data_kernel.storage import TopicTrigger
3
- from watchmen_meta.common import ask_snowflake_generator, ask_super_admin
4
-
5
- from watchmen_model.pipeline_kernel import PipelineTriggerData
6
- from watchmen_pipeline_kernel.pipeline import create_monitor_log_pipeline_invoker, PipelineTrigger
7
-
8
-
9
- from watchmen_data_kernel.meta import TopicService
10
- from watchmen_data_kernel.service import ask_topic_data_service, ask_topic_storage
11
-
12
-
13
async def handle_trigger_data(trigger_data: PipelineTriggerData, topic_trigger: TopicTrigger) -> None:
    """Start the pipeline trigger for already-saved topic data.

    :param trigger_data: code, tenant, trigger type and payload of the data
    :param topic_trigger: passed to ``PipelineTrigger.start``
    """
    # use super admin, switched to the tenant of the incoming data
    principal_service = ask_super_admin()
    principal_service.tenantId = trigger_data.tenantId

    topic_service = TopicService(principal_service)
    schema = topic_service.find_schema_by_name(trigger_data.code, trigger_data.tenantId)
    trace_id = str(ask_snowflake_generator().next_id())

    pipeline_trigger = PipelineTrigger(
        trigger_topic_schema=schema,
        trigger_type=trigger_data.triggerType,
        trigger_data=trigger_data.data,
        trace_id=trace_id,
        principal_service=principal_service,
        asynchronized=False,
        handle_monitor_log=create_monitor_log_pipeline_invoker(trace_id, principal_service)
    )
    await pipeline_trigger.start(topic_trigger)
29
-
30
-
31
def save_topic_data(trigger_data: PipelineTriggerData) -> TopicTrigger:
    """Save the payload of ``trigger_data`` into its topic.

    :param trigger_data: carries the topic code, tenant id and payload
    :return: the result of ``trigger_by_insert`` on the topic data service
    :raises Exception: when no schema is found for the code and tenant
    """
    # use super admin, switched to the tenant of the incoming data
    principal_service = ask_super_admin()
    principal_service.tenantId = trigger_data.tenantId

    schema = TopicService(principal_service).find_schema_by_name(trigger_data.code, trigger_data.tenantId)
    if schema is None:
        # Exception does not interpolate %-style arguments, so the message is
        # formatted explicitly to actually contain the code and tenant id.
        raise Exception(f"schema is not found. {trigger_data.code}, {trigger_data.tenantId}")

    data = schema.prepare_data(trigger_data.data, principal_service)
    storage = ask_topic_storage(schema, principal_service)
    service = ask_topic_data_service(schema, storage, principal_service)
    return service.trigger_by_insert(data)
@@ -1,58 +0,0 @@
1
- from datetime import datetime, date, timedelta
2
- from logging import getLogger
3
- from threading import Thread
4
- from typing import List
5
-
6
- from watchmen_collector_kernel.common import S3CollectorSettings
7
- from watchmen_collector_kernel.lock import get_oss_collector_lock_service
8
- from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
9
- from time import sleep
10
-
11
-
12
class CleanTask:
    """Periodically deletes the lock rows of completed tasks, once per calendar day."""

    def __init__(self, settings: S3CollectorSettings):
        self.lock_service = get_oss_collector_lock_service()
        # '%Y-%m-%d' strings of the days that were already cleaned
        self.processed_date = []
        self.cleanTaskInterval = settings.clean_task_interval

    def run(self):
        """Loop forever: clean, then sleep for the configured interval.

        Any exception is logged and a fresh thread is started via ``restart``.
        """
        try:
            while True:
                self.clean_task()
                sleep(self.cleanTaskInterval)
        except Exception as error:
            getLogger(__name__).error(error, exc_info=True, stack_info=True)
            self.restart()

    def clean_task(self):
        """Delete the locks returned for today, unless today was already handled.

        :return: ``"Done"`` when today is already recorded as processed, otherwise ``None``
        """
        # Subtracting a sub-day timedelta from a date leaves it unchanged, so
        # the query date is simply the current date.
        query_date = datetime.now().date()
        if self.is_processed(query_date):
            return "Done"
        for finished in self.get_task_list(query_date):
            self.lock_service.delete_by_id(finished.lockId)
        self.processed_date.append(query_date.strftime('%Y-%m-%d'))

    def get_task_list(self, clean_date) -> List[OSSCollectorCompetitiveLock]:
        """Ask the lock service for the completed tasks of ``clean_date``."""
        return self.lock_service.find_completed_task(clean_date)

    def is_processed(self, query_date: date) -> bool:
        """Tell whether ``query_date`` is already recorded in ``processed_date``."""
        return query_date.strftime('%Y-%m-%d') in self.processed_date

    def restart(self):
        """Run this task again on a new daemon thread."""
        worker = Thread(target=CleanTask.run, args=(self,), daemon=True)
        worker.start()
54
-
55
-
56
def init_task_housekeeping(settings: S3CollectorSettings):
    """Create a ``CleanTask`` from ``settings`` and run it on a daemon thread."""
    clean_task = CleanTask(settings)
    worker = Thread(target=CleanTask.run, args=(clean_task,), daemon=True)
    worker.start()
@@ -1,210 +0,0 @@
1
- import asyncio
2
- from enum import Enum
3
- from logging import getLogger
4
- from threading import Thread
5
- from time import sleep
6
- from typing import Optional, Dict, Any
7
-
8
- from watchmen_collector_kernel.common import S3CollectorSettings
9
- from watchmen_collector_kernel.lock import get_oss_collector_lock_service, get_unique_key_distributed_lock, \
10
- DistributedLock
11
- from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
12
- from watchmen_data_kernel.storage import TopicTrigger
13
- from watchmen_meta.common import ask_snowflake_generator
14
-
15
- from watchmen_storage_s3 import SimpleStorageService, ObjectContent
16
-
17
- from watchmen_model.pipeline_kernel import PipelineTriggerData
18
- from watchmen_model.common import Storable
19
-
20
- from .handler import save_topic_data, handle_trigger_data
21
- from .housekeeping import init_task_housekeeping
22
-
23
# Module-level logger used by the S3 connector.
logger = getLogger(__name__)

# Separator between the parts of an object identifier (the key without its prefix).
identifier_delimiter = "~"
26
-
27
-
28
class STATUS(str, Enum):
    """Outcome of consuming one object, as returned by ``S3Connector.consume``."""
    # key does not match the expected pattern; object moved to the dead queue
    CHECK_KEY_FAILED = "CHECK_KEY_FAILED"
    # dependency check did not pass
    DEPENDENCY_FAILED = "DEPENDENCY_FAILED"
    # the lock for the object could not be obtained
    CREATE_TASK_FAILED = "CREATE_TASK_FAILED"
    # object payload was empty; object moved to the dead queue
    EMPTY_PAYLOAD = "EMPTY_PAYLOAD"
    # payload saved, pipeline triggered and object deleted
    COMPLETED_TASK = "COMPLETED_TASK"
    # saving/processing raised; object moved to the dead queue
    PROCESS_TASK_FAILED = "PROCESS_TASK_FAILED"
35
-
36
-
37
def init_s3_collector(settings: S3CollectorSettings):
    """Start the S3 connector thread, then the housekeeping task, from ``settings``."""
    connector = S3Connector(settings)
    connector.create_connector()
    init_task_housekeeping(settings)
40
-
41
-
42
class Dependency(Storable):
    """Model name and object id parsed from an object identifier by ``S3Connector.get_dependency``."""
    model_name: str
    object_id: str
45
-
46
-
47
class S3Connector:
    """Polls a bucket prefix, saves each object's payload as topic data and triggers pipelines.

    Object identifiers (the key with the consume prefix removed) are split on
    ``identifier_delimiter``; identifiers with 3 or 5 parts are accepted.
    """

    def __init__(self, settings: S3CollectorSettings):
        self.simpleStorageService = SimpleStorageService(access_key_id=settings.access_key_id,
                                                         access_key_secret=settings.secret_access_key,
                                                         endpoint=settings.region,
                                                         bucket_name=settings.bucket_name,
                                                         params=None)
        self.lock_service = get_oss_collector_lock_service()
        self.snowflakeGenerator = ask_snowflake_generator()
        self.token = settings.token
        self.tenant_id = settings.tenant_id
        self.consume_prefix = settings.consume_prefix
        self.dead_prefix = settings.dead_prefix
        self.maxKeys = settings.max_keys

    def create_connector(self) -> None:
        """Start ``run`` on a new daemon thread."""
        Thread(target=S3Connector.run, args=(self,), daemon=True).start()

    def run(self):
        """List objects under the consume prefix forever and consume them.

        An empty listing sleeps 5 seconds. Within one listing, an object whose
        lock or dependency check failed is skipped; any other known outcome
        is logged and ends the current batch so the listing is refreshed.
        On an exception the error is logged and, after 300 seconds, a new
        connector thread is started.
        """
        try:
            # outcomes after which the next object of the same listing is tried
            skip_statuses = (STATUS.CREATE_TASK_FAILED, STATUS.DEPENDENCY_FAILED)
            # outcomes after which the listing is refreshed
            refresh_statuses = (STATUS.CHECK_KEY_FAILED, STATUS.COMPLETED_TASK,
                                STATUS.EMPTY_PAYLOAD, STATUS.PROCESS_TASK_FAILED)
            while True:
                objects = self.simpleStorageService.list_objects(max_keys=self.maxKeys, prefix=self.consume_prefix)
                logger.info("objects size {}".format(len(objects)))
                if len(objects) == 0:
                    sleep(5)
                    continue
                for object_ in objects:
                    result = self.consume(object_)
                    if result in skip_statuses:
                        continue
                    # Membership test: the previous chained ``or`` contained a bare
                    # ``STATUS.EMPTY_PAYLOAD`` operand, which is always truthy.
                    if result in refresh_statuses:
                        logger.info(
                            "CHECK_KEY_FAILED or COMPLETED_TASK or EMPTY_PAYLOAD or PROCESS_TASK_FAILED, key is {}".format(
                                object_.key))
                        break
        except Exception as e:
            logger.error(e, exc_info=True, stack_info=True)
            sleep(300)
            self.create_connector()

    def consume(self, object_: ObjectContent) -> str:
        """Consume one listed object and report the outcome as a ``STATUS``.

        The lock is always released in a ``finally`` block once it has been
        created, whether or not it was obtained.
        """
        object_key = self.get_identifier(self.consume_prefix, object_.key)

        if not self.validate_key_pattern(object_key):
            # NOTE(review): get_resource_lock indexes parts [1] and [2]; an
            # identifier with fewer than 3 parts looks like it would raise here.
            distributed_lock = get_unique_key_distributed_lock(self.get_resource_lock(object_.key),
                                                               self.lock_service)
            try:
                if not self.ask_lock(distributed_lock):
                    return STATUS.CREATE_TASK_FAILED
                payload = self.get_payload(object_.key)
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.CHECK_KEY_FAILED
            finally:
                self.ask_unlock(distributed_lock)

        dependency = self.get_dependency(object_key)
        if not self.check_dependency_finished(dependency):
            return STATUS.DEPENDENCY_FAILED

        distributed_lock = get_unique_key_distributed_lock(self.get_resource_lock(object_.key),
                                                           self.lock_service)
        try:
            if not self.ask_lock(distributed_lock):
                return STATUS.CREATE_TASK_FAILED
            payload = self.get_payload(object_.key)
            if not payload:
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.EMPTY_PAYLOAD
            try:
                trigger_data = PipelineTriggerData(code=self.get_code(object_key), data=payload,
                                                   tenantId=self.tenant_id)
                topic_trigger = self.save_data(trigger_data)
                self.trigger_pipeline(trigger_data, topic_trigger)
                self.simpleStorageService.delete_object(object_.key)
                return STATUS.COMPLETED_TASK
            except Exception as e:
                logger.error(e, exc_info=True, stack_info=True)
                self.move_to_dead_queue(object_.key, payload)
                return STATUS.PROCESS_TASK_FAILED
        finally:
            self.ask_unlock(distributed_lock)

    def get_payload(self, key: str) -> Dict:
        """Fetch the object stored under ``key``."""
        return self.simpleStorageService.get_object(key)

    def ask_lock(self, lock: DistributedLock) -> bool:
        """Try to obtain ``lock`` without waiting."""
        return lock.try_lock_nowait()

    def ask_unlock(self, lock: DistributedLock) -> bool:
        """Release ``lock``."""
        return lock.unlock()

    def process(self, key: str, code: str, payload: Dict[str, Any] = None):
        """Save ``payload`` as topic data for ``code`` and run its pipelines."""
        logger.info("start to process %s and %s", code, key)
        trigger_data = PipelineTriggerData(code=code, data=payload, tenantId=self.tenant_id)
        result = save_topic_data(trigger_data)
        asyncio.run(handle_trigger_data(trigger_data, result))

    def save_data(self, trigger_data: PipelineTriggerData) -> TopicTrigger:
        """Save the trigger data into its topic."""
        return save_topic_data(trigger_data)

    def trigger_pipeline(self, trigger_data: PipelineTriggerData, topic_trigger: TopicTrigger):
        """Run the pipelines for saved data; failures are logged, not raised."""
        try:
            asyncio.run(handle_trigger_data(trigger_data, topic_trigger))
        except Exception as e:
            logger.error(e, exc_info=True, stack_info=True)

    def get_resource_lock(self, key: str) -> OSSCollectorCompetitiveLock:
        """Build a new lock entity (status 0) for the object stored under ``key``."""
        object_key = self.get_identifier(self.consume_prefix, key)
        key_parts = object_key.split(identifier_delimiter)
        return OSSCollectorCompetitiveLock(lockId=self.snowflakeGenerator.next_id(),
                                           resourceId=key,
                                           modelName=key_parts[1],
                                           objectId=key_parts[2],
                                           tenantId=self.tenant_id,
                                           status=0)

    def get_dependency(self, key: str) -> Optional[Dependency]:
        """Parse the dependency from an identifier.

        With 5 parts the dependency is parts 3 and 4; with 3 parts it is
        parts 1 and 2; any other length yields ``None``.
        """
        key_parts = key.split(identifier_delimiter)
        part_count = len(key_parts)
        if part_count == 5:
            return Dependency(model_name=key_parts[3], object_id=key_parts[4])
        if part_count == 3:
            return Dependency(model_name=key_parts[1], object_id=key_parts[2])
        return None

    def check_dependency_finished(self, dependency: Optional[Dependency]) -> bool:
        """Return True when there is no dependency or the lock service counts 0 rows for it."""
        if not dependency:
            return True
        result = self.lock_service.find_by_dependency(dependency.model_name, dependency.object_id)
        return result == 0

    def get_code(self, identifier: str) -> str:
        """Derive the topic code: ``'raw_'`` plus the lower-cased second identifier part."""
        key_parts = identifier.split(identifier_delimiter)
        return 'raw_' + key_parts[1].lower()

    def validate_key_pattern(self, identifier: str) -> bool:
        """Accept identifiers that split into exactly 3 or 5 parts."""
        return len(identifier.split(identifier_delimiter)) in (3, 5)

    def move_to_dead_queue(self, key: str, payload: Optional[Dict]):
        """Write ``payload`` under the dead prefix, then delete the original object."""
        dead_queue_key = self.generate_dead_file_key(key)
        self.simpleStorageService.put_object(dead_queue_key, payload)
        self.simpleStorageService.delete_object(key)

    def generate_dead_file_key(self, key_: str):
        """Replace the consume prefix of ``key_`` with the dead prefix."""
        return self.dead_prefix + self.get_identifier(self.consume_prefix, key_)

    @staticmethod
    def get_identifier(prefix, key) -> str:
        """Return ``key`` with a leading ``prefix`` removed (unchanged if absent)."""
        return key.removeprefix(prefix)
@@ -1,3 +0,0 @@
1
- from .distributed_lock import DistributedLock
2
- from .oss_collector_lock_service import get_oss_collector_lock_service
3
- from .unique_key_distributed_lock import get_unique_key_distributed_lock
@@ -1,23 +0,0 @@
1
- from abc import ABC, abstractmethod
2
-
3
-
4
class DistributedLock(ABC):
    """Abstract interface of a distributed lock."""

    def __init__(self):
        pass

    @abstractmethod
    def lock(self):
        """Obtain the lock."""
        pass

    @abstractmethod
    def try_lock(self, timeout: int):
        """Try to obtain the lock, bounded by ``timeout``.

        NOTE(review): the unit of ``timeout`` is not visible here - confirm with implementations.
        """
        pass

    @abstractmethod
    def try_lock_nowait(self):
        """Try to obtain the lock without waiting."""
        pass

    @abstractmethod
    def unlock(self):
        """Release the lock."""
        pass
@@ -1,127 +0,0 @@
1
- from datetime import datetime
2
- from typing import List
3
-
4
- from watchmen_meta.common import EntityService, ask_meta_storage
5
- from watchmen_meta.common.storage_service import StorableId
6
- from watchmen_model.common import Storable, OssCollectorCompetitiveLockId
7
- from watchmen_storage import EntityShaper, EntityRow, EntityName, TransactionalStorageSPI, \
8
- EntityHelper, EntityIdHelper, EntityFinder, ColumnNameLiteral, EntityCriteriaExpression, Entity
9
- from watchmen_collector_kernel.model import OSSCollectorCompetitiveLock
10
-
11
-
12
class OSSCollectorCompetitiveLockShaper(EntityShaper):
    """Translate ``OSSCollectorCompetitiveLock`` entities to storage rows and back."""

    def serialize(self, entity: OSSCollectorCompetitiveLock) -> EntityRow:
        """Map the lock's attributes onto their column names."""
        row = {
            'lock_id': entity.lockId,
            'resource_id': entity.resourceId,
            'model_name': entity.modelName,
            'object_id': entity.objectId,
            'registered_at': entity.registeredAt,
            'tenant_id': entity.tenantId,
            'status': entity.status
        }
        return row

    def deserialize(self, row: EntityRow) -> OSSCollectorCompetitiveLock:
        """Build a lock entity from the columns of ``row``."""
        lock = OSSCollectorCompetitiveLock(
            lockId=row.get('lock_id'),
            resourceId=row.get('resource_id'),
            modelName=row.get('model_name'),
            objectId=row.get('object_id'),
            registeredAt=row.get('registered_at'),
            tenantId=row.get('tenant_id'),
            status=row.get('status')
        )
        return lock
34
-
35
-
36
# Entity (table) name of the competitive lock rows.
OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE = 'oss_collector_competitive_lock'
# Single shared shaper instance for the competitive lock entity.
OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER = OSSCollectorCompetitiveLockShaper()
38
-
39
-
40
class OssCollectorLockService(EntityService):
    """Storage access for ``OSSCollectorCompetitiveLock`` rows.

    Every operation connects the storage, performs one call and closes the
    storage again in a ``finally`` block.
    """

    def __init__(self, storage: TransactionalStorageSPI):
        super().__init__(storage)

    def get_entity_name(self) -> EntityName:
        """Return the entity name of the lock table."""
        return OSS_COLLECTOR_COMPETITIVE_LOCK_TABLE

    def get_entity_shaper(self) -> EntityShaper:
        """Return the shared lock shaper instance."""
        return OSS_COLLECTOR_COMPETITIVE_LOCK_ENTITY_SHAPER

    def get_storable_id_column_name(self) -> EntityName:
        """Return the name of the id column."""
        return 'lock_id'

    def get_storable_id(self, storable: OSSCollectorCompetitiveLock) -> StorableId:
        """Return the id carried by ``storable``."""
        return storable.lockId

    def set_storable_id(self, storable: OSSCollectorCompetitiveLock,
                        storable_id: OssCollectorCompetitiveLockId) -> Storable:
        """Assign ``storable_id`` to ``storable`` and return the same object."""
        storable.lockId = storable_id
        return storable

    def _lock_id_helper(self) -> EntityIdHelper:
        """Build the id helper shared by the by-id operations."""
        return EntityIdHelper(idColumnName=self.get_storable_id_column_name(),
                              name=self.get_entity_name(),
                              shaper=self.get_entity_shaper())

    def _lock_finder(self, criteria: List) -> EntityFinder:
        """Build a finder over the lock entity with the given criteria."""
        return EntityFinder(name=self.get_entity_name(),
                            shaper=self.get_entity_shaper(),
                            criteria=criteria)

    def insert_one(self, lock: OSSCollectorCompetitiveLock):
        """Insert one lock row."""
        try:
            self.storage.connect()
            self.storage.insert_one(
                lock,
                EntityHelper(name=self.get_entity_name(), shaper=self.get_entity_shaper())
            )
        finally:
            self.storage.close()

    def delete_by_id(self, id_: OssCollectorCompetitiveLockId):
        """Delete the lock row with the given id."""
        try:
            self.storage.connect()
            self.storage.delete_by_id(id_, self._lock_id_helper())
        finally:
            self.storage.close()

    def update_one(self, one: Entity) -> int:
        """Update one lock row, matched by its id.

        :return: the value returned by the storage's ``update_one``
            (presumably the number of updated rows - confirm against the storage SPI).
            It was previously discarded despite the ``int`` annotation.
        """
        try:
            self.storage.connect()
            return self.storage.update_one(one, self._lock_id_helper())
        finally:
            self.storage.close()

    def find_by_dependency(self, model_name: str, object_id: str) -> int:
        """Count the rows with the given model name and object id whose status is 0."""
        try:
            self.storage.connect()
            return self.storage.count(self._lock_finder([
                EntityCriteriaExpression(left=ColumnNameLiteral(columnName='model_name'), right=model_name),
                EntityCriteriaExpression(left=ColumnNameLiteral(columnName='object_id'), right=object_id),
                EntityCriteriaExpression(left=ColumnNameLiteral(columnName='status'), right=0)
            ]))
        finally:
            self.storage.close()

    def find_completed_task(self, query_date: datetime) -> List:
        """Find the rows with status 1 that were registered before ``query_date``."""
        try:
            self.storage.connect()
            return self.storage.find(self._lock_finder([
                EntityCriteriaExpression(left=ColumnNameLiteral(columnName='registered_at'),
                                         operator="less-than",
                                         right=query_date),
                EntityCriteriaExpression(left=ColumnNameLiteral(columnName='status'), right=1)
            ]))
        finally:
            self.storage.close()
124
-
125
-
126
def get_oss_collector_lock_service() -> OssCollectorLockService:
    """Create an ``OssCollectorLockService`` on top of the meta storage."""
    meta_storage = ask_meta_storage()
    return OssCollectorLockService(meta_storage)