splight-lib 5.4.3__tar.gz → 5.5.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/PKG-INFO +1 -1
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/pyproject.toml +1 -1
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/__init__.py +2 -2
- splight_lib-5.5.0.dev1/splight_lib/client/datalake/builder.py +26 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/remote_client.py +131 -21
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/tests/test_datalake.py +3 -3
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/base.py +4 -9
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/native.py +10 -7
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/pipeline.py +1 -1
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/settings.py +11 -2
- splight_lib-5.4.3/splight_lib/client/datalake/builder.py +0 -24
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/LICENSE.txt +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/README.md +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/abstract/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/abstract/client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/auth/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/auth/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/auth/mac_auth.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/auth/token.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/abstract.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/builder.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/classmap.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/local_client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/database/remote_client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/abstract.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/buffer.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/local_client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/datalake/schemas.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/file_handler.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/filter.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/hub/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/hub/abstract.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/hub/client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/client/tests/test_database.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/abstract.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/spec.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/tests/test_abstract.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/component/tests/test_spec.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/constants.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/encryption.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/engine.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/scheduling.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/task.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/tests/test_execution.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/tests/test_scheduling.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/execution/trigger.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/_internal.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/component.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/constants.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/logging.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/logging/tests/test_logging.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/alert.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/asset.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/attribute.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/component.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/dashboard.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/data_address.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/file.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/function.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/generic.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/hub.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/metadata.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/secret.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/models.json +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/test_component_object_instance.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/test_database_model.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/test_metadata.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/test_models.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/client.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/exceptions.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/tests/test_restclient.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/types.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/stringcase.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/testing/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/tests/FakeProc.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/tests/asset_geometries.json +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/tests/test_api_contracts.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/tests/test_encryption.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/utils/__init__.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/utils/custom_model.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/utils/hub.py +0 -0
- {splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/version.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from splight_lib.client.datalake.builder import DatalakeClientBuilder
|
|
2
2
|
from splight_lib.client.datalake.local_client import LocalDatalakeClient
|
|
3
|
-
from splight_lib.client.datalake.remote_client import RemoteDatalakeClient
|
|
3
|
+
from splight_lib.client.datalake.remote_client import SyncRemoteDatalakeClient
|
|
4
4
|
|
|
5
5
|
__all__ = [
|
|
6
6
|
DatalakeClientBuilder,
|
|
7
|
-
RemoteDatalakeClient,
|
|
7
|
+
SyncRemoteDatalakeClient,
|
|
8
8
|
LocalDatalakeClient,
|
|
9
9
|
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
from splight_lib.client.datalake.abstract import AbstractDatalakeClient
|
|
4
|
+
from splight_lib.client.datalake.local_client import LocalDatalakeClient
|
|
5
|
+
from splight_lib.client.datalake.remote_client import (
|
|
6
|
+
BufferedAsyncRemoteDatalakeClient,
|
|
7
|
+
BufferedSyncRemoteDataClient,
|
|
8
|
+
SyncRemoteDatalakeClient,
|
|
9
|
+
)
|
|
10
|
+
from splight_lib.settings import DatalakeClientType
|
|
11
|
+
|
|
12
|
+
DL_CLIENT_TYPE_MAP = {
|
|
13
|
+
DatalakeClientType.BUFFERED_ASYNC: BufferedAsyncRemoteDatalakeClient,
|
|
14
|
+
DatalakeClientType.BUFFERED_SYNC: BufferedSyncRemoteDataClient,
|
|
15
|
+
DatalakeClientType.SYNC: SyncRemoteDatalakeClient,
|
|
16
|
+
DatalakeClientType.LOCAL: LocalDatalakeClient,
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DatalakeClientBuilder:
|
|
21
|
+
@staticmethod
|
|
22
|
+
def build(
|
|
23
|
+
dl_client_type: DatalakeClientType = DatalakeClientType.BUFFERED_ASYNC,
|
|
24
|
+
parameters: Dict[str, Any] = {},
|
|
25
|
+
) -> AbstractDatalakeClient:
|
|
26
|
+
return DL_CLIENT_TYPE_MAP[dl_client_type](**parameters)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from datetime import datetime
|
|
1
2
|
from threading import Lock, Thread
|
|
2
3
|
from time import sleep
|
|
3
|
-
from typing import Dict, List, Union
|
|
4
|
+
from typing import Dict, List, Optional, Union
|
|
4
5
|
|
|
5
6
|
import pandas as pd
|
|
6
7
|
from furl import furl
|
|
@@ -25,7 +26,7 @@ from splight_lib.stringcase import camelcase
|
|
|
25
26
|
logger = get_splight_logger()
|
|
26
27
|
|
|
27
28
|
|
|
28
|
-
class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
29
|
+
class SyncRemoteDatalakeClient(AbstractDatalakeClient):
|
|
29
30
|
_PREFIX = "/data"
|
|
30
31
|
|
|
31
32
|
def __init__(
|
|
@@ -85,12 +86,18 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
85
86
|
collection: str = DEFAULT_COLLECTION,
|
|
86
87
|
sort_field: str = DEFAULT_SORT_FIELD,
|
|
87
88
|
sort_direction: int = -1,
|
|
89
|
+
from_timestamp: Optional[datetime] = None,
|
|
90
|
+
to_timestamp: Optional[datetime] = None,
|
|
91
|
+
extra_pipeline: List[Dict] = [],
|
|
92
|
+
aggregation_query: Dict = {},
|
|
88
93
|
limit: int = 10000,
|
|
89
|
-
**filters,
|
|
90
94
|
) -> List[Dict]:
|
|
91
95
|
# POST /data/read
|
|
92
96
|
url = self._base_url / f"{self._PREFIX}/read"
|
|
97
|
+
pipeline = [{"$match": match}, *extra_pipeline]
|
|
93
98
|
data_request = DataRequest(
|
|
99
|
+
from_timestmap=from_timestamp,
|
|
100
|
+
to_timestamp=to_timestamp,
|
|
94
101
|
collection=collection,
|
|
95
102
|
sort_field=sort_field,
|
|
96
103
|
sort_direction=sort_direction,
|
|
@@ -99,11 +106,11 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
99
106
|
{
|
|
100
107
|
"ref_id": "output",
|
|
101
108
|
"type": "QUERY",
|
|
102
|
-
"pipeline":
|
|
109
|
+
"pipeline": pipeline,
|
|
103
110
|
"expression": None,
|
|
104
111
|
}
|
|
105
112
|
],
|
|
106
|
-
|
|
113
|
+
aggregation_query=aggregation_query,
|
|
107
114
|
)
|
|
108
115
|
response = self._restclient.post(url, json=data_request.dict())
|
|
109
116
|
response.raise_for_status()
|
|
@@ -116,12 +123,18 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
116
123
|
collection: str = DEFAULT_COLLECTION,
|
|
117
124
|
sort_field: str = DEFAULT_SORT_FIELD,
|
|
118
125
|
sort_direction: int = -1,
|
|
126
|
+
from_timestamp: Optional[datetime] = None,
|
|
127
|
+
to_timestamp: Optional[datetime] = None,
|
|
128
|
+
extra_pipeline: List[Dict] = [],
|
|
129
|
+
aggregation_query: Dict = {},
|
|
119
130
|
limit: int = 10000,
|
|
120
|
-
**filters,
|
|
121
131
|
) -> List[Dict]:
|
|
122
132
|
# POST /data/read
|
|
123
133
|
url = self._base_url / f"{self._PREFIX}/read"
|
|
134
|
+
pipeline = [{"$match": match}, *extra_pipeline]
|
|
124
135
|
data_request = DataRequest(
|
|
136
|
+
from_timestmap=from_timestamp,
|
|
137
|
+
to_timestamp=to_timestamp,
|
|
125
138
|
collection=collection,
|
|
126
139
|
sort_field=sort_field,
|
|
127
140
|
sort_direction=sort_direction,
|
|
@@ -130,15 +143,11 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
130
143
|
{
|
|
131
144
|
"ref_id": "output",
|
|
132
145
|
"type": "QUERY",
|
|
133
|
-
"pipeline": [
|
|
134
|
-
{
|
|
135
|
-
"$match": match,
|
|
136
|
-
}
|
|
137
|
-
],
|
|
146
|
+
"pipeline": pipeline,
|
|
138
147
|
"expression": None,
|
|
139
148
|
}
|
|
140
149
|
],
|
|
141
|
-
|
|
150
|
+
aggregation_query=aggregation_query,
|
|
142
151
|
)
|
|
143
152
|
response = await self._restclient.async_post(
|
|
144
153
|
url, json=data_request.dict()
|
|
@@ -161,11 +170,23 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
161
170
|
collection: str = DEFAULT_COLLECTION,
|
|
162
171
|
sort_field: str = DEFAULT_SORT_FIELD,
|
|
163
172
|
sort_direction: int = -1,
|
|
173
|
+
from_timestamp: Optional[datetime] = None,
|
|
174
|
+
to_timestamp: Optional[datetime] = None,
|
|
175
|
+
extra_pipeline: List[Dict] = [],
|
|
176
|
+
aggregation_query: Dict = {},
|
|
164
177
|
limit: int = 10000,
|
|
165
|
-
**
|
|
178
|
+
**kwargs,
|
|
166
179
|
) -> pd.DataFrame:
|
|
167
180
|
data = self._get(
|
|
168
|
-
match,
|
|
181
|
+
match,
|
|
182
|
+
collection,
|
|
183
|
+
sort_field,
|
|
184
|
+
sort_direction,
|
|
185
|
+
from_timestamp,
|
|
186
|
+
to_timestamp,
|
|
187
|
+
extra_pipeline,
|
|
188
|
+
aggregation_query,
|
|
189
|
+
limit,
|
|
169
190
|
)
|
|
170
191
|
df = pd.DataFrame(data)
|
|
171
192
|
if not df.empty:
|
|
@@ -177,7 +198,7 @@ class RemoteDatalakeClient(AbstractDatalakeClient):
|
|
|
177
198
|
return df
|
|
178
199
|
|
|
179
200
|
|
|
180
|
-
class BufferedRemoteDatalakeClient(RemoteDatalakeClient):
|
|
201
|
+
class BufferedAsyncRemoteDatalakeClient(SyncRemoteDatalakeClient):
|
|
181
202
|
_PREFIX = "data"
|
|
182
203
|
|
|
183
204
|
def __init__(
|
|
@@ -218,9 +239,9 @@ class BufferedRemoteDatalakeClient(RemoteDatalakeClient):
|
|
|
218
239
|
buffer_size, buffer_timeout
|
|
219
240
|
),
|
|
220
241
|
}
|
|
242
|
+
self._lock = Lock()
|
|
221
243
|
self._flush_thread = Thread(target=self._flusher, daemon=True)
|
|
222
244
|
self._flush_thread.start()
|
|
223
|
-
self._lock = Lock()
|
|
224
245
|
logger.debug(
|
|
225
246
|
"Buffered Remote datalake client initialized.",
|
|
226
247
|
tags=LogTags.DATALAKE,
|
|
@@ -256,22 +277,111 @@ class BufferedRemoteDatalakeClient(RemoteDatalakeClient):
|
|
|
256
277
|
def _flush_buffer(
|
|
257
278
|
self, collection: str, buffer: DatalakeDocumentBuffer
|
|
258
279
|
) -> None:
|
|
259
|
-
|
|
260
|
-
|
|
280
|
+
with self._lock:
|
|
281
|
+
if buffer.should_flush():
|
|
282
|
+
try:
|
|
283
|
+
logger.debug(
|
|
284
|
+
"Flushing datalake buffer with %s elements",
|
|
285
|
+
len(buffer.data),
|
|
286
|
+
)
|
|
287
|
+
self._send_documents(collection, buffer.data)
|
|
288
|
+
buffer.reset()
|
|
289
|
+
except Exception:
|
|
290
|
+
logger.error("Unable to save documents", exc_info=True)
|
|
291
|
+
|
|
292
|
+
@retry(SPLIGHT_REQUEST_EXCEPTIONS, tries=3, delay=2, jitter=1)
|
|
293
|
+
def _send_documents(self, collection: str, docs: List[Dict]) -> List[Dict]:
|
|
294
|
+
url = self._base_url / f"{self._PREFIX}/write"
|
|
295
|
+
collection = camelcase(collection)
|
|
296
|
+
data = {
|
|
297
|
+
"collection": collection,
|
|
298
|
+
"records": docs,
|
|
299
|
+
}
|
|
300
|
+
response = self._restclient.post(url, json=data)
|
|
301
|
+
response.raise_for_status()
|
|
302
|
+
return docs
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
class BufferedSyncRemoteDataClient(SyncRemoteDatalakeClient):
|
|
306
|
+
_PREFIX = "data"
|
|
307
|
+
|
|
308
|
+
def __init__(
|
|
309
|
+
self,
|
|
310
|
+
base_url: str,
|
|
311
|
+
access_id: str,
|
|
312
|
+
secret_key: str,
|
|
313
|
+
buffer_size: int = DL_BUFFER_SIZE,
|
|
314
|
+
buffer_timeout: float = DL_BUFFER_TIMEOUT,
|
|
315
|
+
*args,
|
|
316
|
+
**kwargs,
|
|
317
|
+
):
|
|
318
|
+
super().__init__(
|
|
319
|
+
base_url,
|
|
320
|
+
access_id,
|
|
321
|
+
secret_key,
|
|
322
|
+
buffer_size,
|
|
323
|
+
buffer_timeout,
|
|
324
|
+
*args,
|
|
325
|
+
**kwargs,
|
|
326
|
+
)
|
|
327
|
+
self._base_url = furl(base_url)
|
|
328
|
+
token = SplightAuthToken(
|
|
329
|
+
access_key=access_id,
|
|
330
|
+
secret_key=secret_key,
|
|
331
|
+
)
|
|
332
|
+
self._restclient = SplightRestClient()
|
|
333
|
+
self._restclient.update_headers(token.header)
|
|
334
|
+
|
|
335
|
+
logger.debug(
|
|
336
|
+
"Initializing buffer with size %s and timeout %s",
|
|
337
|
+
buffer_size,
|
|
338
|
+
buffer_timeout,
|
|
339
|
+
)
|
|
340
|
+
self._data_buffers = {
|
|
341
|
+
"default": DatalakeDocumentBuffer(buffer_size, buffer_timeout),
|
|
342
|
+
"routine_evaluations": DatalakeDocumentBuffer(
|
|
343
|
+
buffer_size, buffer_timeout
|
|
344
|
+
),
|
|
345
|
+
}
|
|
346
|
+
self._lock = Lock()
|
|
347
|
+
logger.debug(
|
|
348
|
+
"Synchronous Buffered Remote datalake client initialized.",
|
|
349
|
+
tags=LogTags.DATALAKE,
|
|
350
|
+
)
|
|
351
|
+
|
|
352
|
+
def save(
|
|
353
|
+
self, collection: str, instances: Union[List[Dict], Dict]
|
|
354
|
+
) -> List[Dict]:
|
|
355
|
+
logger.debug("Saving documents in datalake")
|
|
356
|
+
instances = instances if isinstance(instances, List) else [instances]
|
|
357
|
+
if collection not in self._data_buffers:
|
|
358
|
+
raise InvalidCollectionName(collection)
|
|
359
|
+
buffer = self._data_buffers[collection]
|
|
360
|
+
with self._lock:
|
|
361
|
+
if buffer.should_flush():
|
|
261
362
|
logger.debug(
|
|
262
363
|
"Flushing datalake buffer with %s elements",
|
|
263
364
|
len(buffer.data),
|
|
264
365
|
)
|
|
265
366
|
self._send_documents(collection, buffer.data)
|
|
266
367
|
buffer.reset()
|
|
267
|
-
|
|
268
|
-
|
|
368
|
+
buffer.add_documents(instances)
|
|
369
|
+
return instances
|
|
370
|
+
|
|
371
|
+
def save_dataframe(
|
|
372
|
+
self, dataframe: pd.DataFrame, collection: str = DEFAULT_COLLECTION
|
|
373
|
+
) -> None:
|
|
374
|
+
logger.debug("Saving dataframe in datalake")
|
|
375
|
+
dataframe["timestamp"] = dataframe["timestamp"].apply(
|
|
376
|
+
lambda x: x.tz_localize(tz="UTC").isoformat()
|
|
377
|
+
)
|
|
378
|
+
instances = dataframe.to_dict("records")
|
|
379
|
+
self.save(collection, instances)
|
|
269
380
|
|
|
270
381
|
@retry(SPLIGHT_REQUEST_EXCEPTIONS, tries=3, delay=2, jitter=1)
|
|
271
382
|
def _send_documents(self, collection: str, docs: List[Dict]) -> List[Dict]:
|
|
272
383
|
url = self._base_url / f"{self._PREFIX}/write"
|
|
273
384
|
collection = camelcase(collection)
|
|
274
|
-
collection = camelcase(collection)
|
|
275
385
|
data = {
|
|
276
386
|
"collection": collection,
|
|
277
387
|
"records": docs,
|
|
@@ -2,7 +2,7 @@ import os
|
|
|
2
2
|
|
|
3
3
|
from pytest_mock import MockerFixture
|
|
4
4
|
|
|
5
|
-
from splight_lib.client.datalake import RemoteDatalakeClient  # noqa E402
|
|
5
|
+
from splight_lib.client.datalake import SyncRemoteDatalakeClient # noqa E402
|
|
6
6
|
from splight_lib.client.datalake.remote_client import ( # noqa E402
|
|
7
7
|
SplightRestClient,
|
|
8
8
|
)
|
|
@@ -34,7 +34,7 @@ def test_initialization(mocker: MockerFixture):
|
|
|
34
34
|
"update_headers",
|
|
35
35
|
return_value=None,
|
|
36
36
|
)
|
|
37
|
-
_ = RemoteDatalakeClient(
|
|
37
|
+
_ = SyncRemoteDatalakeClient(
|
|
38
38
|
base_url=base_url,
|
|
39
39
|
access_id=access_id,
|
|
40
40
|
secret_key=secret_key,
|
|
@@ -49,7 +49,7 @@ def test_save(mocker: MockerFixture):
|
|
|
49
49
|
secret_key = os.getenv("SECRET_KEY")
|
|
50
50
|
access_id = os.getenv("ACCESS_ID")
|
|
51
51
|
|
|
52
|
-
client = RemoteDatalakeClient(
|
|
52
|
+
client = SyncRemoteDatalakeClient(
|
|
53
53
|
base_url=base_url,
|
|
54
54
|
access_id=access_id,
|
|
55
55
|
secret_key=secret_key,
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from datetime import datetime, timezone
|
|
3
|
-
from typing import ClassVar, Dict, List
|
|
3
|
+
from typing import ClassVar, Dict, List
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
|
-
from pydantic import BaseModel, Field, PrivateAttr
|
|
6
|
+
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
|
7
7
|
|
|
8
8
|
from splight_lib.client.database import DatabaseClientBuilder
|
|
9
9
|
from splight_lib.client.database.abstract import AbstractDatabaseClient
|
|
@@ -75,14 +75,10 @@ class SplightDatalakeBaseModel(BaseModel):
|
|
|
75
75
|
timestamp: datetime = Field(
|
|
76
76
|
default_factory=lambda: datetime.now(timezone.utc)
|
|
77
77
|
)
|
|
78
|
-
instance_id: str = ""
|
|
79
|
-
instance_type: str = ""
|
|
80
78
|
_collection_name: ClassVar[str] = "DatalakeModel"
|
|
81
79
|
_dl_client: AbstractDatalakeClient = PrivateAttr()
|
|
82
80
|
|
|
83
|
-
|
|
84
|
-
def convert_to_string(cls, value: Optional[str]):
|
|
85
|
-
return value if value else ""
|
|
81
|
+
model_config = ConfigDict(extra="ignore")
|
|
86
82
|
|
|
87
83
|
@classmethod
|
|
88
84
|
def get(
|
|
@@ -174,8 +170,7 @@ class SplightDatalakeBaseModel(BaseModel):
|
|
|
174
170
|
@staticmethod
|
|
175
171
|
def __get_datalake_client() -> AbstractDatalakeClient:
|
|
176
172
|
db_client = DatalakeClientBuilder.build(
|
|
177
|
-
|
|
178
|
-
use_buffer=settings.USE_BUFFER,
|
|
173
|
+
dl_client_type=settings.DL_CLIENT_TYPE,
|
|
179
174
|
parameters={
|
|
180
175
|
"path": settings.CURRENT_DIR,
|
|
181
176
|
"base_url": settings.SPLIGHT_PLATFORM_API_HOST,
|
|
@@ -23,21 +23,24 @@ class NativeOutput(SplightDatalakeBaseModel):
|
|
|
23
23
|
def get(
|
|
24
24
|
cls, asset: str, attribute: str, **params: Dict
|
|
25
25
|
) -> List["NativeOutput"]:
|
|
26
|
-
params["output_format"] = cls._output_format
|
|
27
26
|
return super().get(asset, attribute, **params)
|
|
28
27
|
|
|
29
28
|
@classmethod
|
|
30
|
-
async def async_get(
|
|
31
|
-
params
|
|
32
|
-
|
|
29
|
+
async def async_get(
|
|
30
|
+
cls, asset: str, attribute: str, **params: Dict
|
|
31
|
+
) -> List["NativeOutput"]:
|
|
32
|
+
return await super().async_get(asset, attribute, **params)
|
|
33
33
|
|
|
34
34
|
@classmethod
|
|
35
35
|
def get_dataframe(cls, asset: str, attribute: str, **params: Dict):
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
df = super().get_dataframe(asset, attribute, **params)
|
|
37
|
+
df["output_format"] = cls._output_format
|
|
38
|
+
return df
|
|
38
39
|
|
|
39
40
|
@classmethod
|
|
40
|
-
def latest(
|
|
41
|
+
def latest(
|
|
42
|
+
cls, asset: str, attribute: str, expiration: Optional[timedelta] = None
|
|
43
|
+
):
|
|
41
44
|
from_timestamp = None
|
|
42
45
|
if expiration:
|
|
43
46
|
from_timestamp = datetime.now(timezone.utc) - expiration
|
|
@@ -12,7 +12,7 @@ from splight_lib.stringcase import camelcase
|
|
|
12
12
|
|
|
13
13
|
def get_datalake_client() -> AbstractDatalakeClient:
|
|
14
14
|
client = DatalakeClientBuilder.build(
|
|
15
|
-
|
|
15
|
+
dl_client_type=settings.DL_CLIENT_TYPE,
|
|
16
16
|
parameters={
|
|
17
17
|
"path": settings.CURRENT_DIR,
|
|
18
18
|
"base_url": settings.SPLIGHT_PLATFORM_API_HOST,
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
from enum import Enum
|
|
2
3
|
from typing import Any, Dict, Optional, Tuple, Type
|
|
3
4
|
|
|
4
5
|
import yaml
|
|
@@ -11,6 +12,13 @@ from splight_lib.constants import DL_BUFFER_SIZE, DL_BUFFER_TIMEOUT
|
|
|
11
12
|
SPLIGHT_HOME = os.path.join(os.path.expanduser("~"), ".splight")
|
|
12
13
|
|
|
13
14
|
|
|
15
|
+
class DatalakeClientType(str, Enum):
|
|
16
|
+
LOCAL = "local"
|
|
17
|
+
SYNC = "sync"
|
|
18
|
+
BUFFERED_SYNC = "buffered_sync"
|
|
19
|
+
BUFFERED_ASYNC = "buffered_async"
|
|
20
|
+
|
|
21
|
+
|
|
14
22
|
class Singleton:
|
|
15
23
|
def __new__(cls, *args, **kw):
|
|
16
24
|
if not hasattr(cls, "_instance"):
|
|
@@ -58,12 +66,13 @@ class SplightSettings(BaseSettings, Singleton):
|
|
|
58
66
|
SPLIGHT_PLATFORM_API_HOST: str = "https://api.splight-ai.com"
|
|
59
67
|
|
|
60
68
|
# Parameters for local environment
|
|
69
|
+
# TODO: to deprecate this and its effects
|
|
61
70
|
LOCAL_ENVIRONMENT: bool = False
|
|
62
71
|
CURRENT_DIR: Optional[str] = None
|
|
63
72
|
|
|
64
|
-
# Parameters for
|
|
73
|
+
# Parameters for the datalake client
|
|
65
74
|
# Review if is better to use another class for only the DL Client settings
|
|
66
|
-
|
|
75
|
+
DL_CLIENT_TYPE: DatalakeClientType = DatalakeClientType.BUFFERED_ASYNC
|
|
67
76
|
DL_BUFFER_SIZE: int = DL_BUFFER_SIZE
|
|
68
77
|
DL_BUFFER_TIMEOUT: float = DL_BUFFER_TIMEOUT # seconds
|
|
69
78
|
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from typing import Any, Dict
|
|
2
|
-
|
|
3
|
-
from splight_lib.client.datalake.abstract import AbstractDatalakeClient
|
|
4
|
-
from splight_lib.client.datalake.local_client import LocalDatalakeClient
|
|
5
|
-
from splight_lib.client.datalake.remote_client import (
|
|
6
|
-
BufferedRemoteDatalakeClient,
|
|
7
|
-
RemoteDatalakeClient,
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class DatalakeClientBuilder:
|
|
12
|
-
@staticmethod
|
|
13
|
-
def build(
|
|
14
|
-
local: bool = False,
|
|
15
|
-
use_buffer: bool = True,
|
|
16
|
-
parameters: Dict[str, Any] = {},
|
|
17
|
-
) -> AbstractDatalakeClient:
|
|
18
|
-
if local:
|
|
19
|
-
dl_client = LocalDatalakeClient(**parameters)
|
|
20
|
-
elif use_buffer:
|
|
21
|
-
dl_client = BufferedRemoteDatalakeClient(**parameters)
|
|
22
|
-
else:
|
|
23
|
-
dl_client = RemoteDatalakeClient(**parameters)
|
|
24
|
-
return dl_client
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/models/tests/test_database_model.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{splight_lib-5.4.3 → splight_lib-5.5.0.dev1}/splight_lib/restclient/tests/test_restclient.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|