tilebox-datasets 0.46.0__py3-none-any.whl → 0.48.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tilebox/datasets/aio/client.py +16 -10
- tilebox/datasets/client.py +23 -4
- tilebox/datasets/message_pool.py +2 -1
- tilebox/datasets/progress.py +10 -3
- tilebox/datasets/protobuf_conversion/field_types.py +32 -12
- tilebox/datasets/protobuf_conversion/protobuf_xarray.py +9 -9
- tilebox/datasets/protobuf_conversion/to_protobuf.py +9 -8
- tilebox/datasets/query/id_interval.py +9 -11
- tilebox/datasets/query/time_interval.py +3 -1
- tilebox/datasets/service.py +53 -6
- tilebox/datasets/sync/client.py +16 -11
- {tilebox_datasets-0.46.0.dist-info → tilebox_datasets-0.48.0.dist-info}/METADATA +1 -1
- {tilebox_datasets-0.46.0.dist-info → tilebox_datasets-0.48.0.dist-info}/RECORD +14 -14
- {tilebox_datasets-0.46.0.dist-info → tilebox_datasets-0.48.0.dist-info}/WHEEL +0 -0
tilebox/datasets/aio/client.py
CHANGED
@@ -2,6 +2,7 @@ from uuid import UUID
 
 from _tilebox.grpc.aio.channel import open_channel
 from _tilebox.grpc.aio.error import with_pythonic_errors
+from _tilebox.grpc.error import NotFoundError
 from tilebox.datasets.aio.dataset import DatasetClient
 from tilebox.datasets.client import Client as BaseClient
 from tilebox.datasets.client import token_from_env
@@ -33,33 +34,38 @@ class Client:
         )
         self._client = BaseClient(service)
 
-    async def
+    async def create_or_update_dataset(
         self,
         kind: DatasetKind,
         code_name: str,
-        fields: list[FieldDict],
+        fields: list[FieldDict] | None = None,
         *,
         name: str | None = None,
-        description: str | None = None,
     ) -> DatasetClient:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
+            fields: The custom fields of the dataset.
             name: The name of the dataset. Defaults to the code name.
-            description: A short description of the dataset. Optional.
 
         Returns:
             The created dataset.
         """
-        if name is None:
-            name = code_name
-        if description is None:
-            description = ""
 
-
+        try:
+            dataset = await self.dataset(code_name)
+        except NotFoundError:
+            return await self._client.create_dataset(kind, code_name, fields or [], name or code_name, DatasetClient)
+
+        return await self._client.update_dataset(
+            kind,
+            dataset._dataset.id,  # noqa: SLF001
+            fields or [],
+            name or dataset._dataset.name,  # noqa: SLF001
+            DatasetClient,
+        )
 
     async def datasets(self) -> Group:
         """Fetch all available datasets."""
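For orientation, the new create_or_update_dataset entry point on the async client can be exercised roughly as follows. This is a minimal sketch based only on the signature shown above; the import path of the async Client, the DatasetKind member, the Client constructor arguments and the FieldDict contents are illustrative assumptions, not verified against the released package.

    import asyncio

    from tilebox.datasets.aio import Client  # assumed re-export of the async client
    from tilebox.datasets.data.datasets import DatasetKind  # import path taken from the sync client diff

    async def main() -> None:
        client = Client()  # constructor arguments (e.g. an auth token) omitted here
        dataset = await client.create_or_update_dataset(
            DatasetKind.TEMPORAL,  # assumed enum member, for illustration only
            "my_dataset",
            fields=[{"name": "value", "type": "float64"}],  # illustrative FieldDict shape
            name="My Dataset",
        )
        print(dataset)

    asyncio.run(main())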
tilebox/datasets/client.py
CHANGED
@@ -26,13 +26,32 @@ class Client:
     def __init__(self, service: TileboxDatasetService) -> None:
         self._service = service
 
-    def create_dataset(
-        self,
+    def create_dataset(
+        self,
+        kind: DatasetKind,
+        code_name: str,
+        fields: list[FieldDict] | None,
+        name: str | None,
+        py_dataset_class: type[T],
     ) -> Promise[T]:
         return (
-            self._service.create_dataset(kind, code_name,
+            self._service.create_dataset(kind, code_name, name or code_name, fields or [])
             .then(_ensure_registered)
-            .then(lambda dataset:
+            .then(lambda dataset: py_dataset_class(self._service, dataset))
+        )
+
+    def update_dataset(
+        self,
+        kind: DatasetKind,
+        dataset_id: UUID,
+        fields: list[FieldDict] | None,
+        name: str | None,
+        py_dataset_class: type[T],
+    ) -> Promise[T]:
+        return (
+            self._service.update_dataset(kind, dataset_id, name, fields or [])
+            .then(_ensure_registered)
+            .then(lambda dataset: py_dataset_class(self._service, dataset))
         )
 
     def datasets(self, dataset_type: type[T]) -> Promise[Group]:
tilebox/datasets/message_pool.py
CHANGED
@@ -1,5 +1,6 @@
 from google.protobuf import descriptor_pb2, duration_pb2, timestamp_pb2
 from google.protobuf.descriptor_pool import Default
+from google.protobuf.message import Message
 from google.protobuf.message_factory import GetMessageClass, GetMessages
 
 from tilebox.datasets.data.datasets import AnnotatedType
@@ -25,5 +26,5 @@ def register_message_types(descriptor_set: descriptor_pb2.FileDescriptorSet) ->
     GetMessages(descriptor_set.file, pool=Default())
 
 
-def get_message_type(type_url: str) -> type:
+def get_message_type(type_url: str) -> type[Message]:
     return GetMessageClass(Default().FindMessageTypeByName(type_url))
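get_message_type resolves a fully qualified message name against the default descriptor pool and now advertises type[Message] as its return type. The same protobuf mechanism can be demonstrated with a well-known type, which is registered in the default pool simply by importing its generated module; a standalone sketch:

    from google.protobuf import timestamp_pb2  # noqa: F401  # importing registers the descriptor in the default pool
    from google.protobuf.descriptor_pool import Default
    from google.protobuf.message_factory import GetMessageClass

    # Same lookup as get_message_type above, applied to google.protobuf.Timestamp.
    timestamp_cls = GetMessageClass(Default().FindMessageTypeByName("google.protobuf.Timestamp"))
    ts = timestamp_cls(seconds=1, nanos=500)
    print(ts)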
tilebox/datasets/progress.py
CHANGED
@@ -3,6 +3,11 @@ from datetime import datetime
 from types import TracebackType
 from typing import Any
 
+try:
+    from typing import Self  # ty: ignore[unresolved-import]
+except ImportError:  # Self is only available in Python 3.11+
+    from typing_extensions import Self
+
 from tqdm.auto import tqdm
 
 from tilebox.datasets.query.time_interval import TimeInterval
@@ -42,7 +47,7 @@ class TimeIntervalProgressBar:
         self._actual_start_time = actual_start_time
         self._total_data_points = 0
 
-    def __enter__(self) ->
+    def __enter__(self) -> Self:
         self._progress_bar = tqdm(
             bar_format="{l_bar}{bar}[{elapsed}<{remaining}{postfix}]",
             total=self._calc_progress_seconds(self._interval.end),
@@ -62,7 +67,8 @@ class TimeIntervalProgressBar:
 
     def set_progress(self, time: datetime) -> None:
         """Set the progress of the progress bar to the given time"""
-
+        total = self._calc_progress_seconds(self._interval.end)
+        done = min(self._calc_progress_seconds(time), total)
         self._progress_bar.update(done - self._progress_bar.n)
 
     def set_download_info(self, datapoints: int, byte_size: int, download_time: float) -> None:
@@ -79,7 +85,8 @@ class TimeIntervalProgressBar:
     ) -> None:
         try:
             if traceback is None:
-                self.
+                total = self._calc_progress_seconds(self._interval.end)
+                self._progress_bar.update(total - self._progress_bar.n)  # set to 100%
 
             self._progress_bar.close()  # mark as completed or failed
         except AttributeError:
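The new Self import block is the usual compatibility shim for interpreters older than Python 3.11, where typing.Self does not exist. A self-contained illustration of the pattern, independent of the tilebox classes:

    try:
        from typing import Self  # available since Python 3.11
    except ImportError:
        from typing_extensions import Self  # backport for older interpreters

    class Tracker:
        # Annotating __enter__ with Self keeps the precise subclass type for type checkers.
        def __enter__(self) -> Self:
            return self

        def __exit__(self, *exc_info: object) -> None:
            pass

    with Tracker() as t:
        print(type(t).__name__)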
tilebox/datasets/protobuf_conversion/field_types.py
CHANGED
@@ -1,9 +1,10 @@
-from collections.abc import
+from collections.abc import Sequence
 from datetime import timedelta
 from typing import Any
 from uuid import UUID
 
 import numpy as np
+import pandas as pd
 from google.protobuf.descriptor import FieldDescriptor
 from google.protobuf.duration_pb2 import Duration
 from google.protobuf.message import Message
@@ -16,9 +17,12 @@ from shapely import from_wkb
 from tilebox.datasets.datasets.v1.well_known_types_pb2 import UUID as UUIDMessage  # noqa: N811
 from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3
 
-
+ScalarProtoFieldValue = Message | float | str | bool | bytes
 
-
+
+ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None
+
+_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
     npdtypes.Int8DType: np.int8(0),
     npdtypes.Int16DType: np.int16(0),
     npdtypes.Int32DType: np.int32(0),
@@ -106,7 +110,7 @@ class TimestampField(ProtobufFieldType):
         return value.seconds * 10**9 + value.nanos
 
     def to_proto(self, value: DatetimeScalar) -> Timestamp | None:
-        if value
+        if is_missing(value) or (isinstance(value, np.datetime64) and np.isnat(value)):
             return None
         # we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
         seconds, nanos = divmod(to_datetime(value, utc=True).value, 10**9)
@@ -123,10 +127,10 @@ class TimeDeltaField(ProtobufFieldType):
         return value.seconds * 10**9 + value.nanos
 
     def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
-        if value
+        if is_missing(value) or (isinstance(value, np.timedelta64) and np.isnat(value)):
             return None
         # we use pandas to_timedelta function to handle a variety of input types that can be coerced to timedeltas
-        seconds, nanos = divmod(to_timedelta(value).value, 10**9)
+        seconds, nanos = divmod(to_timedelta(value).value, 10**9)
         return Duration(seconds=seconds, nanos=nanos)
 
 
@@ -140,7 +144,7 @@ class UUIDField(ProtobufFieldType):
         return str(UUID(bytes=value.uuid))
 
     def to_proto(self, value: str | UUID) -> UUIDMessage | None:
-        if
+        if is_missing(value) or value == "":  # missing or empty string
            return None
 
         if isinstance(value, str):
@@ -159,7 +163,7 @@ class GeometryField(ProtobufFieldType):
         return from_wkb(value.wkb)
 
     def to_proto(self, value: Any) -> Geometry | None:
-        if value
+        if is_missing(value):
             return None
         return Geometry(wkb=value.wkb)
 
@@ -174,7 +178,7 @@ class Vec3Field(ProtobufFieldType):
         return value.x, value.y, value.z
 
     def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
-        if value
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return Vec3(x=value[0], y=value[1], z=value[2])
 
@@ -189,7 +193,7 @@ class QuaternionField(ProtobufFieldType):
         return value.q1, value.q2, value.q3, value.q4
 
     def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
-        if value
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return Quaternion(q1=value[0], q2=value[1], q3=value[2], q4=value[3])
 
@@ -204,7 +208,7 @@ class LatLonField(ProtobufFieldType):
         return value.latitude, value.longitude
 
     def to_proto(self, value: tuple[float, float]) -> LatLon | None:
-        if value
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return LatLon(latitude=value[0], longitude=value[1])
 
@@ -220,7 +224,7 @@ class LatLonAltField(ProtobufFieldType):
         return value.latitude, value.longitude, value.altitude
 
     def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
-        if value
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return LatLonAlt(latitude=value[0], longitude=value[1], altitude=value[2])
 
@@ -300,3 +304,19 @@ def _camel_to_uppercase(name: str) -> str:
     'PROCESSING_LEVEL'
     """
     return "".join(["_" + c.lower() if c.isupper() else c for c in name]).lstrip("_").upper()
+
+
+def is_missing(value: Any) -> bool:
+    """Check if a value represents a missing/null value.
+
+    Handles None, np.nan, pd.NA, NaT, and other pandas missing value sentinels.
+    This is needed for pandas 3.0+ compatibility where object-dtype columns use
+    np.nan instead of None for missing values.
+    """
+    try:
+        return bool(pd.isna(value))
+    except ValueError:
+        # pd.isna returns either a bool, or an array of bools. In case of an array, converting the result to bool()
+        # will raise a ValueError. For an array, we know it's not a missing value, even an array of all NaNs is not
+        # a missing value.
+        return False
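The try/except ValueError inside is_missing exists because pd.isna returns a scalar bool for scalar input but an element-wise boolean array for array-like input, and calling bool() on that array raises. A quick standalone illustration of the plain pandas/numpy behaviour the helper relies on:

    import numpy as np
    import pandas as pd

    # Scalar sentinels all report as missing.
    print(pd.isna(None), pd.isna(np.nan), pd.isna(pd.NaT), pd.isna(pd.NA))  # True True True True
    print(pd.isna(3.0), pd.isna("abc"))  # False False

    # Array-like input yields an array of bools; bool() on it raises ValueError,
    # which is why is_missing treats such values as "not missing".
    try:
        bool(pd.isna(np.array([1.0, np.nan])))
    except ValueError:
        print("array input -> not treated as a missing value")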
tilebox/datasets/protobuf_conversion/protobuf_xarray.py
CHANGED
@@ -3,7 +3,7 @@ Functionality for converting protobuf messages to xarray datasets.
 """
 
 import contextlib
-from collections.abc import
+from collections.abc import Sequence
 from typing import Any, TypeVar
 
 import numpy as np
@@ -231,10 +231,10 @@ class _SimpleFieldConverter(_FieldConverter):
         elif buffer_size > len(self._data):
             # resize the data buffer to the new capacity, by just padding it with zeros at the end
             missing = buffer_size - len(self._data)
-            self._data = np.pad(
+            self._data = np.pad(  # ty: ignore[no-matching-overload]
                 self._data,
                 ((0, missing), (0, 0)),
-                constant_values=self._type.fill_value,
+                constant_values=self._type.fill_value,
             )
 
 
@@ -258,13 +258,13 @@ class _ArrayFieldConverter(_FieldConverter):
         self._array_dim: int | None = None
 
     def __call__(self, index: int, value: ProtoFieldValue) -> None:
-        if not isinstance(value,
+        if not isinstance(value, Sequence):
             raise TypeError(f"Expected array field but got {type(value)}")
 
         if self._array_dim is None or len(value) > self._array_dim:
             self._resize_array_dim(len(value))
 
-        for i, v in enumerate(value):  #
+        for i, v in enumerate(value):  # somehow the isinstance(value, Sized) isn't used here
             self._data[index, i, :] = self._type.from_proto(v)
 
     def finalize(
@@ -309,10 +309,10 @@ class _ArrayFieldConverter(_FieldConverter):
         else:  # resize the data buffer to the new capacity, by just padding it with zeros at the end
             missing_capacity = self._capacity - self._data.shape[0]
             missing_array_dim = self._array_dim - self._data.shape[1]
-            self._data = np.pad(
+            self._data = np.pad(  # ty: ignore[no-matching-overload]
                 self._data,
                 ((0, missing_capacity), (0, missing_array_dim), (0, 0)),
-                constant_values=self._type.fill_value,
+                constant_values=self._type.fill_value,
             )
 
 
@@ -374,13 +374,13 @@ def _create_field_converter(field: FieldDescriptor) -> _FieldConverter:
     """
     # special handling for enums:
     if field.type == FieldDescriptor.TYPE_ENUM:
-        if field.is_repeated:
+        if field.is_repeated:
             raise NotImplementedError("Repeated enum fields are not supported")
 
         return _EnumFieldConverter(field.name, enum_mapping_from_field_descriptor(field))
 
     field_type = infer_field_type(field)
-    if field.is_repeated:
+    if field.is_repeated:
         return _ArrayFieldConverter(field.name, field_type)
 
     return _SimpleFieldConverter(field.name, field_type)
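The np.pad calls that now carry a ty: ignore comment keep their runtime behaviour: they grow the buffer along the leading axis and fill the new slots with the field's fill value. A standalone sketch of that numpy call with made-up sizes:

    import numpy as np

    data = np.ones((2, 3))  # existing buffer: 2 rows of 3 values
    missing = 2             # extra rows needed to reach the new capacity
    fill_value = 0.0

    # Pad only at the end of the first axis; the second axis is left untouched.
    padded = np.pad(data, ((0, missing), (0, 0)), constant_values=fill_value)
    print(padded.shape)  # (4, 3)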
tilebox/datasets/protobuf_conversion/to_protobuf.py
CHANGED
@@ -12,6 +12,7 @@ from tilebox.datasets.protobuf_conversion.field_types import (
     ProtobufFieldType,
     ProtoFieldValue,
     infer_field_type,
+    is_missing,
 )
 
 IngestionData = Mapping[str, Collection[Any]] | Iterable[tuple[str, Collection[Any]]] | pd.DataFrame | xr.Dataset
@@ -20,7 +21,7 @@ DatapointIDs = pd.DataFrame | pd.Series | xr.Dataset | xr.DataArray | np.ndarray
 
 def to_messages(  # noqa: C901, PLR0912
     data: IngestionData,
-    message_type: type,
+    message_type: type[Message],
     required_fields: list[str] | None = None,
     ignore_fields: list[str] | None = None,
 ) -> list[Message]:
@@ -44,9 +45,9 @@ def to_messages(  # noqa: C901, PLR0912
     # let's validate our fields, to make sure that they are all known fields for the given protobuf message
     # and that they are all lists of the same length
     field_lengths = defaultdict(list)
-    fields: dict[str, pd.Series | np.ndarray] = {}
+    fields: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]] = {}
 
-    field_names =
+    field_names = [str(field) for field in data]
     if isinstance(data, xr.Dataset):
         # list(dataset) only returns the variables, not the coords, so for xarray we need to add the coords as well
         # but not all coords, we only care abou time for now
@@ -84,7 +85,7 @@ def to_messages(  # noqa: C901, PLR0912
         else:
             values = convert_values_to_proto(values, field_type, filter_none=False)
 
-        fields[field_name] = values
+        fields[field_name] = values
 
     # now convert every datapoint to a protobuf message
     if len(field_lengths) == 0:  # early return, no actual data to convert
@@ -103,7 +104,7 @@ def marshal_messages(messages: list[Message]) -> list[bytes]:
 
 
 def columnar_to_row_based(
-    data: dict[str, pd.Series | np.ndarray],
+    data: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]],
 ) -> Iterator[dict[str, Any]]:
     if len(data) == 0:
         return
@@ -120,18 +121,18 @@ def convert_values_to_proto(
     values: np.ndarray | pd.Series, field_type: ProtobufFieldType, filter_none: bool = False
 ) -> list[ProtoFieldValue]:
     if filter_none:
-        return [field_type.to_proto(value) for value in values if
+        return [field_type.to_proto(value) for value in values if not is_missing(value)]
     return [field_type.to_proto(value) for value in values]
 
 
 def convert_repeated_values_to_proto(
     values: np.ndarray | pd.Series | list[np.ndarray], field_type: ProtobufFieldType
-) ->
+) -> list[ProtoFieldValue]:
     if isinstance(values, np.ndarray):  # it was an xarray, with potentially padded fill values at the end
         values = trim_trailing_fill_values(values, field_type.fill_value)
 
     # since repeated fields can have different lengths between datapoints, we can filter out None values here
-    return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values]
+    return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values]  # ty: ignore[invalid-return-type]
 
 
 def trim_trailing_fill_values(values: np.ndarray, fill_value: Any) -> list[np.ndarray]:
tilebox/datasets/query/id_interval.py
CHANGED
@@ -50,23 +50,21 @@ class IDInterval:
         Returns:
             IDInterval: The parsed ID interval
         """
+        if isinstance(arg, IDInterval):
+            return arg
 
-
-
-
-            case (UUID(), UUID()):
-                start, end = arg
+        if isinstance(arg, tuple) and len(arg) == 2:
+            start, end = arg
+            if isinstance(start, UUID) and isinstance(end, UUID):
                 return IDInterval(
-                    start_id=start,
-                    end_id=end,
-                    start_exclusive=start_exclusive,
-                    end_inclusive=end_inclusive,
+                    start_id=start, end_id=end, start_exclusive=start_exclusive, end_inclusive=end_inclusive
                 )
-
-                start, end = arg
+            if isinstance(start, str) and isinstance(end, str):
                 return IDInterval(
                     start_id=UUID(start),
                     end_id=UUID(end),
                     start_exclusive=start_exclusive,
                     end_inclusive=end_inclusive,
                 )
+
+        raise ValueError(f"Failed to convert {arg} ({type(arg)}) to IDInterval")
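The refactor replaces the previous match statement with plain isinstance checks, so an existing IDInterval, a (UUID, UUID) tuple, or a (str, str) tuple of UUID strings are all accepted. A simplified standalone restatement of that dispatch, without the start_exclusive/end_inclusive flags carried by the real classmethod:

    from uuid import UUID, uuid4

    def parse_id_pair(arg: object) -> tuple[UUID, UUID]:
        if isinstance(arg, tuple) and len(arg) == 2:
            start, end = arg
            if isinstance(start, UUID) and isinstance(end, UUID):
                return start, end
            if isinstance(start, str) and isinstance(end, str):
                return UUID(start), UUID(end)
        raise ValueError(f"Failed to convert {arg} ({type(arg)}) to an ID pair")

    print(parse_id_pair((uuid4(), uuid4())))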
tilebox/datasets/query/time_interval.py
CHANGED
@@ -15,8 +15,10 @@ _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
 
 # A type alias for the different types that can be used to specify a time interval
 TimeIntervalLike: TypeAlias = (
-    DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset |
+    "DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | TimeInterval"
 )
+# once we require python >= 3.12 we can replace this with a type statement, which doesn't require a string at all
+# type TimeIntervalLike = DatetimeScalar | tuple[DatetimeScalar ... | TimeInterval
 
 
 @dataclass(frozen=True)
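Quoting the whole alias value turns it into a forward reference, which is what lets TimeIntervalLike mention TimeInterval before that class is defined further down in the same module. A minimal standalone illustration of the same pattern, using hypothetical names:

    from dataclasses import dataclass
    from typing import TypeAlias

    # The string is only evaluated by type checkers, so Interval may be defined afterwards.
    IntervalLike: TypeAlias = "int | tuple[int, int] | Interval"

    @dataclass(frozen=True)
    class Interval:
        start: int
        end: int

    def parse(value: "IntervalLike") -> Interval:
        if isinstance(value, Interval):
            return value
        if isinstance(value, tuple):
            return Interval(*value)
        return Interval(value, value)

    print(parse((1, 5)))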
tilebox/datasets/service.py
CHANGED
@@ -37,6 +37,7 @@ from tilebox.datasets.datasets.v1.datasets_pb2 import (
     GetDatasetRequest,
     ListDatasetsRequest,
     Package,
+    UpdateDatasetRequest,
 )
 from tilebox.datasets.datasets.v1.datasets_pb2_grpc import DatasetServiceStub
 from tilebox.datasets.query.pagination import Pagination
@@ -64,24 +65,70 @@ class TileboxDatasetService:
         self._data_ingestion_service = data_ingestion_service_stub
 
     def create_dataset(
-        self, kind: DatasetKind, code_name: str,
+        self, kind: DatasetKind, code_name: str, name: str, custom_fields: list[FieldDict]
     ) -> Promise[Dataset]:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
             name: The name of the dataset.
-
+            fields: The custom fields of the dataset
 
         Returns:
             The created dataset.
         """
-        dataset_type = DatasetType(
-
+        dataset_type = DatasetType(
+            kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in custom_fields]
+        )
+        req = CreateDatasetRequest(name=name, type=dataset_type.to_message(), code_name=code_name)
         return Promise.resolve(self._dataset_service.CreateDataset(req)).then(Dataset.from_message)
 
+    def update_dataset(
+        self, kind: DatasetKind, dataset_id: UUID, name: str | None, custom_fields: list[FieldDict]
+    ) -> Promise[Dataset]:
+        """Update a dataset.
+
+        Args:
+            kind: The kind of the dataset to update, cannot be changed.
+            dataset_id: The id of the dataset to update, cannot be changed.
+            name: The new name of the dataset.
+            custom_fields: The new list of custom fields of the dataset.
+
+        Returns:
+            The updated dataset.
+        """
+        dataset_type = DatasetType(
+            kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in custom_fields]
+        )
+        req = UpdateDatasetRequest(id=uuid_to_uuid_message(dataset_id), name=name, type=dataset_type.to_message())
+        return Promise.resolve(self._dataset_service.UpdateDataset(req)).then(Dataset.from_message)
+
+    def create_or_update_dataset(
+        self, kind: DatasetKind, code_name: str, name: str, custom_fields: list[FieldDict]
+    ) -> Promise[Dataset]:
+        """Create a new dataset, or update it if it already exists.
+
+        Args:
+            kind: The kind of the dataset.
+            code_name: The code name of the dataset.
+            name: The name of the dataset.
+            custom_fields: The custom fields of the dataset
+
+        Returns:
+            The created or updated dataset.
+        """
+        return (
+            Promise.resolve(self._dataset_service.GetDataset(GetDatasetRequest(slug=code_name)))
+            .then(
+                did_fulfill=lambda dataset: self.update_dataset(
+                    kind, Dataset.from_message(dataset).id, name, custom_fields
+                ),
+                did_reject=lambda _: self.create_dataset(kind, code_name, name, custom_fields),
+            )
+            .then(Dataset.from_message)
+        )
+
     def list_datasets(self) -> Promise[ListDatasetsResponse]:
         """List all datasets and dataset groups."""
         return Promise.resolve(
@@ -224,7 +271,7 @@ def _client_info() -> ClientInfo:
 def _environment_info() -> str:
     python_version = sys.version.split(" ")[0]
     try:
-        shell = str(get_ipython())  #
+        shell = str(get_ipython())  # ty: ignore[unresolved-reference]
     except NameError:
         return f"Python {python_version}"  # Probably standard Python interpreter
 
tilebox/datasets/sync/client.py
CHANGED
@@ -1,7 +1,7 @@
 from uuid import UUID
 
 from _tilebox.grpc.channel import open_channel
-from _tilebox.grpc.error import with_pythonic_errors
+from _tilebox.grpc.error import NotFoundError, with_pythonic_errors
 from tilebox.datasets.client import Client as BaseClient
 from tilebox.datasets.client import token_from_env
 from tilebox.datasets.data.datasets import DatasetKind, FieldDict
@@ -33,33 +33,38 @@ class Client:
         )
         self._client = BaseClient(service)
 
-    def
+    def create_or_update_dataset(
         self,
         kind: DatasetKind,
         code_name: str,
-        fields: list[FieldDict],
+        fields: list[FieldDict] | None = None,
         *,
         name: str | None = None,
-        description: str | None = None,
     ) -> DatasetClient:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
+            fields: The custom fields of the dataset.
             name: The name of the dataset. Defaults to the code name.
-            description: A short description of the dataset. Optional.
 
         Returns:
             The created dataset.
         """
-        if name is None:
-            name = code_name
-        if description is None:
-            description = ""
 
-
+        try:
+            dataset = self.dataset(code_name)
+        except NotFoundError:
+            return self._client.create_dataset(kind, code_name, fields or [], name or code_name, DatasetClient).get()
+
+        return self._client.update_dataset(
+            kind,
+            dataset._dataset.id,  # noqa: SLF001
+            fields or [],
+            name or dataset._dataset.name,  # noqa: SLF001
+            DatasetClient,
+        ).get()
 
     def datasets(self) -> Group:
         """Fetch all available datasets."""
{tilebox_datasets-0.46.0.dist-info → tilebox_datasets-0.48.0.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@
 tilebox/datasets/__init__.py,sha256=4fI6ErCVb15KD_iPWIM6TUXTPgU8qrJVx3Cx7m7jeC8,824
-tilebox/datasets/client.py,sha256=
+tilebox/datasets/client.py,sha256=uWAFr77kJ1HXR8hXsDDl8H6ug-PKUdPomwc4Z85QQ9I,4718
 tilebox/datasets/group.py,sha256=DoGl4w7Viy-l4kE9580dJOymP_B2pj3LRqvMNxvrYmU,1884
-tilebox/datasets/message_pool.py,sha256=
-tilebox/datasets/progress.py,sha256=
-tilebox/datasets/service.py,sha256=
+tilebox/datasets/message_pool.py,sha256=nGuwihK2Lhfk-q5cuRtjXxmgPlEU_DEp5uYRueCPWHk,1325
+tilebox/datasets/progress.py,sha256=Mmn1ukjX_lNhgR7jnYFgksAoc2r33DK44VqIWMogI04,3776
+tilebox/datasets/service.py,sha256=32Lw-giFGchbqDcX6vfYJxZfdPUxAtEjPxhf5P93DkE,14043
 tilebox/datasets/uuid.py,sha256=pqtp5GMHM41KEKZHPdwrHVVThY9VDa7BPbCogrM01ZU,1107
 tilebox/datasets/aio/__init__.py,sha256=0x_gddLgDsUCdl8MMZj4MPH1lp4HuOrExMHTjIFmM6s,405
-tilebox/datasets/aio/client.py,sha256=
+tilebox/datasets/aio/client.py,sha256=fJ7AF0cCocc0XT-Z1V3eb1QjnP4FirEixd4IufnEZoo,3499
 tilebox/datasets/aio/dataset.py,sha256=CBiEhT7Pex5JcYulCkKl4DX80oGj6jQB1uUfVV_F-zo,22511
 tilebox/datasets/aio/pagination.py,sha256=dqxnG1UIvXQwhS5XZDlnmtiy_zat5j5E7xeucqI1dZU,6111
 tilebox/datasets/aio/timeseries.py,sha256=iQqIyh9TPL_gJz18GCxmtFJEwObR9S2rPsUohFYM8wQ,301
@@ -45,15 +45,15 @@ tilebox/datasets/datasets/v1/well_known_types_pb2.py,sha256=1Pi83QERn1nFFenZkITH
 tilebox/datasets/datasets/v1/well_known_types_pb2.pyi,sha256=RW-TrAbeFof2x-Qa5UPH0nCg-4KTuJ1RLU6SApnS60E,7216
 tilebox/datasets/datasets/v1/well_known_types_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY5tCk3p76RH5Zc,158
 tilebox/datasets/protobuf_conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/datasets/protobuf_conversion/field_types.py,sha256=
-tilebox/datasets/protobuf_conversion/protobuf_xarray.py,sha256=
-tilebox/datasets/protobuf_conversion/to_protobuf.py,sha256
+tilebox/datasets/protobuf_conversion/field_types.py,sha256=vvPy1-8vsHedza-jPOCFXtQXwK6QVRc0BEXESSQ1jgU,12793
+tilebox/datasets/protobuf_conversion/protobuf_xarray.py,sha256=7OTQ3NnTVL52KoCeC8X5yOPG-Y60UpBm5bRuT8IAFAY,16123
+tilebox/datasets/protobuf_conversion/to_protobuf.py,sha256=-rJP954W4ZyONyHQSMtnLSnVIs9j73PCX0hPu7O-fL4,7504
 tilebox/datasets/query/__init__.py,sha256=lR-tzsVyx1QXe-uIHrYkCWcjmLRfKzmRHC7E1TTGroY,245
-tilebox/datasets/query/id_interval.py,sha256=
+tilebox/datasets/query/id_interval.py,sha256=HiaAqn9Hh-07N9MSTjvVO0W0wJOTIP1HLy15PDa1nQQ,2583
 tilebox/datasets/query/pagination.py,sha256=0kaQI6v9sJnDJblP3VJn6erPbkP_LSwegFRSCzINGY0,774
-tilebox/datasets/query/time_interval.py,sha256=
+tilebox/datasets/query/time_interval.py,sha256=qJbCIjFzPt5oEi6YIEb4viNElJUPp1ZCanoA44eXITg,10216
 tilebox/datasets/sync/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/datasets/sync/client.py,sha256=
+tilebox/datasets/sync/client.py,sha256=5wRzSAkXtPAfY7ykzqSckzbvvMmBjm0Dft-0mCHkfAU,3431
 tilebox/datasets/sync/dataset.py,sha256=wh8grBQZJAPZ7_X_8Ui67hK6v4uGAPC2gx02PSwbUgE,22174
 tilebox/datasets/sync/pagination.py,sha256=IOSbpNTlv3Fx9QLdBMZHJxZSWeKJNLOVWkmSoKJHIcw,6025
 tilebox/datasets/sync/timeseries.py,sha256=4nTP8_tmv6V7PXTUNzzlbzlxv0OXo_IqVLtSdJpUOW0,303
@@ -63,6 +63,6 @@ tilebox/datasets/tilebox/v1/id_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY
 tilebox/datasets/tilebox/v1/query_pb2.py,sha256=l60DA1setyQhdBbZ_jgG8Pw3ourUSxXWU5P8AACYlpk,3444
 tilebox/datasets/tilebox/v1/query_pb2.pyi,sha256=f-u60POkJqzssOmCEbOrD5fam9_86c6MdY_CzpnZZk0,2061
 tilebox/datasets/tilebox/v1/query_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY5tCk3p76RH5Zc,158
-tilebox_datasets-0.
-tilebox_datasets-0.
-tilebox_datasets-0.
+tilebox_datasets-0.48.0.dist-info/METADATA,sha256=fRI4iuA8M4AK_QA2C87S60EzCgE5MAxNOaKgNQ_E3gQ,4234
+tilebox_datasets-0.48.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+tilebox_datasets-0.48.0.dist-info/RECORD,,
{tilebox_datasets-0.46.0.dist-info → tilebox_datasets-0.48.0.dist-info}/WHEEL
File without changes