tilebox-datasets 0.46.0__py3-none-any.whl → 0.48.0__py3-none-any.whl

tilebox/datasets/aio/client.py

@@ -2,6 +2,7 @@ from uuid import UUID
 
 from _tilebox.grpc.aio.channel import open_channel
 from _tilebox.grpc.aio.error import with_pythonic_errors
+from _tilebox.grpc.error import NotFoundError
 from tilebox.datasets.aio.dataset import DatasetClient
 from tilebox.datasets.client import Client as BaseClient
 from tilebox.datasets.client import token_from_env

@@ -33,33 +34,38 @@ class Client:
         )
         self._client = BaseClient(service)
 
-    async def create_dataset(
+    async def create_or_update_dataset(
         self,
         kind: DatasetKind,
         code_name: str,
-        fields: list[FieldDict],
+        fields: list[FieldDict] | None = None,
         *,
         name: str | None = None,
-        description: str | None = None,
     ) -> DatasetClient:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
+            fields: The custom fields of the dataset.
             name: The name of the dataset. Defaults to the code name.
-            description: A short description of the dataset. Optional.
 
         Returns:
             The created dataset.
         """
-        if name is None:
-            name = code_name
-        if description is None:
-            description = ""
 
-        return await self._client.create_dataset(kind, code_name, fields, name, description, DatasetClient)
+        try:
+            dataset = await self.dataset(code_name)
+        except NotFoundError:
+            return await self._client.create_dataset(kind, code_name, fields or [], name or code_name, DatasetClient)
+
+        return await self._client.update_dataset(
+            kind,
+            dataset._dataset.id,  # noqa: SLF001
+            fields or [],
+            name or dataset._dataset.name,  # noqa: SLF001
+            DatasetClient,
+        )
 
     async def datasets(self) -> Group:
         """Fetch all available datasets."""

tilebox/datasets/client.py

@@ -26,13 +26,32 @@ class Client:
     def __init__(self, service: TileboxDatasetService) -> None:
         self._service = service
 
-    def create_dataset(  # noqa: PLR0913
-        self, kind: DatasetKind, code_name: str, fields: list[FieldDict], name: str, summary: str, dataset_type: type[T]
+    def create_dataset(
+        self,
+        kind: DatasetKind,
+        code_name: str,
+        fields: list[FieldDict] | None,
+        name: str | None,
+        py_dataset_class: type[T],
     ) -> Promise[T]:
         return (
-            self._service.create_dataset(kind, code_name, fields, name, summary)
+            self._service.create_dataset(kind, code_name, name or code_name, fields or [])
             .then(_ensure_registered)
-            .then(lambda dataset: dataset_type(self._service, dataset))
+            .then(lambda dataset: py_dataset_class(self._service, dataset))
+        )
+
+    def update_dataset(
+        self,
+        kind: DatasetKind,
+        dataset_id: UUID,
+        fields: list[FieldDict] | None,
+        name: str | None,
+        py_dataset_class: type[T],
+    ) -> Promise[T]:
+        return (
+            self._service.update_dataset(kind, dataset_id, name, fields or [])
+            .then(_ensure_registered)
+            .then(lambda dataset: py_dataset_class(self._service, dataset))
         )
 
     def datasets(self, dataset_type: type[T]) -> Promise[Group]:

tilebox/datasets/message_pool.py

@@ -1,5 +1,6 @@
 from google.protobuf import descriptor_pb2, duration_pb2, timestamp_pb2
 from google.protobuf.descriptor_pool import Default
+from google.protobuf.message import Message
 from google.protobuf.message_factory import GetMessageClass, GetMessages
 
 from tilebox.datasets.data.datasets import AnnotatedType

@@ -25,5 +26,5 @@ def register_message_types(descriptor_set: descriptor_pb2.FileDescriptorSet) ->
     GetMessages(descriptor_set.file, pool=Default())
 
 
-def get_message_type(type_url: str) -> type:
+def get_message_type(type_url: str) -> type[Message]:
     return GetMessageClass(Default().FindMessageTypeByName(type_url))
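
The return annotation here tightens from a bare type to type[Message]. A small sketch of what that buys for callers (the message name passed below is made up):

msg_cls = get_message_type("datasets.v1.SomeDatapoint")  # hypothetical message name
msg = msg_cls()
payload = msg.SerializeToString()  # now well-typed: instances are known to be protobuf Messages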

tilebox/datasets/progress.py

@@ -3,6 +3,11 @@ from datetime import datetime
 from types import TracebackType
 from typing import Any
 
+try:
+    from typing import Self  # ty: ignore[unresolved-import]
+except ImportError:  # Self is only available in Python 3.11+
+    from typing_extensions import Self
+
 from tqdm.auto import tqdm
 
 from tilebox.datasets.query.time_interval import TimeInterval
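
The try/except above backports typing.Self to Python 3.10 via typing_extensions. The payoff shows up in subclasses, since -> Self follows the runtime type where a hardcoded class name would not; a standalone sketch with hypothetical names:

try:
    from typing import Self  # Python 3.11+
except ImportError:
    from typing_extensions import Self


class Managed:
    def __enter__(self) -> Self:
        return self

    def __exit__(self, *exc: object) -> None:
        pass


class TimedManaged(Managed):
    pass


with TimedManaged() as m:  # checkers now infer m as TimedManaged, not Managed
    pass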

@@ -42,7 +47,7 @@ class TimeIntervalProgressBar:
         self._actual_start_time = actual_start_time
         self._total_data_points = 0
 
-    def __enter__(self) -> "TimeIntervalProgressBar":
+    def __enter__(self) -> Self:
         self._progress_bar = tqdm(
             bar_format="{l_bar}{bar}[{elapsed}<{remaining}{postfix}]",
             total=self._calc_progress_seconds(self._interval.end),

@@ -62,7 +67,8 @@ class TimeIntervalProgressBar:
 
     def set_progress(self, time: datetime) -> None:
         """Set the progress of the progress bar to the given time"""
-        done = min(self._calc_progress_seconds(time), self._progress_bar.total)
+        total = self._calc_progress_seconds(self._interval.end)
+        done = min(self._calc_progress_seconds(time), total)
         self._progress_bar.update(done - self._progress_bar.n)
 
     def set_download_info(self, datapoints: int, byte_size: int, download_time: float) -> None:

@@ -79,7 +85,8 @@ class TimeIntervalProgressBar:
     ) -> None:
         try:
             if traceback is None:
-                self._progress_bar.update(self._progress_bar.total - self._progress_bar.n)  # set to 100%
+                total = self._calc_progress_seconds(self._interval.end)
+                self._progress_bar.update(total - self._progress_bar.n)  # set to 100%
 
             self._progress_bar.close()  # mark as completed or failed
         except AttributeError:

tilebox/datasets/protobuf_conversion/field_types.py

@@ -1,9 +1,10 @@
-from collections.abc import Sized
+from collections.abc import Sequence
 from datetime import timedelta
 from typing import Any
 from uuid import UUID
 
 import numpy as np
+import pandas as pd
 from google.protobuf.descriptor import FieldDescriptor
 from google.protobuf.duration_pb2 import Duration
 from google.protobuf.message import Message

@@ -16,9 +17,12 @@ from shapely import from_wkb
 from tilebox.datasets.datasets.v1.well_known_types_pb2 import UUID as UUIDMessage  # noqa: N811
 from tilebox.datasets.datasets.v1.well_known_types_pb2 import Geometry, LatLon, LatLonAlt, Quaternion, Vec3
 
-ProtoFieldValue = Message | float | str | bool | bytes | Sized | None
+ScalarProtoFieldValue = Message | float | str | bool | bytes
 
-_FILL_VALUES_BY_DTYPE = {
+
+ProtoFieldValue = ScalarProtoFieldValue | Sequence[ScalarProtoFieldValue] | None
+
+_FILL_VALUES_BY_DTYPE: dict[type[np.dtype[Any]], Any] = {
     npdtypes.Int8DType: np.int8(0),
     npdtypes.Int16DType: np.int16(0),
     npdtypes.Int32DType: np.int32(0),

@@ -106,7 +110,7 @@ class TimestampField(ProtobufFieldType):
         return value.seconds * 10**9 + value.nanos
 
     def to_proto(self, value: DatetimeScalar) -> Timestamp | None:
-        if value is None or (isinstance(value, np.datetime64) and np.isnat(value)):
+        if is_missing(value) or (isinstance(value, np.datetime64) and np.isnat(value)):
            return None
        # we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
        seconds, nanos = divmod(to_datetime(value, utc=True).value, 10**9)

@@ -123,10 +127,10 @@ class TimeDeltaField(ProtobufFieldType):
         return value.seconds * 10**9 + value.nanos
 
     def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
-        if value is None or (isinstance(value, np.timedelta64) and np.isnat(value)):
+        if is_missing(value) or (isinstance(value, np.timedelta64) and np.isnat(value)):
             return None
         # we use pandas to_timedelta function to handle a variety of input types that can be coerced to timedeltas
-        seconds, nanos = divmod(to_timedelta(value).value, 10**9)  # type: ignore[arg-type]
+        seconds, nanos = divmod(to_timedelta(value).value, 10**9)
         return Duration(seconds=seconds, nanos=nanos)
 
 

@@ -140,7 +144,7 @@ class UUIDField(ProtobufFieldType):
         return str(UUID(bytes=value.uuid))
 
     def to_proto(self, value: str | UUID) -> UUIDMessage | None:
-        if not value:  # None or empty string
+        if is_missing(value) or value == "":  # missing or empty string
             return None
 
         if isinstance(value, str):

@@ -159,7 +163,7 @@ class GeometryField(ProtobufFieldType):
         return from_wkb(value.wkb)
 
     def to_proto(self, value: Any) -> Geometry | None:
-        if value is None:
+        if is_missing(value):
             return None
         return Geometry(wkb=value.wkb)
 

@@ -174,7 +178,7 @@ class Vec3Field(ProtobufFieldType):
         return value.x, value.y, value.z
 
     def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
-        if value is None or np.all(np.isnan(value)):
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return Vec3(x=value[0], y=value[1], z=value[2])
 

@@ -189,7 +193,7 @@ class QuaternionField(ProtobufFieldType):
         return value.q1, value.q2, value.q3, value.q4
 
     def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
-        if value is None or np.all(np.isnan(value)):
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return Quaternion(q1=value[0], q2=value[1], q3=value[2], q4=value[3])
 

@@ -204,7 +208,7 @@ class LatLonField(ProtobufFieldType):
         return value.latitude, value.longitude
 
     def to_proto(self, value: tuple[float, float]) -> LatLon | None:
-        if value is None or np.all(np.isnan(value)):
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return LatLon(latitude=value[0], longitude=value[1])
 

@@ -220,7 +224,7 @@ class LatLonAltField(ProtobufFieldType):
         return value.latitude, value.longitude, value.altitude
 
     def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
-        if value is None or np.all(np.isnan(value)):
+        if is_missing(value) or np.all(np.isnan(value)):
             return None
         return LatLonAlt(latitude=value[0], longitude=value[1], altitude=value[2])
 

@@ -300,3 +304,19 @@ def _camel_to_uppercase(name: str) -> str:
     'PROCESSING_LEVEL'
     """
     return "".join(["_" + c.lower() if c.isupper() else c for c in name]).lstrip("_").upper()
+
+
+def is_missing(value: Any) -> bool:
+    """Check if a value represents a missing/null value.
+
+    Handles None, np.nan, pd.NA, NaT, and other pandas missing value sentinels.
+    This is needed for pandas 3.0+ compatibility where object-dtype columns use
+    np.nan instead of None for missing values.
+    """
+    try:
+        return bool(pd.isna(value))
+    except ValueError:
+        # pd.isna returns either a bool, or an array of bools. In case of an array, converting the result to bool()
+        # will raise a ValueError. For an array, we know it's not a missing value, even an array of all NaNs is not
+        # a missing value.
+        return False
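
The intended semantics of the new helper, sketched as assertions (illustrative, not taken from the package's test suite):

import numpy as np
import pandas as pd

for missing in (None, np.nan, pd.NA, pd.NaT, np.datetime64("NaT")):
    assert is_missing(missing)

assert not is_missing("")  # an empty string is a value, not a missing marker
assert not is_missing(0)
assert not is_missing(np.array([np.nan, np.nan]))  # arrays take the ValueError branch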

tilebox/datasets/protobuf_conversion/protobuf_xarray.py

@@ -3,7 +3,7 @@ Functionality for converting protobuf messages to xarray datasets.
 """
 
 import contextlib
-from collections.abc import Sized
+from collections.abc import Sequence
 from typing import Any, TypeVar
 
 import numpy as np

@@ -231,10 +231,10 @@ class _SimpleFieldConverter(_FieldConverter):
         elif buffer_size > len(self._data):
             # resize the data buffer to the new capacity, by just padding it with zeros at the end
             missing = buffer_size - len(self._data)
-            self._data = np.pad(
+            self._data = np.pad(  # ty: ignore[no-matching-overload]
                 self._data,
                 ((0, missing), (0, 0)),
-                constant_values=self._type.fill_value,  # type: ignore[arg-type]
+                constant_values=self._type.fill_value,
             )
 
 

@@ -258,13 +258,13 @@ class _ArrayFieldConverter(_FieldConverter):
         self._array_dim: int | None = None
 
     def __call__(self, index: int, value: ProtoFieldValue) -> None:
-        if not isinstance(value, Sized):
+        if not isinstance(value, Sequence):
             raise TypeError(f"Expected array field but got {type(value)}")
 
         if self._array_dim is None or len(value) > self._array_dim:
             self._resize_array_dim(len(value))
 
-        for i, v in enumerate(value):  # type: ignore[arg-type] # somehow the isinstance(value, Sized) isn't used here
+        for i, v in enumerate(value):  # somehow the isinstance(value, Sized) isn't used here
             self._data[index, i, :] = self._type.from_proto(v)
 
     def finalize(

@@ -309,10 +309,10 @@ class _ArrayFieldConverter(_FieldConverter):
         else:  # resize the data buffer to the new capacity, by just padding it with zeros at the end
             missing_capacity = self._capacity - self._data.shape[0]
             missing_array_dim = self._array_dim - self._data.shape[1]
-            self._data = np.pad(
+            self._data = np.pad(  # ty: ignore[no-matching-overload]
                 self._data,
                 ((0, missing_capacity), (0, missing_array_dim), (0, 0)),
-                constant_values=self._type.fill_value,  # type: ignore[arg-type]
+                constant_values=self._type.fill_value,
            )
 
 

@@ -374,13 +374,13 @@ def _create_field_converter(field: FieldDescriptor) -> _FieldConverter:
     """
     # special handling for enums:
     if field.type == FieldDescriptor.TYPE_ENUM:
-        if field.is_repeated:  # type: ignore[attr-defined]
+        if field.is_repeated:
            raise NotImplementedError("Repeated enum fields are not supported")
 
        return _EnumFieldConverter(field.name, enum_mapping_from_field_descriptor(field))
 
    field_type = infer_field_type(field)
-    if field.is_repeated:  # type: ignore[attr-defined]
+    if field.is_repeated:
        return _ArrayFieldConverter(field.name, field_type)
 
    return _SimpleFieldConverter(field.name, field_type)

tilebox/datasets/protobuf_conversion/to_protobuf.py

@@ -12,6 +12,7 @@ from tilebox.datasets.protobuf_conversion.field_types import (
     ProtobufFieldType,
     ProtoFieldValue,
     infer_field_type,
+    is_missing,
 )
 
 IngestionData = Mapping[str, Collection[Any]] | Iterable[tuple[str, Collection[Any]]] | pd.DataFrame | xr.Dataset

@@ -20,7 +21,7 @@ DatapointIDs = pd.DataFrame | pd.Series | xr.Dataset | xr.DataArray | np.ndarray
 
 def to_messages(  # noqa: C901, PLR0912
     data: IngestionData,
-    message_type: type,
+    message_type: type[Message],
     required_fields: list[str] | None = None,
     ignore_fields: list[str] | None = None,
 ) -> list[Message]:

@@ -44,9 +45,9 @@ def to_messages(  # noqa: C901, PLR0912
     # let's validate our fields, to make sure that they are all known fields for the given protobuf message
     # and that they are all lists of the same length
     field_lengths = defaultdict(list)
-    fields: dict[str, pd.Series | np.ndarray] = {}
+    fields: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]] = {}
 
-    field_names = list(map(str, data))
+    field_names = [str(field) for field in data]
     if isinstance(data, xr.Dataset):
         # list(dataset) only returns the variables, not the coords, so for xarray we need to add the coords as well
         # but not all coords, we only care abou time for now

@@ -84,7 +85,7 @@ def to_messages(  # noqa: C901, PLR0912
         else:
             values = convert_values_to_proto(values, field_type, filter_none=False)
 
-        fields[field_name] = values  # type: ignore[assignment]
+        fields[field_name] = values
 
     # now convert every datapoint to a protobuf message
     if len(field_lengths) == 0:  # early return, no actual data to convert

@@ -103,7 +104,7 @@ def marshal_messages(messages: list[Message]) -> list[bytes]:
 
 
 def columnar_to_row_based(
-    data: dict[str, pd.Series | np.ndarray],
+    data: dict[str, pd.Series | np.ndarray | list[ProtoFieldValue]],
 ) -> Iterator[dict[str, Any]]:
     if len(data) == 0:
         return

@@ -120,18 +121,18 @@ def convert_values_to_proto(
     values: np.ndarray | pd.Series, field_type: ProtobufFieldType, filter_none: bool = False
 ) -> list[ProtoFieldValue]:
     if filter_none:
-        return [field_type.to_proto(value) for value in values if value is not None]
+        return [field_type.to_proto(value) for value in values if not is_missing(value)]
     return [field_type.to_proto(value) for value in values]
 
 
 def convert_repeated_values_to_proto(
     values: np.ndarray | pd.Series | list[np.ndarray], field_type: ProtobufFieldType
-) -> Any:
+) -> list[ProtoFieldValue]:
     if isinstance(values, np.ndarray):  # it was an xarray, with potentially padded fill values at the end
         values = trim_trailing_fill_values(values, field_type.fill_value)
 
     # since repeated fields can have different lengths between datapoints, we can filter out None values here
-    return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values]
+    return [convert_values_to_proto(repeated_values, field_type, filter_none=True) for repeated_values in values]  # ty: ignore[invalid-return-type]
 
 
 def trim_trailing_fill_values(values: np.ndarray, fill_value: Any) -> list[np.ndarray]:

tilebox/datasets/query/id_interval.py

@@ -50,23 +50,21 @@ class IDInterval:
         Returns:
             IDInterval: The parsed ID interval
         """
+        if isinstance(arg, IDInterval):
+            return arg
 
-        match arg:
-            case IDInterval(_, _, _, _):
-                return arg
-            case (UUID(), UUID()):
-                start, end = arg
+        if isinstance(arg, tuple) and len(arg) == 2:
+            start, end = arg
+            if isinstance(start, UUID) and isinstance(end, UUID):
                 return IDInterval(
-                    start_id=start,
-                    end_id=end,
-                    start_exclusive=start_exclusive,
-                    end_inclusive=end_inclusive,
+                    start_id=start, end_id=end, start_exclusive=start_exclusive, end_inclusive=end_inclusive
                 )
-            case (str(), str()):
-                start, end = arg
+            if isinstance(start, str) and isinstance(end, str):
                 return IDInterval(
                     start_id=UUID(start),
                     end_id=UUID(end),
                     start_exclusive=start_exclusive,
                     end_inclusive=end_inclusive,
                 )
+
+        raise ValueError(f"Failed to convert {arg} ({type(arg)}) to IDInterval")
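
The match statement gives way to plain isinstance narrowing, plus an explicit ValueError for unsupported inputs. The same pattern in standalone form (a hypothetical helper, not the library's actual signature), chosen because isinstance chains are narrowed reliably by type checkers while class patterns like case (UUID(), UUID()) can still trip some of them up:

from uuid import UUID


def as_uuid_pair(arg: object) -> tuple[UUID, UUID]:
    if isinstance(arg, tuple) and len(arg) == 2:
        start, end = arg
        if isinstance(start, UUID) and isinstance(end, UUID):
            return start, end
        if isinstance(start, str) and isinstance(end, str):
            return UUID(start), UUID(end)
    raise ValueError(f"Failed to convert {arg} ({type(arg)}) to a UUID pair")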

tilebox/datasets/query/time_interval.py

@@ -15,8 +15,10 @@ _EPOCH = datetime(1970, 1, 1, tzinfo=timezone.utc)
 
 # A type alias for the different types that can be used to specify a time interval
 TimeIntervalLike: TypeAlias = (
-    DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | "TimeInterval"
+    "DatetimeScalar | tuple[DatetimeScalar, DatetimeScalar] | xr.DataArray | xr.Dataset | TimeInterval"
 )
+# once we require python >= 3.12 we can replace this with a type statement, which doesn't require a string at all
+# type TimeIntervalLike = DatetimeScalar | tuple[DatetimeScalar ... | TimeInterval
 
 
 @dataclass(frozen=True)
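
The quoting moves from the single forward reference to the whole right-hand side because a TypeAlias value is evaluated eagerly at definition time. The PEP 695 type statement mentioned in the new comment is evaluated lazily, so no quoting is needed at all; a standalone sketch with hypothetical names (Python 3.12+ only):

type IntervalLike = str | tuple[str, str] | Interval  # Interval may be defined further down


class Interval:
    pass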

tilebox/datasets/service.py

@@ -37,6 +37,7 @@ from tilebox.datasets.datasets.v1.datasets_pb2 import (
     GetDatasetRequest,
     ListDatasetsRequest,
     Package,
+    UpdateDatasetRequest,
 )
 from tilebox.datasets.datasets.v1.datasets_pb2_grpc import DatasetServiceStub
 from tilebox.datasets.query.pagination import Pagination

@@ -64,24 +65,70 @@ class TileboxDatasetService:
         self._data_ingestion_service = data_ingestion_service_stub
 
     def create_dataset(
-        self, kind: DatasetKind, code_name: str, fields: list[FieldDict], name: str, summary: str
+        self, kind: DatasetKind, code_name: str, name: str, custom_fields: list[FieldDict]
     ) -> Promise[Dataset]:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
             name: The name of the dataset.
-            summary: A short summary of the dataset.
+            fields: The custom fields of the dataset
 
         Returns:
             The created dataset.
         """
-        dataset_type = DatasetType(kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in fields])
-        req = CreateDatasetRequest(name=name, type=dataset_type.to_message(), summary=summary, code_name=code_name)
+        dataset_type = DatasetType(
+            kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in custom_fields]
+        )
+        req = CreateDatasetRequest(name=name, type=dataset_type.to_message(), code_name=code_name)
         return Promise.resolve(self._dataset_service.CreateDataset(req)).then(Dataset.from_message)
 
+    def update_dataset(
+        self, kind: DatasetKind, dataset_id: UUID, name: str | None, custom_fields: list[FieldDict]
+    ) -> Promise[Dataset]:
+        """Update a dataset.
+
+        Args:
+            kind: The kind of the dataset to update, cannot be changed.
+            dataset_id: The id of the dataset to update, cannot be changed.
+            name: The new name of the dataset.
+            custom_fields: The new list of custom fields of the dataset.
+
+        Returns:
+            The updated dataset.
+        """
+        dataset_type = DatasetType(
+            kind, _REQUIRED_FIELDS_PER_DATASET_KIND[kind] + [Field.from_dict(f) for f in custom_fields]
+        )
+        req = UpdateDatasetRequest(id=uuid_to_uuid_message(dataset_id), name=name, type=dataset_type.to_message())
+        return Promise.resolve(self._dataset_service.UpdateDataset(req)).then(Dataset.from_message)
+
+    def create_or_update_dataset(
+        self, kind: DatasetKind, code_name: str, name: str, custom_fields: list[FieldDict]
+    ) -> Promise[Dataset]:
+        """Create a new dataset, or update it if it already exists.
+
+        Args:
+            kind: The kind of the dataset.
+            code_name: The code name of the dataset.
+            name: The name of the dataset.
+            custom_fields: The custom fields of the dataset
+
+        Returns:
+            The created or updated dataset.
+        """
+        return (
+            Promise.resolve(self._dataset_service.GetDataset(GetDatasetRequest(slug=code_name)))
+            .then(
+                did_fulfill=lambda dataset: self.update_dataset(
+                    kind, Dataset.from_message(dataset).id, name, custom_fields
+                ),
+                did_reject=lambda _: self.create_dataset(kind, code_name, name, custom_fields),
+            )
+            .then(Dataset.from_message)
+        )
+
     def list_datasets(self) -> Promise[ListDatasetsResponse]:
         """List all datasets and dataset groups."""
         return Promise.resolve(

@@ -224,7 +271,7 @@ def _client_info() -> ClientInfo:
 def _environment_info() -> str:
     python_version = sys.version.split(" ")[0]
     try:
-        shell = str(get_ipython())  # type: ignore[name-defined]
+        shell = str(get_ipython())  # ty: ignore[unresolved-reference]
     except NameError:
         return f"Python {python_version}"  # Probably standard Python interpreter
 

tilebox/datasets/sync/client.py

@@ -1,7 +1,7 @@
 from uuid import UUID
 
 from _tilebox.grpc.channel import open_channel
-from _tilebox.grpc.error import with_pythonic_errors
+from _tilebox.grpc.error import NotFoundError, with_pythonic_errors
 from tilebox.datasets.client import Client as BaseClient
 from tilebox.datasets.client import token_from_env
 from tilebox.datasets.data.datasets import DatasetKind, FieldDict

@@ -33,33 +33,38 @@ class Client:
         )
         self._client = BaseClient(service)
 
-    def create_dataset(
+    def create_or_update_dataset(
         self,
         kind: DatasetKind,
         code_name: str,
-        fields: list[FieldDict],
+        fields: list[FieldDict] | None = None,
         *,
         name: str | None = None,
-        description: str | None = None,
     ) -> DatasetClient:
         """Create a new dataset.
 
         Args:
             kind: The kind of the dataset.
             code_name: The code name of the dataset.
-            fields: The fields of the dataset.
+            fields: The custom fields of the dataset.
             name: The name of the dataset. Defaults to the code name.
-            description: A short description of the dataset. Optional.
 
         Returns:
             The created dataset.
         """
-        if name is None:
-            name = code_name
-        if description is None:
-            description = ""
 
-        return self._client.create_dataset(kind, code_name, fields, name, description, DatasetClient).get()
+        try:
+            dataset = self.dataset(code_name)
+        except NotFoundError:
+            return self._client.create_dataset(kind, code_name, fields or [], name or code_name, DatasetClient).get()
+
+        return self._client.update_dataset(
+            kind,
+            dataset._dataset.id,  # noqa: SLF001
+            fields or [],
+            name or dataset._dataset.name,  # noqa: SLF001
+            DatasetClient,
+        ).get()
 
     def datasets(self) -> Group:
         """Fetch all available datasets."""

tilebox_datasets-0.48.0.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tilebox-datasets
-Version: 0.46.0
+Version: 0.48.0
 Summary: Access Tilebox datasets from Python
 Project-URL: Homepage, https://tilebox.com
 Project-URL: Documentation, https://docs.tilebox.com/datasets/introduction

tilebox_datasets-0.48.0.dist-info/RECORD

@@ -1,12 +1,12 @@
 tilebox/datasets/__init__.py,sha256=4fI6ErCVb15KD_iPWIM6TUXTPgU8qrJVx3Cx7m7jeC8,824
-tilebox/datasets/client.py,sha256=0RuBAytG9dI-BKpLmb0DbBxhWY9KCOFRpC-3lUsiy1c,4242
+tilebox/datasets/client.py,sha256=uWAFr77kJ1HXR8hXsDDl8H6ug-PKUdPomwc4Z85QQ9I,4718
 tilebox/datasets/group.py,sha256=DoGl4w7Viy-l4kE9580dJOymP_B2pj3LRqvMNxvrYmU,1884
-tilebox/datasets/message_pool.py,sha256=5Hkd2xqgcpQsYY4IySNRGnvAyl2tsyhH21fTAu3EVXg,1272
-tilebox/datasets/progress.py,sha256=5w_kmrg_aKcW4qluOJu0bPnMP2tV_JA3EZMgk1GDYJM,3531
-tilebox/datasets/service.py,sha256=Oo_yJNomUqS_CCbBW132uNyWT_sjjG7_W_bqLJIE4FQ,12195
+tilebox/datasets/message_pool.py,sha256=nGuwihK2Lhfk-q5cuRtjXxmgPlEU_DEp5uYRueCPWHk,1325
+tilebox/datasets/progress.py,sha256=Mmn1ukjX_lNhgR7jnYFgksAoc2r33DK44VqIWMogI04,3776
+tilebox/datasets/service.py,sha256=32Lw-giFGchbqDcX6vfYJxZfdPUxAtEjPxhf5P93DkE,14043
 tilebox/datasets/uuid.py,sha256=pqtp5GMHM41KEKZHPdwrHVVThY9VDa7BPbCogrM01ZU,1107
 tilebox/datasets/aio/__init__.py,sha256=0x_gddLgDsUCdl8MMZj4MPH1lp4HuOrExMHTjIFmM6s,405
-tilebox/datasets/aio/client.py,sha256=Lit7sD1vy7jedhv16oltUf7r9y7mSwoZAAKrG_I60CM,3303
+tilebox/datasets/aio/client.py,sha256=fJ7AF0cCocc0XT-Z1V3eb1QjnP4FirEixd4IufnEZoo,3499
 tilebox/datasets/aio/dataset.py,sha256=CBiEhT7Pex5JcYulCkKl4DX80oGj6jQB1uUfVV_F-zo,22511
 tilebox/datasets/aio/pagination.py,sha256=dqxnG1UIvXQwhS5XZDlnmtiy_zat5j5E7xeucqI1dZU,6111
 tilebox/datasets/aio/timeseries.py,sha256=iQqIyh9TPL_gJz18GCxmtFJEwObR9S2rPsUohFYM8wQ,301

@@ -45,15 +45,15 @@ tilebox/datasets/datasets/v1/well_known_types_pb2.py,sha256=1Pi83QERn1nFFenZkITH
 tilebox/datasets/datasets/v1/well_known_types_pb2.pyi,sha256=RW-TrAbeFof2x-Qa5UPH0nCg-4KTuJ1RLU6SApnS60E,7216
 tilebox/datasets/datasets/v1/well_known_types_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY5tCk3p76RH5Zc,158
 tilebox/datasets/protobuf_conversion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/datasets/protobuf_conversion/field_types.py,sha256=WBISp5pJ_jUYXLC_mNXw_S98guLlnqBeAr1fJjAk5uo,11977
-tilebox/datasets/protobuf_conversion/protobuf_xarray.py,sha256=kO4Pq9bsH7p7Uocg8V4zUKM8KY0hes8bxByIdb6_s5k,16183
-tilebox/datasets/protobuf_conversion/to_protobuf.py,sha256=Xy_ItOzTBd0nziRUe_CHBTM7VwQi6XY8LbYFE2thjAc,7392
+tilebox/datasets/protobuf_conversion/field_types.py,sha256=vvPy1-8vsHedza-jPOCFXtQXwK6QVRc0BEXESSQ1jgU,12793
+tilebox/datasets/protobuf_conversion/protobuf_xarray.py,sha256=7OTQ3NnTVL52KoCeC8X5yOPG-Y60UpBm5bRuT8IAFAY,16123
+tilebox/datasets/protobuf_conversion/to_protobuf.py,sha256=-rJP954W4ZyONyHQSMtnLSnVIs9j73PCX0hPu7O-fL4,7504
 tilebox/datasets/query/__init__.py,sha256=lR-tzsVyx1QXe-uIHrYkCWcjmLRfKzmRHC7E1TTGroY,245
-tilebox/datasets/query/id_interval.py,sha256=Ha3Rm92hZugQXNzyfdFUROT1pTJ1ZBIISqTJbf13OP4,2508
+tilebox/datasets/query/id_interval.py,sha256=HiaAqn9Hh-07N9MSTjvVO0W0wJOTIP1HLy15PDa1nQQ,2583
 tilebox/datasets/query/pagination.py,sha256=0kaQI6v9sJnDJblP3VJn6erPbkP_LSwegFRSCzINGY0,774
-tilebox/datasets/query/time_interval.py,sha256=1Y_ewpGxIUJ1KSkAhoceH0madfsxS-F-NSHY2yM5GZ8,10018
+tilebox/datasets/query/time_interval.py,sha256=qJbCIjFzPt5oEi6YIEb4viNElJUPp1ZCanoA44eXITg,10216
 tilebox/datasets/sync/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-tilebox/datasets/sync/client.py,sha256=hr5ZAcvCFGqJu3AREIg0RDFBnc8dOmTIEgBWkGvgKUA,3272
+tilebox/datasets/sync/client.py,sha256=5wRzSAkXtPAfY7ykzqSckzbvvMmBjm0Dft-0mCHkfAU,3431
 tilebox/datasets/sync/dataset.py,sha256=wh8grBQZJAPZ7_X_8Ui67hK6v4uGAPC2gx02PSwbUgE,22174
 tilebox/datasets/sync/pagination.py,sha256=IOSbpNTlv3Fx9QLdBMZHJxZSWeKJNLOVWkmSoKJHIcw,6025
 tilebox/datasets/sync/timeseries.py,sha256=4nTP8_tmv6V7PXTUNzzlbzlxv0OXo_IqVLtSdJpUOW0,303

@@ -63,6 +63,6 @@ tilebox/datasets/tilebox/v1/id_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY
 tilebox/datasets/tilebox/v1/query_pb2.py,sha256=l60DA1setyQhdBbZ_jgG8Pw3ourUSxXWU5P8AACYlpk,3444
 tilebox/datasets/tilebox/v1/query_pb2.pyi,sha256=f-u60POkJqzssOmCEbOrD5fam9_86c6MdY_CzpnZZk0,2061
 tilebox/datasets/tilebox/v1/query_pb2_grpc.py,sha256=xYOs94SXiNYAlFodACnsXW5QovLsHY5tCk3p76RH5Zc,158
-tilebox_datasets-0.46.0.dist-info/METADATA,sha256=az6VLDm7YiPyq8izUHYLQ3TUaa3HWnH1NvyFxvvZ3-o,4234
-tilebox_datasets-0.46.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-tilebox_datasets-0.46.0.dist-info/RECORD,,
+tilebox_datasets-0.48.0.dist-info/METADATA,sha256=fRI4iuA8M4AK_QA2C87S60EzCgE5MAxNOaKgNQ_E3gQ,4234
+tilebox_datasets-0.48.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+tilebox_datasets-0.48.0.dist-info/RECORD,,