wandb 0.18.0rc1__py3-none-win_amd64.whl → 0.18.2__py3-none-win_amd64.whl

Files changed (119)
  1. wandb/__init__.py +4 -4
  2. wandb/__init__.pyi +67 -12
  3. wandb/apis/internal.py +3 -0
  4. wandb/apis/public/api.py +128 -2
  5. wandb/apis/public/artifacts.py +11 -7
  6. wandb/apis/public/jobs.py +8 -0
  7. wandb/apis/public/runs.py +18 -5
  8. wandb/bin/wandb-core +0 -0
  9. wandb/cli/cli.py +0 -5
  10. wandb/data_types.py +9 -2019
  11. wandb/env.py +0 -5
  12. wandb/errors/__init__.py +11 -40
  13. wandb/errors/errors.py +37 -0
  14. wandb/errors/warnings.py +2 -0
  15. wandb/{sklearn → integration/sklearn}/calculate/calibration_curves.py +7 -7
  16. wandb/{sklearn → integration/sklearn}/calculate/class_proportions.py +1 -1
  17. wandb/{sklearn → integration/sklearn}/calculate/confusion_matrix.py +3 -2
  18. wandb/{sklearn → integration/sklearn}/calculate/elbow_curve.py +6 -6
  19. wandb/{sklearn → integration/sklearn}/calculate/learning_curve.py +2 -2
  20. wandb/{sklearn → integration/sklearn}/calculate/outlier_candidates.py +2 -2
  21. wandb/{sklearn → integration/sklearn}/calculate/residuals.py +8 -8
  22. wandb/{sklearn → integration/sklearn}/calculate/silhouette.py +2 -2
  23. wandb/{sklearn → integration/sklearn}/calculate/summary_metrics.py +2 -2
  24. wandb/{sklearn → integration/sklearn}/plot/classifier.py +5 -5
  25. wandb/{sklearn → integration/sklearn}/plot/clusterer.py +10 -6
  26. wandb/{sklearn → integration/sklearn}/plot/regressor.py +5 -5
  27. wandb/{sklearn → integration/sklearn}/plot/shared.py +3 -3
  28. wandb/{sklearn → integration/sklearn}/utils.py +8 -8
  29. wandb/integration/tensorboard/log.py +1 -1
  30. wandb/{wandb_torch.py → integration/torch/wandb_torch.py} +36 -32
  31. wandb/old/core.py +2 -80
  32. wandb/plot/bar.py +7 -4
  33. wandb/plot/confusion_matrix.py +5 -4
  34. wandb/plot/histogram.py +7 -4
  35. wandb/plot/line.py +7 -4
  36. wandb/proto/v3/wandb_base_pb2.py +2 -1
  37. wandb/proto/v3/wandb_internal_pb2.py +2 -1
  38. wandb/proto/v3/wandb_server_pb2.py +2 -1
  39. wandb/proto/v3/wandb_settings_pb2.py +3 -2
  40. wandb/proto/v3/wandb_telemetry_pb2.py +2 -1
  41. wandb/proto/v4/wandb_base_pb2.py +2 -1
  42. wandb/proto/v4/wandb_internal_pb2.py +2 -1
  43. wandb/proto/v4/wandb_server_pb2.py +2 -1
  44. wandb/proto/v4/wandb_settings_pb2.py +3 -2
  45. wandb/proto/v4/wandb_telemetry_pb2.py +2 -1
  46. wandb/proto/v5/wandb_base_pb2.py +3 -2
  47. wandb/proto/v5/wandb_internal_pb2.py +3 -2
  48. wandb/proto/v5/wandb_server_pb2.py +3 -2
  49. wandb/proto/v5/wandb_settings_pb2.py +4 -3
  50. wandb/proto/v5/wandb_telemetry_pb2.py +3 -2
  51. wandb/sdk/artifacts/_validators.py +48 -3
  52. wandb/sdk/artifacts/artifact.py +157 -183
  53. wandb/sdk/artifacts/artifact_file_cache.py +13 -11
  54. wandb/sdk/artifacts/artifact_instance_cache.py +4 -2
  55. wandb/sdk/artifacts/artifact_manifest.py +13 -11
  56. wandb/sdk/artifacts/artifact_manifest_entry.py +24 -22
  57. wandb/sdk/artifacts/artifact_manifests/artifact_manifest_v1.py +9 -7
  58. wandb/sdk/artifacts/artifact_saver.py +27 -25
  59. wandb/sdk/artifacts/exceptions.py +26 -25
  60. wandb/sdk/artifacts/storage_handler.py +11 -9
  61. wandb/sdk/artifacts/storage_handlers/azure_handler.py +16 -14
  62. wandb/sdk/artifacts/storage_handlers/gcs_handler.py +15 -13
  63. wandb/sdk/artifacts/storage_handlers/http_handler.py +15 -14
  64. wandb/sdk/artifacts/storage_handlers/local_file_handler.py +10 -8
  65. wandb/sdk/artifacts/storage_handlers/multi_handler.py +14 -12
  66. wandb/sdk/artifacts/storage_handlers/s3_handler.py +19 -19
  67. wandb/sdk/artifacts/storage_handlers/tracking_handler.py +10 -8
  68. wandb/sdk/artifacts/storage_handlers/wb_artifact_handler.py +12 -10
  69. wandb/sdk/artifacts/storage_handlers/wb_local_artifact_handler.py +9 -7
  70. wandb/sdk/artifacts/storage_policies/wandb_storage_policy.py +31 -29
  71. wandb/sdk/artifacts/storage_policy.py +20 -20
  72. wandb/sdk/backend/backend.py +8 -26
  73. wandb/sdk/data_types/audio.py +165 -0
  74. wandb/sdk/data_types/base_types/wb_value.py +1 -3
  75. wandb/sdk/data_types/bokeh.py +70 -0
  76. wandb/sdk/data_types/graph.py +405 -0
  77. wandb/sdk/data_types/image.py +156 -0
  78. wandb/sdk/data_types/table.py +1204 -0
  79. wandb/sdk/data_types/trace_tree.py +2 -2
  80. wandb/sdk/data_types/utils.py +49 -0
  81. wandb/sdk/data_types/video.py +2 -2
  82. wandb/sdk/interface/interface.py +0 -24
  83. wandb/sdk/interface/interface_shared.py +0 -12
  84. wandb/sdk/internal/handler.py +0 -10
  85. wandb/sdk/internal/internal_api.py +71 -0
  86. wandb/sdk/internal/sender.py +0 -43
  87. wandb/sdk/internal/tb_watcher.py +1 -1
  88. wandb/sdk/lib/_settings_toposort_generated.py +1 -0
  89. wandb/sdk/lib/hashutil.py +34 -12
  90. wandb/sdk/lib/service_connection.py +216 -0
  91. wandb/sdk/lib/service_token.py +94 -0
  92. wandb/sdk/lib/sock_client.py +7 -3
  93. wandb/sdk/service/server.py +2 -5
  94. wandb/sdk/service/service.py +2 -31
  95. wandb/sdk/service/streams.py +0 -7
  96. wandb/sdk/wandb_init.py +42 -25
  97. wandb/sdk/wandb_run.py +18 -159
  98. wandb/sdk/wandb_settings.py +2 -0
  99. wandb/sdk/wandb_setup.py +25 -16
  100. wandb/sdk/wandb_sync.py +9 -3
  101. wandb/sdk/wandb_watch.py +31 -15
  102. wandb/sklearn.py +35 -0
  103. wandb/util.py +14 -3
  104. {wandb-0.18.0rc1.dist-info → wandb-0.18.2.dist-info}/METADATA +6 -5
  105. {wandb-0.18.0rc1.dist-info → wandb-0.18.2.dist-info}/RECORD +114 -110
  106. wandb/sdk/internal/update.py +0 -113
  107. wandb/sdk/lib/console.py +0 -39
  108. wandb/sdk/service/service_base.py +0 -50
  109. wandb/sdk/service/service_sock.py +0 -70
  110. wandb/sdk/wandb_manager.py +0 -232
  111. /wandb/{sklearn → integration/sklearn}/__init__.py +0 -0
  112. /wandb/{sklearn → integration/sklearn}/calculate/__init__.py +0 -0
  113. /wandb/{sklearn → integration/sklearn}/calculate/decision_boundaries.py +0 -0
  114. /wandb/{sklearn → integration/sklearn}/calculate/feature_importances.py +0 -0
  115. /wandb/{sklearn → integration/sklearn}/plot/__init__.py +0 -0
  116. /wandb/{sdk/lib → plot}/viz.py +0 -0
  117. {wandb-0.18.0rc1.dist-info → wandb-0.18.2.dist-info}/WHEEL +0 -0
  118. {wandb-0.18.0rc1.dist-info → wandb-0.18.2.dist-info}/entry_points.txt +0 -0
  119. {wandb-0.18.0rc1.dist-info → wandb-0.18.2.dist-info}/licenses/LICENSE +0 -0
wandb/sdk/data_types/table.py (new file)
@@ -0,0 +1,1204 @@
+ import base64
+ import binascii
+ import codecs
+ import datetime
+ import logging
+ import os
+
+ import wandb
+ from wandb import util
+ from wandb.sdk.lib import runid
+
+ from . import _dtypes
+ from ._private import MEDIA_TMP
+ from .base_types.media import Media, _numpy_arrays_to_lists
+ from .base_types.wb_value import WBValue
+ from .utils import _json_helper
+
+
+ class _TableLinkMixin:
+     def set_table(self, table):
+         self._table = table
+
+
+ class _TableKey(str, _TableLinkMixin):
+     def set_table(self, table, col_name):
+         assert col_name in table.columns
+         self._table = table
+         self._col_name = col_name
+
+
+ class _TableIndex(int, _TableLinkMixin):
+     def get_row(self):
+         row = {}
+         if self._table:
+             row = {
+                 c: self._table.data[self][i] for i, c in enumerate(self._table.columns)
+             }
+
+         return row
+
+
+ class _PrimaryKeyType(_dtypes.Type):
+     name = "primaryKey"
+     legacy_names = ["wandb.TablePrimaryKey"]
+
+     def assign_type(self, wb_type=None):
+         if isinstance(wb_type, _dtypes.StringType) or isinstance(
+             wb_type, _PrimaryKeyType
+         ):
+             return self
+         return _dtypes.InvalidType()
+
+     @classmethod
+     def from_obj(cls, py_obj):
+         if not isinstance(py_obj, _TableKey):
+             raise TypeError("py_obj must be a _TableKey")
+         else:
+             return cls()
+
+
+ class _ForeignKeyType(_dtypes.Type):
+     name = "foreignKey"
+     legacy_names = ["wandb.TableForeignKey"]
+     types = [_TableKey]
+
+     def __init__(self, table, col_name):
+         assert isinstance(table, Table)
+         assert isinstance(col_name, str)
+         assert col_name in table.columns
+         self.params.update({"table": table, "col_name": col_name})
+
+     def assign_type(self, wb_type=None):
+         if isinstance(wb_type, _dtypes.StringType):
+             return self
+         elif (
+             isinstance(wb_type, _ForeignKeyType)
+             and id(self.params["table"]) == id(wb_type.params["table"])
+             and self.params["col_name"] == wb_type.params["col_name"]
+         ):
+             return self
+
+         return _dtypes.InvalidType()
+
+     @classmethod
+     def from_obj(cls, py_obj):
+         if not isinstance(py_obj, _TableKey):
+             raise TypeError("py_obj must be a _TableKey")
+         else:
+             return cls(py_obj._table, py_obj._col_name)
+
+     def to_json(self, artifact=None):
+         res = super().to_json(artifact)
+         if artifact is not None:
+             table_name = f"media/tables/t_{runid.generate_id()}"
+             entry = artifact.add(self.params["table"], table_name)
+             res["params"]["table"] = entry.path
+         else:
+             raise AssertionError(
+                 "_ForeignKeyType does not support serialization without an artifact"
+             )
+         return res
+
+     @classmethod
+     def from_json(
+         cls,
+         json_dict,
+         artifact,
+     ):
+         table = None
+         col_name = None
+         if artifact is None:
+             raise AssertionError(
+                 "_ForeignKeyType does not support deserialization without an artifact"
+             )
+         else:
+             table = artifact.get(json_dict["params"]["table"])
+             col_name = json_dict["params"]["col_name"]
+
+         if table is None:
+             raise AssertionError("Unable to deserialize referenced table")
+
+         return cls(table, col_name)
+
+
+ class _ForeignIndexType(_dtypes.Type):
+     name = "foreignIndex"
+     legacy_names = ["wandb.TableForeignIndex"]
+     types = [_TableIndex]
+
+     def __init__(self, table):
+         assert isinstance(table, Table)
+         self.params.update({"table": table})
+
+     def assign_type(self, wb_type=None):
+         if isinstance(wb_type, _dtypes.NumberType):
+             return self
+         elif isinstance(wb_type, _ForeignIndexType) and id(self.params["table"]) == id(
+             wb_type.params["table"]
+         ):
+             return self
+
+         return _dtypes.InvalidType()
+
+     @classmethod
+     def from_obj(cls, py_obj):
+         if not isinstance(py_obj, _TableIndex):
+             raise TypeError("py_obj must be a _TableIndex")
+         else:
+             return cls(py_obj._table)
+
+     def to_json(self, artifact=None):
+         res = super().to_json(artifact)
+         if artifact is not None:
+             table_name = f"media/tables/t_{runid.generate_id()}"
+             entry = artifact.add(self.params["table"], table_name)
+             res["params"]["table"] = entry.path
+         else:
+             raise AssertionError(
+                 "_ForeignIndexType does not support serialization without an artifact"
+             )
+         return res
+
+     @classmethod
+     def from_json(
+         cls,
+         json_dict,
+         artifact,
+     ):
+         table = None
+         if artifact is None:
+             raise AssertionError(
+                 "_ForeignIndexType does not support deserialization without an artifact"
+             )
+         else:
+             table = artifact.get(json_dict["params"]["table"])
+
+         if table is None:
+             raise AssertionError("Unable to deserialize referenced table")
+
+         return cls(table)
+
+
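The four key types above are what `Table.set_pk` and `Table.set_fk` (defined on `Table` below) cast columns to. A minimal sketch of linking two tables this way, assuming only the public `wandb` API; the project, table names, and contents are hypothetical:

    import wandb

    # Hypothetical lookup table keyed by "user_id".
    users = wandb.Table(columns=["user_id", "name"],
                        data=[["u1", "Ada"], ["u2", "Grace"]])
    users.set_pk("user_id")  # casts the column with _PrimaryKeyType

    # A second table whose "user_id" column references the first.
    orders = wandb.Table(columns=["order_id", "user_id"],
                         data=[["o1", "u1"], ["o2", "u1"]])
    orders.set_fk("user_id", users, "user_id")  # casts with _ForeignKeyType

    # Key-typed columns only serialize inside an artifact
    # (_ForeignKeyType.to_json above raises without one).
    artifact = wandb.Artifact("linked-tables", type="dataset")
    artifact.add(users, "users")
    artifact.add(orders, "orders")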
+ class Table(Media):
+     """The Table class used to display and analyze tabular data.
+
+     Unlike traditional spreadsheets, Tables support numerous types of data:
+     scalar values, strings, numpy arrays, and most subclasses of `wandb.data_types.Media`.
+     This means you can embed `Images`, `Video`, `Audio`, and other sorts of rich, annotated media
+     directly in Tables, alongside other traditional scalar values.
+
+     This class is the primary class used to generate the Table Visualizer
+     in the UI: https://docs.wandb.ai/guides/data-vis/tables.
+
+     Arguments:
+         columns: (List[str]) Names of the columns in the table.
+             Defaults to ["Input", "Output", "Expected"].
+         data: (List[List[any]]) 2D row-oriented array of values.
+         dataframe: (pandas.DataFrame) DataFrame object used to create the table.
+             When set, `data` and `columns` arguments are ignored.
+         optional: (Union[bool, List[bool]]) Determines if `None` values are allowed. Defaults to True.
+             - If a singular bool value, then the optionality is enforced for all
+               columns specified at construction time.
+             - If a list of bool values, then the optionality is applied to each
+               column - should be the same length as `columns`.
+         allow_mixed_types: (bool) Determines if columns are allowed to have mixed types
+             (disables type validation). Defaults to False
+     """
+
+     MAX_ROWS = 10000
+     MAX_ARTIFACT_ROWS = 200000
+     _MAX_EMBEDDING_DIMENSIONS = 150
+     _log_type = "table"
+
+     def __init__(
+         self,
+         columns=None,
+         data=None,
+         rows=None,
+         dataframe=None,
+         dtype=None,
+         optional=True,
+         allow_mixed_types=False,
+     ):
+         """Initializes a Table object.
+
+         The `rows` argument is available for legacy reasons and should not
+         be used. The Table class uses `data` to mimic the Pandas API.
+         """
+         super().__init__()
+         self._pk_col = None
+         self._fk_cols = set()
+         if allow_mixed_types:
+             dtype = _dtypes.AnyType
+
+         # This is kept for legacy reasons (tss: personally, I think we should remove this)
+         if columns is None:
+             columns = ["Input", "Output", "Expected"]
+
+         # Explicit dataframe option
+         if dataframe is not None:
+             self._init_from_dataframe(dataframe, columns, optional, dtype)
+         else:
+             # Expected pattern
+             if data is not None:
+                 if util.is_numpy_array(data):
+                     self._init_from_ndarray(data, columns, optional, dtype)
+                 elif util.is_pandas_data_frame(data):
+                     self._init_from_dataframe(data, columns, optional, dtype)
+                 else:
+                     self._init_from_list(data, columns, optional, dtype)
+
+             # legacy
+             elif rows is not None:
+                 self._init_from_list(rows, columns, optional, dtype)
+
+             # Default empty case
+             else:
+                 self._init_from_list([], columns, optional, dtype)
+
+     @staticmethod
+     def _assert_valid_columns(columns):
+         valid_col_types = [str, int]
+         assert isinstance(columns, list), "columns argument expects a `list` object"
+         assert len(columns) == 0 or all(
+             [type(col) in valid_col_types for col in columns]
+         ), "columns argument expects list of strings or ints"
+
+     def _init_from_list(self, data, columns, optional=True, dtype=None):
+         assert isinstance(data, list), "data argument expects a `list` object"
+         self.data = []
+         self._assert_valid_columns(columns)
+         self.columns = columns
+         self._make_column_types(dtype, optional)
+         for row in data:
+             self.add_data(*row)
+
+     def _init_from_ndarray(self, ndarray, columns, optional=True, dtype=None):
+         assert util.is_numpy_array(
+             ndarray
+         ), "ndarray argument expects a `numpy.ndarray` object"
+         self.data = []
+         self._assert_valid_columns(columns)
+         self.columns = columns
+         self._make_column_types(dtype, optional)
+         for row in ndarray:
+             self.add_data(*row)
+
+     def _init_from_dataframe(self, dataframe, columns, optional=True, dtype=None):
+         assert util.is_pandas_data_frame(
+             dataframe
+         ), "dataframe argument expects a `pandas.core.frame.DataFrame` object"
+         self.data = []
+         columns = list(dataframe.columns)
+         self._assert_valid_columns(columns)
+         self.columns = columns
+         self._make_column_types(dtype, optional)
+         for row in range(len(dataframe)):
+             self.add_data(*tuple(dataframe[col].values[row] for col in self.columns))
+
+     def _make_column_types(self, dtype=None, optional=True):
+         if dtype is None:
+             dtype = _dtypes.UnknownType()
+
+         if optional.__class__ is not list:
+             optional = [optional for _ in range(len(self.columns))]
+
+         if dtype.__class__ is not list:
+             dtype = [dtype for _ in range(len(self.columns))]
+
+         self._column_types = _dtypes.TypedDictType({})
+         for col_name, opt, dt in zip(self.columns, optional, dtype):
+             self.cast(col_name, dt, opt)
+
+     def cast(self, col_name, dtype, optional=False):
+         """Casts a column to a specific data type.
+
+         This can be one of the normal python classes, an internal W&B type, or an
+         example object, like an instance of wandb.Image or wandb.Classes.
+
+         Arguments:
+             col_name: (str) - The name of the column to cast.
+             dtype: (class, wandb.wandb_sdk.interface._dtypes.Type, any) - The target dtype.
+             optional: (bool) - If the column should allow Nones.
+         """
+         assert col_name in self.columns
+
+         wbtype = _dtypes.TypeRegistry.type_from_dtype(dtype)
+
+         if optional:
+             wbtype = _dtypes.OptionalType(wbtype)
+
+         # Cast each value in the row, raising an error if there are invalid entries.
+         col_ndx = self.columns.index(col_name)
+         for row in self.data:
+             result_type = wbtype.assign(row[col_ndx])
+             if isinstance(result_type, _dtypes.InvalidType):
+                 raise TypeError(
+                     "Existing data {}, of type {} cannot be cast to {}".format(
+                         row[col_ndx],
+                         _dtypes.TypeRegistry.type_of(row[col_ndx]),
+                         wbtype,
+                     )
+                 )
+             wbtype = result_type
+
+         # Assert valid options
+         is_pk = isinstance(wbtype, _PrimaryKeyType)
+         is_fk = isinstance(wbtype, _ForeignKeyType)
+         is_fi = isinstance(wbtype, _ForeignIndexType)
+         if is_pk or is_fk or is_fi:
+             assert (
+                 not optional
+             ), "Primary keys, foreign keys, and foreign indexes cannot be optional."
+
+         if (is_fk or is_fi) and id(wbtype.params["table"]) == id(self):
+             raise AssertionError("Cannot set a foreign table reference to same table.")
+
+         if is_pk:
+             assert (
+                 self._pk_col is None
+             ), "Cannot have multiple primary keys - {} is already set as the primary key.".format(
+                 self._pk_col
+             )
+
+         # Update the column type
+         self._column_types.params["type_map"][col_name] = wbtype
+
+         # Wrap the data if needed
+         self._update_keys()
+         return wbtype
+
+     def __ne__(self, other):
+         return not self.__eq__(other)
+
+     def _eq_debug(self, other, should_assert=False):
+         eq = isinstance(other, Table)
+         assert not should_assert or eq, "Found type {}, expected {}".format(
+             other.__class__, Table
+         )
+         eq = eq and len(self.data) == len(other.data)
+         assert not should_assert or eq, "Found {} rows, expected {}".format(
+             len(other.data), len(self.data)
+         )
+         eq = eq and self.columns == other.columns
+         assert not should_assert or eq, "Found columns {}, expected {}".format(
+             other.columns, self.columns
+         )
+         eq = eq and self._column_types == other._column_types
+         assert (
+             not should_assert or eq
+         ), "Found column type {}, expected column type {}".format(
+             other._column_types, self._column_types
+         )
+         if eq:
+             for row_ndx in range(len(self.data)):
+                 for col_ndx in range(len(self.data[row_ndx])):
+                     _eq = self.data[row_ndx][col_ndx] == other.data[row_ndx][col_ndx]
+                     # equal if all are equal
+                     if util.is_numpy_array(_eq):
+                         _eq = ((_eq * -1) + 1).sum() == 0
+                     eq = eq and _eq
+                     assert (
+                         not should_assert or eq
+                     ), "Unequal data at row_ndx {} col_ndx {}: found {}, expected {}".format(
+                         row_ndx,
+                         col_ndx,
+                         other.data[row_ndx][col_ndx],
+                         self.data[row_ndx][col_ndx],
+                     )
+                     if not eq:
+                         return eq
+         return eq
+
+     def __eq__(self, other):
+         return self._eq_debug(other)
+
+     def add_row(self, *row):
+         """Deprecated; use add_data instead."""
+         logging.warning("add_row is deprecated, use add_data")
+         self.add_data(*row)
+
+     def add_data(self, *data):
+         """Adds a new row of data to the table.
+
+         The maximum number of rows in a table is determined by
+         `wandb.Table.MAX_ARTIFACT_ROWS`. The length of the data should match
+         the number of table columns.
+         """
+         if len(data) != len(self.columns):
+             raise ValueError(
+                 "This table expects {} columns: {}, found {}".format(
+                     len(self.columns), self.columns, len(data)
+                 )
+             )
+
+         # Special case to pre-emptively cast a column as a key.
+         # Needed as String.assign(Key) is invalid
+         for ndx, item in enumerate(data):
+             if isinstance(item, _TableLinkMixin):
+                 self.cast(
+                     self.columns[ndx],
+                     _dtypes.TypeRegistry.type_of(item),
+                     optional=False,
+                 )
+
+         # Update the table's column types
+         result_type = self._get_updated_result_type(data)
+         self._column_types = result_type
+
+         # rows need to be mutable
+         if isinstance(data, tuple):
+             data = list(data)
+         # Add the new data
+         self.data.append(data)
+
+         # Update the wrapper values if needed
+         self._update_keys(force_last=True)
+
+     def _get_updated_result_type(self, row):
+         """Returns the updated result type based on the incoming row.
+
+         Raises:
+             TypeError: if the assignment is invalid.
+         """
+         incoming_row_dict = {
+             col_key: row[ndx] for ndx, col_key in enumerate(self.columns)
+         }
+         current_type = self._column_types
+         result_type = current_type.assign(incoming_row_dict)
+         if isinstance(result_type, _dtypes.InvalidType):
+             raise TypeError(
+                 "Data row contained incompatible types:\n{}".format(
+                     current_type.explain(incoming_row_dict)
+                 )
+             )
+         return result_type
+
+     def _to_table_json(self, max_rows=None, warn=True):
+         # separate this method for easier testing
+         if max_rows is None:
+             max_rows = Table.MAX_ROWS
+         n_rows = len(self.data)
+         if n_rows > max_rows and warn:
+             if wandb.run and (
+                 wandb.run.settings.table_raise_on_max_row_limit_exceeded
+                 or wandb.run.settings.strict
+             ):
+                 raise ValueError(
+                     f"Table row limit exceeded: table has {n_rows} rows, limit is {max_rows}. "
+                     f"To increase the maximum number of allowed rows in a wandb.Table, override "
+                     f"the limit with `wandb.Table.MAX_ARTIFACT_ROWS = X` and try again. Note: "
+                     f"this may cause slower queries in the W&B UI."
+                 )
+             logging.warning("Truncating wandb.Table object to %i rows." % max_rows)
+         return {"columns": self.columns, "data": self.data[:max_rows]}
+
+     def bind_to_run(self, *args, **kwargs):
+         # We set `warn=False` since Tables will now always be logged to both
+         # files and artifacts. The file limit will never practically matter and
+         # this code path will be ultimately removed. The 10k limit warning confuses
+         # users given that we publicly say 200k is the limit.
+         data = self._to_table_json(warn=False)
+         tmp_path = os.path.join(MEDIA_TMP.name, runid.generate_id() + ".table.json")
+         data = _numpy_arrays_to_lists(data)
+         with codecs.open(tmp_path, "w", encoding="utf-8") as fp:
+             util.json_dump_safer(data, fp)
+         self._set_file(tmp_path, is_tmp=True, extension=".table.json")
+         super().bind_to_run(*args, **kwargs)
+
+     @classmethod
+     def get_media_subdir(cls):
+         return os.path.join("media", "table")
+
+     @classmethod
+     def from_json(cls, json_obj, source_artifact):
+         data = []
+         column_types = None
+         np_deserialized_columns = {}
+         timestamp_column_indices = set()
+         if json_obj.get("column_types") is not None:
+             column_types = _dtypes.TypeRegistry.type_from_dict(
+                 json_obj["column_types"], source_artifact
+             )
+             for col_name in column_types.params["type_map"]:
+                 col_type = column_types.params["type_map"][col_name]
+                 ndarray_type = None
+                 if isinstance(col_type, _dtypes.NDArrayType):
+                     ndarray_type = col_type
+                 elif isinstance(col_type, _dtypes.UnionType):
+                     for t in col_type.params["allowed_types"]:
+                         if isinstance(t, _dtypes.NDArrayType):
+                             ndarray_type = t
+                         elif isinstance(t, _dtypes.TimestampType):
+                             timestamp_column_indices.add(
+                                 json_obj["columns"].index(col_name)
+                             )
+
+                 elif isinstance(col_type, _dtypes.TimestampType):
+                     timestamp_column_indices.add(json_obj["columns"].index(col_name))
+
+                 if (
+                     ndarray_type is not None
+                     and ndarray_type._get_serialization_path() is not None
+                 ):
+                     serialization_path = ndarray_type._get_serialization_path()
+                     np = util.get_module(
+                         "numpy",
+                         required="Deserializing NumPy columns requires NumPy to be installed.",
+                     )
+                     deserialized = np.load(
+                         source_artifact.get_entry(serialization_path["path"]).download()
+                     )
+                     np_deserialized_columns[json_obj["columns"].index(col_name)] = (
+                         deserialized[serialization_path["key"]]
+                     )
+                     ndarray_type._clear_serialization_path()
+
+         for r_ndx, row in enumerate(json_obj["data"]):
+             row_data = []
+             for c_ndx, item in enumerate(row):
+                 cell = item
+                 if c_ndx in timestamp_column_indices and isinstance(item, (int, float)):
+                     cell = datetime.datetime.fromtimestamp(
+                         item / 1000, tz=datetime.timezone.utc
+                     )
+                 elif c_ndx in np_deserialized_columns:
+                     cell = np_deserialized_columns[c_ndx][r_ndx]
+                 elif isinstance(item, dict) and "_type" in item:
+                     obj = WBValue.init_from_json(item, source_artifact)
+                     if obj is not None:
+                         cell = obj
+                 row_data.append(cell)
+             data.append(row_data)
+
+         # construct Table with dtypes for each column if type information exists
+         dtypes = None
+         if column_types is not None:
+             dtypes = [
+                 column_types.params["type_map"][str(col)] for col in json_obj["columns"]
+             ]
+
+         new_obj = cls(columns=json_obj["columns"], data=data, dtype=dtypes)
+
+         if column_types is not None:
+             new_obj._column_types = column_types
+
+         new_obj._update_keys()
+         return new_obj
+
+     def to_json(self, run_or_artifact):
+         json_dict = super().to_json(run_or_artifact)
+
+         if isinstance(run_or_artifact, wandb.wandb_sdk.wandb_run.Run):
+             json_dict.update(
+                 {
+                     "_type": "table-file",
+                     "ncols": len(self.columns),
+                     "nrows": len(self.data),
+                 }
+             )
+
+         elif isinstance(run_or_artifact, wandb.Artifact):
+             artifact = run_or_artifact
+             mapped_data = []
+             data = self._to_table_json(Table.MAX_ARTIFACT_ROWS)["data"]
+
+             ndarray_col_ndxs = set()
+             for col_ndx, col_name in enumerate(self.columns):
+                 col_type = self._column_types.params["type_map"][col_name]
+                 ndarray_type = None
+                 if isinstance(col_type, _dtypes.NDArrayType):
+                     ndarray_type = col_type
+                 elif isinstance(col_type, _dtypes.UnionType):
+                     for t in col_type.params["allowed_types"]:
+                         if isinstance(t, _dtypes.NDArrayType):
+                             ndarray_type = t
+
+                 # Do not serialize 1d arrays - these are likely embeddings and
+                 # will not have the same cost as higher dimensional arrays
+                 is_1d_array = (
+                     ndarray_type is not None
+                     and "shape" in ndarray_type._params
+                     and isinstance(ndarray_type._params["shape"], list)
+                     and len(ndarray_type._params["shape"]) == 1
+                     and ndarray_type._params["shape"][0]
+                     <= self._MAX_EMBEDDING_DIMENSIONS
+                 )
+                 if is_1d_array:
+                     self._column_types.params["type_map"][col_name] = _dtypes.ListType(
+                         _dtypes.NumberType, ndarray_type._params["shape"][0]
+                     )
+                 elif ndarray_type is not None:
+                     np = util.get_module(
+                         "numpy",
+                         required="Serializing NumPy requires NumPy to be installed.",
+                     )
+                     file_name = f"{str(col_name)}_{runid.generate_id()}.npz"
+                     npz_file_name = os.path.join(MEDIA_TMP.name, file_name)
+                     np.savez_compressed(
+                         npz_file_name,
+                         **{
+                             str(col_name): self.get_column(col_name, convert_to="numpy")
+                         },
+                     )
+                     entry = artifact.add_file(
+                         npz_file_name, "media/serialized_data/" + file_name, is_tmp=True
+                     )
+                     ndarray_type._set_serialization_path(entry.path, str(col_name))
+                     ndarray_col_ndxs.add(col_ndx)
+
+             for row in data:
+                 mapped_row = []
+                 for ndx, v in enumerate(row):
+                     if ndx in ndarray_col_ndxs:
+                         mapped_row.append(None)
+                     else:
+                         mapped_row.append(_json_helper(v, artifact))
+                 mapped_data.append(mapped_row)
+
+             json_dict.update(
+                 {
+                     "_type": Table._log_type,
+                     "columns": self.columns,
+                     "data": mapped_data,
+                     "ncols": len(self.columns),
+                     "nrows": len(mapped_data),
+                     "column_types": self._column_types.to_json(artifact),
+                 }
+             )
+         else:
+             raise ValueError("to_json accepts wandb_run.Run or wandb_artifact.Artifact")
+
+         return json_dict
+
+     def iterrows(self):
+         """Returns the table data by row, showing the index of the row and the relevant data.
+
+         Yields:
+         ------
+         index : int
+             The index of the row. Using this value in other W&B tables
+             will automatically build a relationship between the tables
+         row : List[any]
+             The data of the row.
+         """
+         for ndx in range(len(self.data)):
+             index = _TableIndex(ndx)
+             index.set_table(self)
+             yield index, self.data[ndx]
+
+     def set_pk(self, col_name):
+         # TODO: Docs
+         assert col_name in self.columns
+         self.cast(col_name, _PrimaryKeyType())
+
+     def set_fk(self, col_name, table, table_col):
+         # TODO: Docs
+         assert col_name in self.columns
+         assert col_name != self._pk_col
+         self.cast(col_name, _ForeignKeyType(table, table_col))
+
+     def _update_keys(self, force_last=False):
+         """Updates the known key-like columns based on current column types.
+
+         If the state has been updated since the last update, wraps the data
+         appropriately in the Key classes.
+
+         Arguments:
+             force_last: (bool) Wraps the last column of data even if there
+                 are no key updates.
+         """
+         _pk_col = None
+         _fk_cols = set()
+
+         # Build up the known keys from column types
+         c_types = self._column_types.params["type_map"]
+         for t in c_types:
+             if isinstance(c_types[t], _PrimaryKeyType):
+                 _pk_col = t
+             elif isinstance(c_types[t], _ForeignKeyType) or isinstance(
+                 c_types[t], _ForeignIndexType
+             ):
+                 _fk_cols.add(t)
+
+         # If there are updates to perform, safely update them
+         has_update = _pk_col != self._pk_col or _fk_cols != self._fk_cols
+         if has_update:
+             # If we removed the PK
+             if _pk_col is None and self._pk_col is not None:
+                 raise AssertionError(
+                     f"Cannot unset primary key (column {self._pk_col})"
+                 )
+             # If there is a removed FK
+             if len(self._fk_cols - _fk_cols) > 0:
+                 raise AssertionError(
+                     "Cannot unset foreign key. Attempted to unset ({})".format(
+                         self._fk_cols - _fk_cols
+                     )
+                 )
+
+         self._pk_col = _pk_col
+         self._fk_cols = _fk_cols
+
+         # Apply updates to data only if there are updates or the caller
+         # requested the final row to be updated
+         if has_update or force_last:
+             self._apply_key_updates(not has_update)
+
+     def _apply_key_updates(self, only_last=False):
+         """Appropriately wraps the underlying data in special Key classes.
+
+         Arguments:
+             only_last: only apply the updates to the last row (used for performance when
+                 the caller knows that the only new data is the last row and no updates were
+                 applied to the column types)
+         """
+         c_types = self._column_types.params["type_map"]
+
+         # Define a helper function which will wrap the data of a single row
+         # in the appropriate class wrapper.
+         def update_row(row_ndx):
+             for fk_col in self._fk_cols:
+                 col_ndx = self.columns.index(fk_col)
+
+                 # Wrap the Foreign Keys
+                 if isinstance(c_types[fk_col], _ForeignKeyType) and not isinstance(
+                     self.data[row_ndx][col_ndx], _TableKey
+                 ):
+                     self.data[row_ndx][col_ndx] = _TableKey(self.data[row_ndx][col_ndx])
+                     self.data[row_ndx][col_ndx].set_table(
+                         c_types[fk_col].params["table"],
+                         c_types[fk_col].params["col_name"],
+                     )
+
+                 # Wrap the Foreign Indexes
+                 elif isinstance(c_types[fk_col], _ForeignIndexType) and not isinstance(
+                     self.data[row_ndx][col_ndx], _TableIndex
+                 ):
+                     self.data[row_ndx][col_ndx] = _TableIndex(
+                         self.data[row_ndx][col_ndx]
+                     )
+                     self.data[row_ndx][col_ndx].set_table(
+                         c_types[fk_col].params["table"]
+                     )
+
+             # Wrap the Primary Key
+             if self._pk_col is not None:
+                 col_ndx = self.columns.index(self._pk_col)
+                 self.data[row_ndx][col_ndx] = _TableKey(self.data[row_ndx][col_ndx])
+                 self.data[row_ndx][col_ndx].set_table(self, self._pk_col)
+
+         if only_last:
+             update_row(len(self.data) - 1)
+         else:
+             for row_ndx in range(len(self.data)):
+                 update_row(row_ndx)
+
+     def add_column(self, name, data, optional=False):
+         """Adds a column of data to the table.
+
+         Arguments:
+             name: (str) - the unique name of the column
+             data: (list | np.array) - a column of homogeneous data
+             optional: (bool) - if null-like values are permitted
+         """
+         assert isinstance(name, str) and name not in self.columns
+         is_np = util.is_numpy_array(data)
+         assert isinstance(data, list) or is_np
+         assert isinstance(optional, bool)
+         is_first_col = len(self.columns) == 0
+         assert is_first_col or len(data) == len(
+             self.data
+         ), f"Expected length {len(self.data)}, found {len(data)}"
+
+         # Add the new data
+         for ndx in range(max(len(data), len(self.data))):
+             if is_first_col:
+                 self.data.append([])
+             self.data[ndx].append(data[ndx])
+         # add the column
+         self.columns.append(name)
+
+         try:
+             self.cast(name, _dtypes.UnknownType(), optional=optional)
+         except TypeError as err:
+             # Undo the changes
+             if is_first_col:
+                 self.data = []
+                 self.columns = []
+             else:
+                 for ndx in range(len(self.data)):
+                     self.data[ndx] = self.data[ndx][:-1]
+                 self.columns = self.columns[:-1]
+             raise err
+
+     def get_column(self, name, convert_to=None):
+         """Retrieves a column from the table and optionally converts it to a NumPy object.
+
+         Arguments:
+             name: (str) - the name of the column
+             convert_to: (str, optional)
+                 - "numpy": will convert the underlying data to numpy object
+         """
+         assert name in self.columns
+         assert convert_to is None or convert_to == "numpy"
+         if convert_to == "numpy":
+             np = util.get_module(
+                 "numpy", required="Converting to NumPy requires installing NumPy"
+             )
+         col = []
+         col_ndx = self.columns.index(name)
+         for row in self.data:
+             item = row[col_ndx]
+             if convert_to is not None and isinstance(item, WBValue):
+                 item = item.to_data_array()
+             col.append(item)
+         if convert_to == "numpy":
+             col = np.array(col)
+         return col
+
+     def get_index(self):
+         """Returns an array of row indexes for use in other tables to create links."""
+         ndxs = []
+         for ndx in range(len(self.data)):
+             index = _TableIndex(ndx)
+             index.set_table(self)
+             ndxs.append(index)
+         return ndxs
+
+     def get_dataframe(self):
+         """Returns a `pandas.DataFrame` of the table."""
+         pd = util.get_module(
+             "pandas",
+             required="Converting to pandas.DataFrame requires installing pandas",
+         )
+         return pd.DataFrame.from_records(self.data, columns=self.columns)
+
+     def index_ref(self, index):
+         """Gets a reference of the index of a row in the table."""
+         assert index < len(self.data)
+         _index = _TableIndex(index)
+         _index.set_table(self)
+         return _index
+
+     def add_computed_columns(self, fn):
+         """Adds one or more computed columns based on existing data.
+
+         Args:
+             fn: A function which accepts two parameters, ndx (int) and row (dict),
+                 and is expected to return a dict representing new columns for that row,
+                 keyed by the new column names.
+
+                 `ndx` is an integer representing the index of the row.
+
+                 `row` is a dictionary keyed by existing columns.
+         """
+         new_columns = {}
+         for ndx, row in self.iterrows():
+             row_dict = {self.columns[i]: row[i] for i in range(len(self.columns))}
+             new_row_dict = fn(ndx, row_dict)
+             assert isinstance(new_row_dict, dict)
+             for key in new_row_dict:
+                 new_columns[key] = new_columns.get(key, [])
+                 new_columns[key].append(new_row_dict[key])
+         for new_col_name in new_columns:
+             self.add_column(new_col_name, new_columns[new_col_name])
+
+
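That completes the `Table` class itself. The everyday flow is to build a table, append rows with `add_data`, and log it; a minimal sketch assuming the public `wandb` API (project name and values are hypothetical):

    import wandb

    run = wandb.init(project="table-demo")
    table = wandb.Table(columns=["id", "prediction", "score"])
    table.add_data("a", "cat", 0.91)  # one row per call; arity must match columns
    table.add_data("b", "dog", 0.83)
    run.log({"predictions": table})  # rendered by the Table Visualizer in the UI
    run.finish()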
+ class _PartitionTablePartEntry:
+     """Helper class for PartitionedTable to track its parts."""
+
+     def __init__(self, entry, source_artifact):
+         self.entry = entry
+         self.source_artifact = source_artifact
+         self._part = None
+
+     def get_part(self):
+         if self._part is None:
+             self._part = self.source_artifact.get(self.entry.path)
+         return self._part
+
+     def free(self):
+         self._part = None
+
+
+ class PartitionedTable(Media):
+     """A table which is composed of multiple sub-tables.
+
+     Currently, PartitionedTable is designed to point to a directory within an artifact.
+     """
+
+     _log_type = "partitioned-table"
+
+     def __init__(self, parts_path):
+         """Initialize a PartitionedTable.
+
+         Args:
+             parts_path (str): path to a directory of tables in the artifact.
+         """
+         super().__init__()
+         self.parts_path = parts_path
+         self._loaded_part_entries = {}
+
+     def to_json(self, artifact_or_run):
+         json_obj = {
+             "_type": PartitionedTable._log_type,
+         }
+         if isinstance(artifact_or_run, wandb.wandb_sdk.wandb_run.Run):
+             artifact_entry_url = self._get_artifact_entry_ref_url()
+             if artifact_entry_url is None:
+                 raise ValueError(
+                     "PartitionedTables must first be added to an Artifact before logging to a Run"
+                 )
+             json_obj["artifact_path"] = artifact_entry_url
+         else:
+             json_obj["parts_path"] = self.parts_path
+         return json_obj
+
+     @classmethod
+     def from_json(cls, json_obj, source_artifact):
+         instance = cls(json_obj["parts_path"])
+         entries = source_artifact.manifest.get_entries_in_directory(
+             json_obj["parts_path"]
+         )
+         for entry in entries:
+             instance._add_part_entry(entry, source_artifact)
+         return instance
+
+     def iterrows(self):
+         """Iterate over rows as (ndx, row).
+
+         Yields:
+         ------
+         index : int
+             The index of the row.
+         row : List[any]
+             The data of the row.
+         """
+         columns = None
+         ndx = 0
+         for entry_path in self._loaded_part_entries:
+             part = self._loaded_part_entries[entry_path].get_part()
+             if columns is None:
+                 columns = part.columns
+             elif columns != part.columns:
+                 raise ValueError(
+                     "Table parts have non-matching columns. {} != {}".format(
+                         columns, part.columns
+                     )
+                 )
+             for _, row in part.iterrows():
+                 yield ndx, row
+                 ndx += 1
+
+             self._loaded_part_entries[entry_path].free()
+
+     def _add_part_entry(self, entry, source_artifact):
+         self._loaded_part_entries[entry.path] = _PartitionTablePartEntry(
+             entry, source_artifact
+         )
+
+     def __ne__(self, other):
+         return not self.__eq__(other)
+
+     def __eq__(self, other):
+         return isinstance(other, self.__class__) and self.parts_path == other.parts_path
+
+     def bind_to_run(self, *args, **kwargs):
+         raise ValueError("PartitionedTables cannot be bound to runs")
+
+
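`PartitionedTable` holds no rows of its own; it records a `parts_path` and resolves the part tables from the artifact manifest at read time (see `from_json` above). A minimal sketch of producing one, assuming `PartitionedTable` is still re-exported from `wandb.data_types`; names and values are hypothetical:

    import wandb
    from wandb.data_types import PartitionedTable

    artifact = wandb.Artifact("partitioned-demo", type="dataset")
    for i in range(3):
        part = wandb.Table(columns=["x", "y"], data=[[i, i * i]])
        artifact.add(part, f"parts/part_{i}")  # parts must share the same columns

    # Points at the "parts" directory inside the same artifact.
    artifact.add(PartitionedTable(parts_path="parts"), "dataset")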
+ class JoinedTable(Media):
+     """Join two tables for visualization in the Artifact UI.
+
+     Arguments:
+         table1 (str, wandb.Table, ArtifactManifestEntry):
+             the path to a wandb.Table in an artifact, the table object, or ArtifactManifestEntry
+         table2 (str, wandb.Table):
+             the path to a wandb.Table in an artifact, the table object, or ArtifactManifestEntry
+         join_key (str, [str, str]):
+             key or keys to perform the join
+     """
+
+     _log_type = "joined-table"
+
+     def __init__(self, table1, table2, join_key):
+         super().__init__()
+
+         if not isinstance(join_key, str) and (
+             not isinstance(join_key, list) or len(join_key) != 2
+         ):
+             raise ValueError(
+                 "JoinedTable join_key should be a string or a list of two strings"
+             )
+
+         if not self._validate_table_input(table1):
+             raise ValueError(
+                 "JoinedTable table1 should be an artifact path to a table or wandb.Table object"
+             )
+
+         if not self._validate_table_input(table2):
+             raise ValueError(
+                 "JoinedTable table2 should be an artifact path to a table or wandb.Table object"
+             )
+
+         self._table1 = table1
+         self._table2 = table2
+         self._join_key = join_key
+
+     @classmethod
+     def from_json(cls, json_obj, source_artifact):
+         t1 = source_artifact.get(json_obj["table1"])
+         if t1 is None:
+             t1 = json_obj["table1"]
+
+         t2 = source_artifact.get(json_obj["table2"])
+         if t2 is None:
+             t2 = json_obj["table2"]
+
+         return cls(
+             t1,
+             t2,
+             json_obj["join_key"],
+         )
+
+     @staticmethod
+     def _validate_table_input(table):
+         """Helper method to validate that the table input is one of the supported types."""
+         return (
+             (isinstance(table, str) and table.endswith(".table.json"))
+             or isinstance(table, Table)
+             or isinstance(table, PartitionedTable)
+             or (hasattr(table, "ref_url") and table.ref_url().endswith(".table.json"))
+         )
+
+     def _ensure_table_in_artifact(self, table, artifact, table_ndx):
+         """Helper method to add the table to the incoming artifact. Returns the path."""
+         if isinstance(table, Table) or isinstance(table, PartitionedTable):
+             table_name = f"t{table_ndx}_{str(id(self))}"
+             if (
+                 table._artifact_source is not None
+                 and table._artifact_source.name is not None
+             ):
+                 table_name = os.path.basename(table._artifact_source.name)
+             entry = artifact.add(table, table_name)
+             table = entry.path
+         # Check if this is an ArtifactManifestEntry
+         elif hasattr(table, "ref_url"):
+             # Give the new object a unique, yet deterministic name
+             name = binascii.hexlify(base64.standard_b64decode(table.digest)).decode(
+                 "ascii"
+             )[:20]
+             entry = artifact.add_reference(
+                 table.ref_url(), "{}.{}.json".format(name, table.name.split(".")[-2])
+             )[0]
+             table = entry.path
+
+         err_str = "JoinedTable table:{} not found in artifact. Add a table to the artifact using Artifact#add(<table>, {}) before adding this JoinedTable"
+         if table not in artifact._manifest.entries:
+             raise ValueError(err_str.format(table, table))
+
+         return table
+
+     def to_json(self, artifact_or_run):
+         json_obj = {
+             "_type": JoinedTable._log_type,
+         }
+         if isinstance(artifact_or_run, wandb.wandb_sdk.wandb_run.Run):
+             artifact_entry_url = self._get_artifact_entry_ref_url()
+             if artifact_entry_url is None:
+                 raise ValueError(
+                     "JoinedTables must first be added to an Artifact before logging to a Run"
+                 )
+             json_obj["artifact_path"] = artifact_entry_url
+         else:
+             table1 = self._ensure_table_in_artifact(self._table1, artifact_or_run, 1)
+             table2 = self._ensure_table_in_artifact(self._table2, artifact_or_run, 2)
+             json_obj.update(
+                 {
+                     "table1": table1,
+                     "table2": table2,
+                     "join_key": self._join_key,
+                 }
+             )
+         return json_obj
+
+     def __ne__(self, other):
+         return not self.__eq__(other)
+
+     def _eq_debug(self, other, should_assert=False):
+         eq = isinstance(other, JoinedTable)
+         assert not should_assert or eq, "Found type {}, expected {}".format(
+             other.__class__, JoinedTable
+         )
+         eq = eq and self._join_key == other._join_key
+         assert not should_assert or eq, "Found {} join key, expected {}".format(
+             other._join_key, self._join_key
+         )
+         eq = eq and self._table1._eq_debug(other._table1, should_assert)
+         eq = eq and self._table2._eq_debug(other._table2, should_assert)
+         return eq
+
+     def __eq__(self, other):
+         return self._eq_debug(other, False)
+
+     def bind_to_run(self, *args, **kwargs):
+         raise ValueError("JoinedTables cannot be bound to runs")
+
+
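`JoinedTable` stores two table references plus the join key; serializing it with `to_json` runs `_ensure_table_in_artifact` so both tables end up in the same artifact. A minimal sketch, assuming `JoinedTable` is still re-exported from `wandb.data_types`; names and values are hypothetical:

    import wandb
    from wandb.data_types import JoinedTable

    users = wandb.Table(columns=["user_id", "name"], data=[["u1", "Ada"]])
    orders = wandb.Table(columns=["order_id", "user_id"], data=[["o1", "u1"]])

    joined = JoinedTable(users, orders, "user_id")
    artifact = wandb.Artifact("join-demo", type="dataset")
    artifact.add(joined, "users_orders")  # adds both tables as a side effect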
+ class _TableType(_dtypes.Type):
+     name = "table"
+     legacy_names = ["wandb.Table"]
+     types = [Table]
+
+     def __init__(self, column_types=None):
+         if column_types is None:
+             column_types = _dtypes.UnknownType()
+         if isinstance(column_types, dict):
+             column_types = _dtypes.TypedDictType(column_types)
+         elif not (
+             isinstance(column_types, _dtypes.TypedDictType)
+             or isinstance(column_types, _dtypes.UnknownType)
+         ):
+             raise TypeError("column_types must be a dict or TypedDictType")
+
+         self.params.update({"column_types": column_types})
+
+     def assign_type(self, wb_type=None):
+         if isinstance(wb_type, _TableType):
+             column_types = self.params["column_types"].assign_type(
+                 wb_type.params["column_types"]
+             )
+             if not isinstance(column_types, _dtypes.InvalidType):
+                 return _TableType(column_types)
+
+         return _dtypes.InvalidType()
+
+     @classmethod
+     def from_obj(cls, py_obj):
+         if not isinstance(py_obj, Table):
+             raise TypeError("py_obj must be a wandb.Table")
+         else:
+             return cls(py_obj._column_types)
+
+
+ class _JoinedTableType(_dtypes.Type):
+     name = "joined-table"
+     types = [JoinedTable]
+
+
+ class _PartitionedTableType(_dtypes.Type):
+     name = "partitioned-table"
+     types = [PartitionedTable]
+
+
+ _dtypes.TypeRegistry.add(_TableType)
+ _dtypes.TypeRegistry.add(_JoinedTableType)
+ _dtypes.TypeRegistry.add(_PartitionedTableType)
+ _dtypes.TypeRegistry.add(_ForeignKeyType)
+ _dtypes.TypeRegistry.add(_PrimaryKeyType)
+ _dtypes.TypeRegistry.add(_ForeignIndexType)
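Finally, a short sketch of the derived-data helpers defined on `Table` above, `add_computed_columns` and `get_dataframe`; it assumes pandas is installed, and the column values are hypothetical:

    import wandb

    table = wandb.Table(columns=["score"], data=[[0.2], [0.9]])

    # fn receives (ndx, row_dict) and returns the new columns for that row.
    table.add_computed_columns(lambda ndx, row: {"passed": row["score"] > 0.5})

    df = table.get_dataframe()  # pandas.DataFrame with columns ["score", "passed"]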