streamlit-nightly 1.37.2.dev20240805__py2.py3-none-any.whl → 1.37.2.dev20240807__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- streamlit/dataframe_util.py +328 -72
- streamlit/delta_generator.py +2 -1
- streamlit/elements/arrow.py +5 -8
- streamlit/elements/json.py +13 -13
- streamlit/elements/lib/column_config_utils.py +1 -35
- streamlit/elements/widgets/data_editor.py +28 -9
- streamlit/elements/write.py +21 -28
- streamlit/runtime/caching/cache_utils.py +12 -9
- streamlit/static/asset-manifest.json +16 -15
- streamlit/static/index.html +1 -1
- streamlit/static/static/js/{1451.e9542cc9.chunk.js → 1451.913b0f90.chunk.js} +1 -1
- streamlit/static/static/js/{1792.8bd6ce2a.chunk.js → 1792.eb8a836f.chunk.js} +1 -1
- streamlit/static/static/js/{2469.31e2695e.chunk.js → 2469.c4454803.chunk.js} +1 -1
- streamlit/static/static/js/3466.0cd981ca.chunk.js +2 -0
- streamlit/static/static/js/{3466.bb8e2e0c.chunk.js.LICENSE.txt → 3466.0cd981ca.chunk.js.LICENSE.txt} +10 -0
- streamlit/static/static/js/{3513.7dedbda2.chunk.js → 3513.577f3dc5.chunk.js} +1 -1
- streamlit/static/static/js/{4113.99983645.chunk.js → 4113.8b8c523d.chunk.js} +1 -1
- streamlit/static/static/js/{4335.fb48fd8e.chunk.js → 4335.2b77e051.chunk.js} +1 -1
- streamlit/static/static/js/{4477.b85593dd.chunk.js → 4477.edb1d80a.chunk.js} +1 -1
- streamlit/static/static/js/{5106.e53c485c.chunk.js → 5106.656a5db4.chunk.js} +1 -1
- streamlit/static/static/js/5267.b73f42da.chunk.js +2 -0
- streamlit/static/static/js/5267.b73f42da.chunk.js.LICENSE.txt +1 -0
- streamlit/static/static/js/{6853.93dd1c4c.chunk.js → 6853.e7b24972.chunk.js} +1 -1
- streamlit/static/static/js/8148.539ddabe.chunk.js +1 -0
- streamlit/static/static/js/{8427.e051b59f.chunk.js → 8427.4594845a.chunk.js} +1 -1
- streamlit/static/static/js/{8477.4d2a23c2.chunk.js → 8477.90b06bd9.chunk.js} +1 -1
- streamlit/static/static/js/main.80efcd23.js +2 -0
- streamlit/static/static/js/{main.95daee38.js.LICENSE.txt → main.80efcd23.js.LICENSE.txt} +23 -1
- streamlit/type_util.py +46 -14
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/METADATA +2 -2
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/RECORD +35 -33
- streamlit/static/static/js/3466.bb8e2e0c.chunk.js +0 -2
- streamlit/static/static/js/8148.c7db8490.chunk.js +0 -1
- streamlit/static/static/js/main.95daee38.js +0 -2
- {streamlit_nightly-1.37.2.dev20240805.data → streamlit_nightly-1.37.2.dev20240807.data}/scripts/streamlit.cmd +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/WHEEL +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/entry_points.txt +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/top_level.txt +0 -0
streamlit/dataframe_util.py
CHANGED
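Nearly all of the substance of this nightly-to-nightly diff sits in `streamlit/dataframe_util.py` (+328 -72): the conversion layer gains support for Polars (`DataFrame`, `Series`, `LazyFrame`), PyArrow arrays, pandas `ExtensionArray`s, generator functions, and a range of additional containers (`deque`, `map`, `array.array`, `ChainMap`, `MappingProxyType`, `UserDict`, named tuples, dataclass instances, enum classes). The other Python changes wire this through `st.write`, `st.json`, `st.data_editor`, and caching; the static-asset renames are the rebuilt frontend bundle that accompanies every nightly.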
@@ -17,8 +17,14 @@
 from __future__ import annotations
 
 import contextlib
+import dataclasses
+import inspect
 import math
+import re
+from collections import ChainMap, UserDict, deque
+from collections.abc import ItemsView, KeysView, ValuesView
 from enum import Enum, EnumMeta, auto
+from types import MappingProxyType
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -34,9 +40,14 @@ from typing import (
 
 from typing_extensions import TypeAlias, TypeGuard
 
-import streamlit as st
 from streamlit import config, errors, logger, string_util
-from streamlit.type_util import …
+from streamlit.type_util import (
+    has_callable_attr,
+    is_custom_dict,
+    is_dataclass_instance,
+    is_namedtuple,
+    is_type,
+)
 
 if TYPE_CHECKING:
     import numpy as np
@@ -51,6 +62,7 @@ _LOGGER: Final = logger.get_logger(__name__)
 # Maximum number of rows to request from an unevaluated (out-of-core) dataframe
 _MAX_UNEVALUATED_DF_ROWS = 10000
 
+_PANDAS_DATA_OBJECT_TYPE_RE: Final = re.compile(r"^pandas.*$")
 _PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
 _SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
 _SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
@@ -60,7 +72,12 @@ _MODIN_DF_TYPE_STR: Final = "modin.pandas.dataframe.DataFrame"
 _MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
 _SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
 _SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"
-
+_SNOWPANDAS_INDEX_TYPE_STR: Final = (
+    "snowflake.snowpark.modin.plugin.extensions.index.Index"
+)
+_POLARS_DATAFRAME: Final = "polars.dataframe.frame.DataFrame"
+_POLARS_SERIES: Final = "polars.series.series.Series"
+_POLARS_LAZYFRAME: Final = "polars.lazyframe.frame.LazyFrame"
 
 V_co = TypeVar(
     "V_co",
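These constants are fully qualified class names rather than imported types: the detection helpers below compare an object's type against the string, so Streamlit can recognize Polars or Snowpark objects without importing those optional packages. A minimal sketch of how such a string-based check can work — the names here are hypothetical, and the real logic lives in `streamlit.type_util.is_type`:

```python
# Hypothetical sketch of string-based type detection, for illustration only.
def _fqname(obj: object) -> str:
    t = type(obj)
    return f"{t.__module__}.{t.__qualname__}"

def looks_like_polars_dataframe(obj: object) -> bool:
    # Comparing against the fully qualified name means polars never has
    # to be installed, let alone imported, to run this check.
    return _fqname(obj) == "polars.dataframe.frame.DataFrame"
```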
@@ -111,14 +128,19 @@ class DataFormat(Enum):
     PANDAS_DATAFRAME = auto()  # pd.DataFrame
     PANDAS_SERIES = auto()  # pd.Series
     PANDAS_INDEX = auto()  # pd.Index
+    PANDAS_ARRAY = auto()  # pd.array
     NUMPY_LIST = auto()  # np.array[Scalar]
     NUMPY_MATRIX = auto()  # np.array[List[Scalar]]
     PYARROW_TABLE = auto()  # pyarrow.Table
+    PYARROW_ARRAY = auto()  # pyarrow.Array
     SNOWPARK_OBJECT = auto()  # Snowpark DataFrame, Table, List[Row]
     PYSPARK_OBJECT = auto()  # pyspark.DataFrame
     MODIN_OBJECT = auto()  # Modin DataFrame, Series
     SNOWPANDAS_OBJECT = auto()  # Snowpandas DataFrame, Series
     PANDAS_STYLER = auto()  # pandas Styler
+    POLARS_DATAFRAME = auto()  # polars.dataframe.frame.DataFrame
+    POLARS_LAZYFRAME = auto()  # polars.lazyframe.frame.LazyFrame
+    POLARS_SERIES = auto()  # polars.series.series.Series
     LIST_OF_RECORDS = auto()  # List[Dict[str, Scalar]]
     LIST_OF_ROWS = auto()  # List[List[Scalar]]
     LIST_OF_VALUES = auto()  # List[Scalar]
@@ -136,9 +158,9 @@ def is_dataframe_like(obj: object) -> bool:
     This does not include basic collection types like list, dict, tuple, etc.
     """
 
-
-
-    ):
+    # We exclude list and dict here since there are some cases where a list or dict is
+    # considered a dataframe-like object.
+    if obj is None or isinstance(obj, (tuple, set, str, bytes, int, float, bool)):
         # Basic types are not considered dataframe-like, so we can
         # return False early to avoid unnecessary checks.
         return False
@@ -148,13 +170,19 @@ def is_dataframe_like(obj: object) -> bool:
         DataFormat.PANDAS_SERIES,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.NUMPY_LIST,
         DataFormat.NUMPY_MATRIX,
         DataFormat.PYARROW_TABLE,
+        DataFormat.PYARROW_ARRAY,
         DataFormat.SNOWPARK_OBJECT,
         DataFormat.PYSPARK_OBJECT,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
+        DataFormat.POLARS_SERIES,
+        DataFormat.POLARS_DATAFRAME,
+        DataFormat.POLARS_LAZYFRAME,
+        DataFormat.COLUMN_SERIES_MAPPING,
     ]
 
 
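The practical effect of widening this list shows up in `st.write` (see the `elements/write.py` change above), which renders anything dataframe-like via `st.dataframe`. For instance, a dict of Series — COLUMN_SERIES_MAPPING, newly added to this list — should now take the dataframe path; an illustrative, unverified sketch assuming this nightly build:

```python
# Illustrative only: with COLUMN_SERIES_MAPPING treated as dataframe-like,
# a dict of Series is expected to render as a table rather than a raw dict.
import pandas as pd
import streamlit as st

st.write({"col1": pd.Series([1, 2, 3]), "col2": pd.Series([4, 5, 6])})
```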
@@ -165,7 +193,9 @@ def is_unevaluated_data_object(obj: object) -> bool:
     - Snowpark DataFrame / Table
     - PySpark DataFrame
     - Modin DataFrame / Series
-    - Snowpandas DataFrame / Series
+    - Snowpandas DataFrame / Series / Index
+    - Polars LazyFrame
+    - Generator functions
 
     Unevaluated means that the data is not yet in the local memory.
     Unevaluated data objects are treated differently from other data objects by only
@@ -176,9 +206,16 @@ def is_unevaluated_data_object(obj: object) -> bool:
         or is_pyspark_data_object(obj)
         or is_snowpandas_data_object(obj)
         or is_modin_data_object(obj)
+        or is_polars_lazyframe(obj)
+        or inspect.isgeneratorfunction(obj)
     )
 
 
+def is_pandas_data_object(obj: object) -> bool:
+    """True if obj is a Pandas object (e.g. DataFrame, Series, Index, Styler, ...)."""
+    return is_type(obj, _PANDAS_DATA_OBJECT_TYPE_RE)
+
+
 def is_snowpark_data_object(obj: object) -> bool:
     """True if obj is a Snowpark DataFrame or Table."""
     return is_type(obj, _SNOWPARK_TABLE_TYPE_STR) or is_type(obj, _SNOWPARK_DF_TYPE_STR)
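Generator functions now count as unevaluated data: the conversion path further down calls the function and materializes at most `_MAX_UNEVALUATED_DF_ROWS` rows. A hedged usage sketch, assuming this nightly build:

```python
import streamlit as st

def row_stream():
    # Only the first 10,000 rows (the _MAX_UNEVALUATED_DF_ROWS cap) are
    # materialized; a caption warns when the output was truncated.
    for i in range(1_000_000):
        yield {"n": i, "square": i * i}

# Pass the generator *function* itself, not row_stream():
st.dataframe(row_stream)
```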
@@ -186,13 +223,12 @@ def is_snowpark_data_object(obj: object) -> bool:
 
 def is_snowpark_row_list(obj: object) -> bool:
     """True if obj is a list of snowflake.snowpark.row.Row."""
-
-
-
-
-
-
-    return is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+    return (
+        isinstance(obj, list)
+        and len(obj) > 0
+        and is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+        and has_callable_attr(obj[0], "as_dict")
+    )
 
 
 def is_pyspark_data_object(obj: object) -> bool:
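The rewritten check also requires a callable `as_dict` before the rows are later converted via `row.as_dict()`. `has_callable_attr` comes from `streamlit.type_util` (see the `type_util.py` change, +46 -14); a plausible sketch of what such a helper checks — hypothetical re-implementation, not the library's code:

```python
# Hypothetical re-implementation for illustration; the real helper is
# streamlit.type_util.has_callable_attr.
def has_callable_attr(obj: object, name: str) -> bool:
    # True only if the attribute exists *and* can be called.
    return hasattr(obj, name) and callable(getattr(obj, name))
```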
@@ -211,16 +247,105 @@ def is_modin_data_object(obj: object) -> bool:
 
 def is_snowpandas_data_object(obj: object) -> bool:
     """True if obj is a Snowpark Pandas DataFrame or Series."""
-    return …
-        obj, …
+    return (
+        is_type(obj, _SNOWPANDAS_DF_TYPE_STR)
+        or is_type(obj, _SNOWPANDAS_SERIES_TYPE_STR)
+        or is_type(obj, _SNOWPANDAS_INDEX_TYPE_STR)
     )
 
 
+def is_polars_dataframe(obj: object) -> bool:
+    """True if obj is a Polars Dataframe."""
+    return is_type(obj, _POLARS_DATAFRAME)
+
+
+def is_polars_series(obj: object) -> bool:
+    """True if obj is a Polars Series."""
+    return is_type(obj, _POLARS_SERIES)
+
+
+def is_polars_lazyframe(obj: object) -> bool:
+    """True if obj is a Polars Lazyframe."""
+    return is_type(obj, _POLARS_LAZYFRAME)
+
+
 def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
     """True if obj is a pandas Styler."""
     return is_type(obj, _PANDAS_STYLER_TYPE_STR)
 
 
+def _is_list_of_scalars(data: Iterable[Any]) -> bool:
+    """Check if the list only contains scalar values."""
+    from pandas.api.types import infer_dtype
+
+    # Overview on all value that are interpreted as scalar:
+    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
+    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
+
+
+def _iterable_to_list(
+    iterable: Iterable[Any], max_iterations: int | None = None
+) -> list[Any]:
+    """Convert an iterable to a list.
+
+    Parameters
+    ----------
+    iterable : Iterable
+        The iterable to convert to a list.
+
+    max_iterations : int or None
+        The maximum number of iterations to perform. If None, all iterations are performed.
+
+    Returns
+    -------
+    list
+        The converted list.
+    """
+    if max_iterations is None:
+        return list(iterable)
+
+    result = []
+    for i, item in enumerate(iterable):
+        if i >= max_iterations:
+            break
+        result.append(item)
+    return result
+
+
+def _fix_column_naming(data_df: DataFrame) -> DataFrame:
+    """Rename the first column to "value" if it is not named
+    and if there is only one column in the dataframe.
+
+    The default name of the first column is 0 if it is not named
+    which is not very descriptive.
+    """
+
+    if len(data_df.columns) == 1 and data_df.columns[0] == 0:
+        # Pandas automatically names the first column with 0 if it is not named.
+        # We rename it to "value" to make it more descriptive if there is only
+        # one column in the dataframe.
+        data_df.rename(columns={0: "value"}, inplace=True)
+    return data_df
+
+
+def _dict_to_pandas_df(data: dict[Any, Any]) -> DataFrame:
+    """Convert a key-value dict to a Pandas DataFrame.
+
+    Parameters
+    ----------
+    data : dict
+        The dict to convert to a Pandas DataFrame.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The converted Pandas DataFrame.
+    """
+    import pandas as pd
+
+    return _fix_column_naming(pd.DataFrame.from_dict(data, orient="index"))
+
+
 def convert_anything_to_pandas_df(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
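`_dict_to_pandas_df` plus `_fix_column_naming` is what turns a plain key-value mapping into a readable one-column frame. The roughly equivalent pandas calls, for illustration:

```python
import pandas as pd

# What _dict_to_pandas_df produces for a key-value dict (illustrative):
df = pd.DataFrame.from_dict({"a": 1, "b": 2}, orient="index")
# from_dict leaves the single column named 0; _fix_column_naming renames it:
df = df.rename(columns={0: "value"})
print(df)
#    value
# a      1
# b      2
```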
@@ -246,81 +371,140 @@ def convert_anything_to_pandas_df(
     pandas.DataFrame
 
     """
+    import array
+
     import numpy as np
     import pandas as pd
 
     if isinstance(data, pd.DataFrame):
         return data.copy() if ensure_copy else cast(pd.DataFrame, data)
 
-    if isinstance(data, (pd.Series, pd.Index)):
+    if isinstance(data, (pd.Series, pd.Index, pd.api.extensions.ExtensionArray)):
         return pd.DataFrame(data)
 
     if is_pandas_styler(data):
         return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)
 
     if isinstance(data, np.ndarray):
-        return …
+        return (
+            pd.DataFrame([])
+            if len(data.shape) == 0
+            else _fix_column_naming(pd.DataFrame(data))
+        )
+
+    if is_polars_dataframe(data):
+        data = data.clone() if ensure_copy else data
+        return data.to_pandas()
+
+    if is_polars_series(data):
+        data = data.clone() if ensure_copy else data
+        return data.to_pandas().to_frame()
+
+    if is_polars_lazyframe(data):
+        data = data.limit(max_unevaluated_rows).collect().to_pandas()
+        if data.shape[0] == max_unevaluated_rows:
+            _show_data_information(
+                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
+                "rows. Call `collect()` on the dataframe to show more."
+            )
+        return cast(pd.DataFrame, data)
 
     if is_modin_data_object(data):
         data = data.head(max_unevaluated_rows)._to_pandas()
 
-        if isinstance(data, pd.Series):
+        if isinstance(data, (pd.Series, pd.Index)):
            data = data.to_frame()
 
         if data.shape[0] == max_unevaluated_rows:
-
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `_to_pandas()` on the …
+                "rows. Call `_to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)
 
     if is_pyspark_data_object(data):
         data = data.limit(max_unevaluated_rows).toPandas()
         if data.shape[0] == max_unevaluated_rows:
-
+            _show_data_information(
+                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
+                "rows. Call `toPandas()` on the data object to show more."
+            )
+        return cast(pd.DataFrame, data)
+
+    if is_snowpandas_data_object(data):
+        data = data[:max_unevaluated_rows].to_pandas()
+
+        if isinstance(data, (pd.Series, pd.Index)):
+            data = data.to_frame()
+
+        if data.shape[0] == max_unevaluated_rows:
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call ` …
+                "rows. Call `to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)
 
     if is_snowpark_data_object(data):
         data = data.limit(max_unevaluated_rows).to_pandas()
         if data.shape[0] == max_unevaluated_rows:
-
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `to_pandas()` on the …
+                "rows. Call `to_pandas()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)
 
-    if …
-
+    if is_snowpark_row_list(data):
+        return pd.DataFrame([row.as_dict() for row in data])
 
-
-
+    if has_callable_attr(data, "to_pandas"):
+        return pd.DataFrame(data.to_pandas())
+
+    # Support for generator functions
+    if inspect.isgeneratorfunction(data):
+        data = _fix_column_naming(
+            pd.DataFrame(_iterable_to_list(data(), max_iterations=max_unevaluated_rows))
+        )
 
         if data.shape[0] == max_unevaluated_rows:
-
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. …
+                "rows. Convert the data to a list to show more."
             )
-        return …
+        return data
 
-
-
-
-
-
+    if isinstance(data, EnumMeta):
+        # Support for enum classes
+        return _fix_column_naming(pd.DataFrame([c.value for c in data]))  # type: ignore
+
+    # Support for some list like objects
+    if isinstance(data, (deque, map, array.ArrayType)):
+        return _fix_column_naming(pd.DataFrame(list(data)))
+
+    # Support for Streamlit's custom dict-like objects
+    if is_custom_dict(data):
+        return _dict_to_pandas_df(data.to_dict())
+
+    # Support for named tuples
+    if is_namedtuple(data):
+        return _dict_to_pandas_df(data._asdict())
+
+    # Support for dataclass instances
+    if is_dataclass_instance(data):
+        return _dict_to_pandas_df(dataclasses.asdict(data))
+
+    # Support for dict-like objects
+    if isinstance(data, (ChainMap, MappingProxyType, UserDict)):
+        return _dict_to_pandas_df(dict(data))
 
     # Try to convert to pandas.DataFrame. This will raise an error is df is not
     # compatible with the pandas.DataFrame constructor.
     try:
-        return pd.DataFrame(data)
-
+        return _fix_column_naming(pd.DataFrame(data))
     except ValueError as ex:
         if isinstance(data, dict):
             with contextlib.suppress(ValueError):
                 # Try to use index orient as back-up to support key-value dicts
-                return …
+                return _dict_to_pandas_df(data)
         raise errors.StreamlitAPIException(
             f"""
 Unable to convert object of type `{type(data)}` to `pandas.DataFrame`.
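Taken together, `st.dataframe` — and anything else funnelling through `convert_anything_to_pandas_df` — now accepts a much wider set of inputs. An illustrative sketch, assuming this nightly build plus `polars` installed:

```python
import dataclasses
from collections import deque

import polars as pl
import streamlit as st

@dataclasses.dataclass
class Point:
    x: int
    y: int

st.dataframe(pl.DataFrame({"a": [1, 2, 3]}))  # Polars -> to_pandas()
st.dataframe(Point(1, 2))                     # dataclass -> key-value frame
st.dataframe(deque([3, 1, 4, 1, 5]))          # deque -> one "value" column
```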
@@ -419,6 +603,14 @@ def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
     return reader.read_pandas()
 
 
+def _show_data_information(msg: str) -> None:
+    """Show a message to the user with important information
+    about the processed dataset."""
+    from streamlit.delta_generator import main_dg
+
+    main_dg.caption(msg)
+
+
 def convert_anything_to_arrow_bytes(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
@@ -449,8 +641,22 @@
     if isinstance(data, pa.Table):
         return convert_arrow_table_to_arrow_bytes(data)
 
+    if is_pandas_data_object(data):
+        # All pandas data objects should be handled via our pandas
+        # conversion logic. We are already calling it here
+        # to ensure that its not handled via the interchange
+        # protocol support below.
+        df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
+        return convert_pandas_df_to_arrow_bytes(df)
+
+    if is_polars_dataframe(data):
+        return convert_arrow_table_to_arrow_bytes(data.to_arrow())
+
+    if is_polars_series(data):
+        return convert_arrow_table_to_arrow_bytes(data.to_frame().to_arrow())
+
     # Fallback: try to convert to pandas DataFrame
-    # and then to Arrow bytes
+    # and then to Arrow bytes.
     df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
     return convert_pandas_df_to_arrow_bytes(df)
 
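Polars frames skip pandas entirely on the serialization path: `to_arrow()` hands over an Arrow table that can be written straight to the IPC stream format Streamlit ships to the frontend. A hedged sketch of that handoff (assumes `polars` and `pyarrow`; Streamlit's actual writer lives in `convert_arrow_table_to_arrow_bytes`):

```python
import polars as pl
import pyarrow as pa

df = pl.DataFrame({"a": [1, 2, 3]})
table = df.to_arrow()  # no pandas round-trip involved

# Serialize to Arrow IPC stream bytes, roughly what
# convert_arrow_table_to_arrow_bytes does:
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
arrow_bytes = sink.getvalue().to_pybytes()
```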
@@ -475,7 +681,9 @@ def convert_anything_to_sequence(obj: OptionSequence[V_co]) -> Sequence[V_co]:
     if obj is None:
         return []  # type: ignore
 
-    if isinstance(…
+    if isinstance(
+        obj, (str, list, tuple, set, range, EnumMeta, deque, map)
+    ) and not is_snowpark_row_list(obj):
         # This also ensures that the sequence is copied to prevent
         # potential mutations to the original object.
         return list(obj)
@@ -569,8 +777,7 @@ def _maybe_truncate_table(
     # we just display the exact numbers.
     displayed_rows = str(table.num_rows)
     total_rows = str(table.num_rows + truncated_rows)
-
-    st.caption(
+    _show_data_information(
         f"⚠️ Showing {displayed_rows} out of {total_rows} "
         "rows due to data size limitations."
     )
@@ -579,7 +786,8 @@
 
 
 def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
-    """Return True if the column type is known to cause issues during …
+    """Return True if the column type is known to cause issues during
+    Arrow conversion."""
     from pandas.api.types import infer_dtype, is_dict_like, is_list_like
 
     if column.dtype.kind in [
@@ -610,7 +818,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
     ]:
         return True
     elif inferred_type == "mixed":
-        # This includes most of the more complex/custom types (objects, dicts, …
+        # This includes most of the more complex/custom types (objects, dicts,
+        # lists, ...)
         if len(column) == 0 or not hasattr(column, "iloc"):
             # The column seems to be invalid, so we assume it is incompatible.
             # But this would most likely never happen since empty columns
@@ -622,7 +831,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
 
         if (
             not is_list_like(first_value)
-            # dicts are list-like, but have issues in Arrow JS (see comments in …
+            # dicts are list-like, but have issues in Arrow JS (see comments in
+            # Quiver.ts)
             or is_dict_like(first_value)
             # Frozensets are list-like, but are not compatible with pyarrow.
             or isinstance(first_value, frozenset)
@@ -684,15 +894,6 @@ def fix_arrow_incompatible_column_types(
     return df_copy if df_copy is not None else df
 
 
-def _is_list_of_scalars(data: Iterable[Any]) -> bool:
-    """Check if the list only contains scalar values."""
-    from pandas.api.types import infer_dtype
-
-    # Overview on all value that are interpreted as scalar:
-    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
-    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
-
-
 def determine_data_format(input_data: Any) -> DataFormat:
     """Determine the data format of the input data.
 
@@ -706,6 +907,8 @@ def determine_data_format(input_data: Any) -> DataFormat:
     DataFormat
         The data format of the input data.
     """
+    import array
+
     import numpy as np
     import pandas as pd
     import pyarrow as pa
@@ -722,26 +925,49 @@ def determine_data_format(input_data: Any) -> DataFormat:
         return DataFormat.NUMPY_MATRIX
     elif isinstance(input_data, pa.Table):
         return DataFormat.PYARROW_TABLE
+    elif isinstance(input_data, pa.Array):
+        return DataFormat.PYARROW_ARRAY
     elif isinstance(input_data, pd.Series):
         return DataFormat.PANDAS_SERIES
     elif isinstance(input_data, pd.Index):
         return DataFormat.PANDAS_INDEX
     elif is_pandas_styler(input_data):
         return DataFormat.PANDAS_STYLER
-    elif …
-        return DataFormat.…
+    elif isinstance(input_data, pd.api.extensions.ExtensionArray):
+        return DataFormat.PANDAS_ARRAY
+    elif is_polars_series(input_data):
+        return DataFormat.POLARS_SERIES
+    elif is_polars_dataframe(input_data):
+        return DataFormat.POLARS_DATAFRAME
+    elif is_polars_lazyframe(input_data):
+        return DataFormat.POLARS_LAZYFRAME
     elif is_modin_data_object(input_data):
         return DataFormat.MODIN_OBJECT
     elif is_snowpandas_data_object(input_data):
         return DataFormat.SNOWPANDAS_OBJECT
     elif is_pyspark_data_object(input_data):
         return DataFormat.PYSPARK_OBJECT
-    elif …
+    elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
+        return DataFormat.SNOWPARK_OBJECT
+    elif isinstance(
+        input_data, (range, EnumMeta, KeysView, ValuesView, deque, map, array.ArrayType)
+    ):
+        return DataFormat.LIST_OF_VALUES
+    elif (
+        isinstance(input_data, (ChainMap, MappingProxyType, UserDict))
+        or is_dataclass_instance(input_data)
+        or is_namedtuple(input_data)
+        or is_custom_dict(input_data)
+    ):
+        return DataFormat.KEY_VALUE_DICT
+    elif isinstance(input_data, (ItemsView, enumerate)):
+        return DataFormat.LIST_OF_ROWS
+    elif isinstance(input_data, (list, tuple, set, frozenset)):
         if _is_list_of_scalars(input_data):
             # -> one-dimensional data structure
             if isinstance(input_data, tuple):
                 return DataFormat.TUPLE_OF_VALUES
-            if isinstance(input_data, set):
+            if isinstance(input_data, (set, frozenset)):
                 return DataFormat.SET_OF_VALUES
             return DataFormat.LIST_OF_VALUES
         else:
@@ -751,23 +977,23 @@ def determine_data_format(input_data: Any) -> DataFormat:
             first_element = next(iter(input_data))
             if isinstance(first_element, dict):
                 return DataFormat.LIST_OF_RECORDS
-            if isinstance(first_element, (list, tuple, set)):
+            if isinstance(first_element, (list, tuple, set, frozenset)):
                 return DataFormat.LIST_OF_ROWS
     elif isinstance(input_data, dict):
         if not input_data:
             return DataFormat.KEY_VALUE_DICT
         if len(input_data) > 0:
             first_value = next(iter(input_data.values()))
+            # In the future, we could potentially also support tight & split formats
             if isinstance(first_value, dict):
                 return DataFormat.COLUMN_INDEX_MAPPING
             if isinstance(first_value, (list, tuple)):
                 return DataFormat.COLUMN_VALUE_MAPPING
             if isinstance(first_value, pd.Series):
                 return DataFormat.COLUMN_SERIES_MAPPING
-            # …
-
-
-            return DataFormat.KEY_VALUE_DICT
+            # Use key-value dict as fallback. However, if the values of the dict
+            # contains mixed types, it will become non-editable in the frontend.
+            return DataFormat.KEY_VALUE_DICT
     return DataFormat.UNKNOWN
 
 
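A quick, illustrative tour of the new detection branches; expected results are read off the code above (note this is an internal module, so the import is subject to change):

```python
from collections import ChainMap, deque

from streamlit.dataframe_util import DataFormat, determine_data_format

# Illustrative expectations based on the branches above:
assert determine_data_format(deque([1, 2, 3])) == DataFormat.LIST_OF_VALUES
assert determine_data_format(ChainMap({"a": 1}, {})) == DataFormat.KEY_VALUE_DICT
assert determine_data_format(enumerate(["x", "y"])) == DataFormat.LIST_OF_ROWS
assert determine_data_format(frozenset({1, 2})) == DataFormat.SET_OF_VALUES
```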
@@ -783,12 +1009,30 @@ def _unify_missing_values(df: DataFrame) -> DataFrame:
     return df.fillna(np.nan).replace([np.nan], [None])
 
 
+def _pandas_df_to_series(df: DataFrame) -> Series[Any]:
+    """Convert a Pandas DataFrame to a Pandas Series by selecting the first column.
+
+    Raises
+    ------
+    ValueError
+        If the DataFrame has more than one column.
+    """
+    # Select first column in dataframe and create a new series based on the values
+    if len(df.columns) != 1:
+        raise ValueError(
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
+        )
+    return df[df.columns[0]]
+
+
 def convert_pandas_df_to_data_format(
     df: DataFrame, data_format: DataFormat
 ) -> (
     DataFrame
     | Series[Any]
     | pa.Table
+    | pa.Array
     | np.ndarray[Any, np.dtype[Any]]
     | tuple[Any]
     | list[Any]
@@ -818,6 +1062,7 @@
         DataFormat.PYSPARK_OBJECT,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
     ]:
@@ -838,13 +1083,23 @@
         import pyarrow as pa
 
         return pa.Table.from_pandas(df)
+    elif data_format == DataFormat.PYARROW_ARRAY:
+        import pyarrow as pa
+
+        return pa.Array.from_pandas(_pandas_df_to_series(df))
     elif data_format == DataFormat.PANDAS_SERIES:
-
-
-
-
-
-
+        return _pandas_df_to_series(df)
+    elif (
+        data_format == DataFormat.POLARS_DATAFRAME
+        or data_format == DataFormat.POLARS_LAZYFRAME
+    ):
+        import polars as pl
+
+        return pl.from_pandas(df)
+    elif data_format == DataFormat.POLARS_SERIES:
+        import polars as pl
+
+        return pl.from_pandas(_pandas_df_to_series(df))
     elif data_format == DataFormat.LIST_OF_RECORDS:
         return _unify_missing_values(df).to_dict(orient="records")
     elif data_format == DataFormat.LIST_OF_ROWS:
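This reverse conversion is what lets widgets such as `st.data_editor` (see the `elements/widgets/data_editor.py` change above) hand edited data back in the same format the caller passed in. A hedged sketch, assuming this nightly build with `polars` installed:

```python
import polars as pl
import streamlit as st

df = pl.DataFrame({"a": [1, 2, 3]})
edited = st.data_editor(df)  # edits happen on an Arrow/pandas copy internally...
# ...but are expected back in the input's format via convert_pandas_df_to_data_format:
assert isinstance(edited, pl.DataFrame)
```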
@@ -868,7 +1123,8 @@
         return_list = df[df.columns[0]].tolist()
     elif len(df.columns) >= 1:
         raise ValueError(
-
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
         )
     if data_format == DataFormat.TUPLE_OF_VALUES:
         return tuple(return_list)