streamlit-nightly 1.37.2.dev20240805__py2.py3-none-any.whl → 1.37.2.dev20240806__py2.py3-none-any.whl
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their respective public registries.
- streamlit/dataframe_util.py +254 -69
- streamlit/delta_generator.py +2 -1
- streamlit/elements/arrow.py +5 -8
- streamlit/elements/json.py +13 -13
- streamlit/elements/lib/column_config_utils.py +1 -35
- streamlit/elements/widgets/data_editor.py +28 -9
- streamlit/elements/write.py +20 -27
- streamlit/runtime/caching/cache_utils.py +12 -9
- streamlit/static/asset-manifest.json +16 -15
- streamlit/static/index.html +1 -1
- streamlit/static/static/js/{1451.e9542cc9.chunk.js → 1451.913b0f90.chunk.js} +1 -1
- streamlit/static/static/js/{1792.8bd6ce2a.chunk.js → 1792.eb8a836f.chunk.js} +1 -1
- streamlit/static/static/js/{2469.31e2695e.chunk.js → 2469.c4454803.chunk.js} +1 -1
- streamlit/static/static/js/3466.0cd981ca.chunk.js +2 -0
- streamlit/static/static/js/{3466.bb8e2e0c.chunk.js.LICENSE.txt → 3466.0cd981ca.chunk.js.LICENSE.txt} +10 -0
- streamlit/static/static/js/{3513.7dedbda2.chunk.js → 3513.577f3dc5.chunk.js} +1 -1
- streamlit/static/static/js/{4113.99983645.chunk.js → 4113.8b8c523d.chunk.js} +1 -1
- streamlit/static/static/js/{4335.fb48fd8e.chunk.js → 4335.2b77e051.chunk.js} +1 -1
- streamlit/static/static/js/{4477.b85593dd.chunk.js → 4477.edb1d80a.chunk.js} +1 -1
- streamlit/static/static/js/{5106.e53c485c.chunk.js → 5106.656a5db4.chunk.js} +1 -1
- streamlit/static/static/js/5267.b73f42da.chunk.js +2 -0
- streamlit/static/static/js/5267.b73f42da.chunk.js.LICENSE.txt +1 -0
- streamlit/static/static/js/{6853.93dd1c4c.chunk.js → 6853.e7b24972.chunk.js} +1 -1
- streamlit/static/static/js/8148.89abd285.chunk.js +1 -0
- streamlit/static/static/js/{8427.e051b59f.chunk.js → 8427.4594845a.chunk.js} +1 -1
- streamlit/static/static/js/{8477.4d2a23c2.chunk.js → 8477.90b06bd9.chunk.js} +1 -1
- streamlit/static/static/js/main.5d7956c8.js +2 -0
- streamlit/static/static/js/{main.95daee38.js.LICENSE.txt → main.5d7956c8.js.LICENSE.txt} +23 -1
- streamlit/type_util.py +46 -14
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/METADATA +1 -1
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/RECORD +35 -33
- streamlit/static/static/js/3466.bb8e2e0c.chunk.js +0 -2
- streamlit/static/static/js/8148.c7db8490.chunk.js +0 -1
- streamlit/static/static/js/main.95daee38.js +0 -2
- {streamlit_nightly-1.37.2.dev20240805.data → streamlit_nightly-1.37.2.dev20240806.data}/scripts/streamlit.cmd +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/WHEEL +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/entry_points.txt +0 -0
- {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/top_level.txt +0 -0
streamlit/dataframe_util.py
CHANGED
@@ -17,8 +17,14 @@
 from __future__ import annotations

 import contextlib
+import dataclasses
+import inspect
 import math
+import re
+from collections import ChainMap, UserDict, deque
+from collections.abc import ItemsView, KeysView, ValuesView
 from enum import Enum, EnumMeta, auto
+from types import MappingProxyType
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -34,9 +40,14 @@ from typing import (

 from typing_extensions import TypeAlias, TypeGuard

-import streamlit as st
 from streamlit import config, errors, logger, string_util
-from streamlit.type_util import is_type
+from streamlit.type_util import (
+    has_callable_attr,
+    is_custom_dict,
+    is_dataclass_instance,
+    is_namedtuple,
+    is_type,
+)

 if TYPE_CHECKING:
     import numpy as np
@@ -51,6 +62,7 @@ _LOGGER: Final = logger.get_logger(__name__)
 # Maximum number of rows to request from an unevaluated (out-of-core) dataframe
 _MAX_UNEVALUATED_DF_ROWS = 10000

+_PANDAS_DATA_OBJECT_TYPE_RE: Final = re.compile(r"^pandas.*$")
 _PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
 _SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
 _SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
@@ -61,7 +73,6 @@ _MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
 _SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
 _SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"

-
 V_co = TypeVar(
     "V_co",
     covariant=True,  # https://peps.python.org/pep-0484/#covariance-and-contravariance
@@ -111,9 +122,11 @@ class DataFormat(Enum):
     PANDAS_DATAFRAME = auto()  # pd.DataFrame
     PANDAS_SERIES = auto()  # pd.Series
     PANDAS_INDEX = auto()  # pd.Index
+    PANDAS_ARRAY = auto()  # pd.array
     NUMPY_LIST = auto()  # np.array[Scalar]
     NUMPY_MATRIX = auto()  # np.array[List[Scalar]]
     PYARROW_TABLE = auto()  # pyarrow.Table
+    PYARROW_ARRAY = auto()  # pyarrow.Array
     SNOWPARK_OBJECT = auto()  # Snowpark DataFrame, Table, List[Row]
     PYSPARK_OBJECT = auto()  # pyspark.DataFrame
     MODIN_OBJECT = auto()  # Modin DataFrame, Series
@@ -136,9 +149,9 @@ def is_dataframe_like(obj: object) -> bool:
     This does not include basic collection types like list, dict, tuple, etc.
     """

-    if obj is None or isinstance(
-        obj, (list, tuple, set, dict, str, bytes, int, float, bool)
-    ):
+    # We exclude list and dict here since there are some cases where a list or dict is
+    # considered a dataframe-like object.
+    if obj is None or isinstance(obj, (tuple, set, str, bytes, int, float, bool)):
         # Basic types are not considered dataframe-like, so we can
         # return False early to avoid unnecessary checks.
         return False
@@ -148,13 +161,16 @@ def is_dataframe_like(obj: object) -> bool:
         DataFormat.PANDAS_SERIES,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.NUMPY_LIST,
         DataFormat.NUMPY_MATRIX,
         DataFormat.PYARROW_TABLE,
+        DataFormat.PYARROW_ARRAY,
         DataFormat.SNOWPARK_OBJECT,
         DataFormat.PYSPARK_OBJECT,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
+        DataFormat.COLUMN_SERIES_MAPPING,
     ]

@@ -166,6 +182,7 @@ def is_unevaluated_data_object(obj: object) -> bool:
     - PySpark DataFrame
     - Modin DataFrame / Series
     - Snowpandas DataFrame / Series
+    - Generator functions

     Unevaluated means that the data is not yet in the local memory.
     Unevaluated data objects are treated differently from other data objects by only
@@ -176,9 +193,15 @@ def is_unevaluated_data_object(obj: object) -> bool:
         or is_pyspark_data_object(obj)
         or is_snowpandas_data_object(obj)
         or is_modin_data_object(obj)
+        or inspect.isgeneratorfunction(obj)
     )


+def is_pandas_data_object(obj: object) -> bool:
+    """True if obj is a Pandas object (e.g. DataFrame, Series, Index, Styler, ...)."""
+    return is_type(obj, _PANDAS_DATA_OBJECT_TYPE_RE)
+
+
 def is_snowpark_data_object(obj: object) -> bool:
     """True if obj is a Snowpark DataFrame or Table."""
     return is_type(obj, _SNOWPARK_TABLE_TYPE_STR) or is_type(obj, _SNOWPARK_DF_TYPE_STR)
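The new `is_pandas_data_object` helper classifies objects by matching their fully qualified type name against the compiled `^pandas.*$` pattern, so one check covers DataFrame, Series, Index, Styler, and extension arrays without importing pandas up front. A minimal sketch of the matching idea, assuming `is_type` compares fully qualified type names and accepts compiled patterns; the `_qualified_name` helper below is hypothetical, for illustration only:

import re

import pandas as pd

_PANDAS_DATA_OBJECT_TYPE_RE = re.compile(r"^pandas.*$")

def _qualified_name(obj: object) -> str:
    # e.g. "pandas.core.frame.DataFrame" for a pd.DataFrame instance
    return f"{type(obj).__module__}.{type(obj).__qualname__}"

print(bool(_PANDAS_DATA_OBJECT_TYPE_RE.match(_qualified_name(pd.DataFrame()))))  # True
print(bool(_PANDAS_DATA_OBJECT_TYPE_RE.match(_qualified_name([1, 2, 3]))))       # False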
@@ -186,13 +209,12 @@ def is_snowpark_data_object(obj: object) -> bool:

 def is_snowpark_row_list(obj: object) -> bool:
     """True if obj is a list of snowflake.snowpark.row.Row."""
-    if not isinstance(obj, list):
-        return False
-    if len(obj) < 1:
-        return False
-    if not hasattr(obj[0], "__class__"):
-        return False
-    return is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+    return (
+        isinstance(obj, list)
+        and len(obj) > 0
+        and is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+        and has_callable_attr(obj[0], "as_dict")
+    )


 def is_pyspark_data_object(obj: object) -> bool:
@@ -221,6 +243,78 @@ def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
     return is_type(obj, _PANDAS_STYLER_TYPE_STR)


+def _is_list_of_scalars(data: Iterable[Any]) -> bool:
+    """Check if the list only contains scalar values."""
+    from pandas.api.types import infer_dtype
+
+    # Overview on all value that are interpreted as scalar:
+    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
+    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
+
+
+def _iterable_to_list(
+    iterable: Iterable[Any], max_iterations: int | None = None
+) -> list[Any]:
+    """Convert an iterable to a list.
+
+    Parameters
+    ----------
+    iterable : Iterable
+        The iterable to convert to a list.
+
+    max_iterations : int or None
+        The maximum number of iterations to perform. If None, all iterations are performed.
+
+    Returns
+    -------
+    list
+        The converted list.
+    """
+    if max_iterations is None:
+        return list(iterable)
+
+    result = []
+    for i, item in enumerate(iterable):
+        if i >= max_iterations:
+            break
+        result.append(item)
+    return result
+
+
+def _fix_column_naming(data_df: DataFrame) -> DataFrame:
+    """Rename the first column to "value" if it is not named
+    and if there is only one column in the dataframe.
+
+    The default name of the first column is 0 if it is not named
+    which is not very descriptive.
+    """
+
+    if len(data_df.columns) == 1 and data_df.columns[0] == 0:
+        # Pandas automatically names the first column with 0 if it is not named.
+        # We rename it to "value" to make it more descriptive if there is only
+        # one column in the dataframe.
+        data_df.rename(columns={0: "value"}, inplace=True)
+    return data_df
+
+
+def _dict_to_pandas_df(data: dict[Any, Any]) -> DataFrame:
+    """Convert a key-value dict to a Pandas DataFrame.
+
+    Parameters
+    ----------
+    data : dict
+        The dict to convert to a Pandas DataFrame.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The converted Pandas DataFrame.
+    """
+    import pandas as pd
+
+    return _fix_column_naming(pd.DataFrame.from_dict(data, orient="index"))
+
+
 def convert_anything_to_pandas_df(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
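These helpers carry the module's conversion conventions: a single unnamed column becomes "value", and key-value dicts are read row-wise. A standalone sketch in plain pandas that mirrors what `_dict_to_pandas_df` plus `_fix_column_naming` produce:

import pandas as pd

# Key-value dict -> one row per key; the lone unnamed column (0) is renamed.
df = pd.DataFrame.from_dict({"a": 1, "b": 2}, orient="index")
df.rename(columns={0: "value"}, inplace=True)
print(df)
#    value
# a      1
# b      2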
@@ -246,81 +340,123 @@ def convert_anything_to_pandas_df(
     pandas.DataFrame

     """
+    import array
+
     import numpy as np
     import pandas as pd

     if isinstance(data, pd.DataFrame):
         return data.copy() if ensure_copy else cast(pd.DataFrame, data)

-    if isinstance(data, (pd.Series, pd.Index)):
+    if isinstance(data, (pd.Series, pd.Index, pd.api.extensions.ExtensionArray)):
         return pd.DataFrame(data)

     if is_pandas_styler(data):
         return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)

     if isinstance(data, np.ndarray):
-        return pd.DataFrame([]) if len(data.shape) == 0 else pd.DataFrame(data)
+        return (
+            pd.DataFrame([])
+            if len(data.shape) == 0
+            else _fix_column_naming(pd.DataFrame(data))
+        )

     if is_modin_data_object(data):
         data = data.head(max_unevaluated_rows)._to_pandas()

-        if isinstance(data, pd.Series):
+        if isinstance(data, (pd.Series, pd.Index)):
             data = data.to_frame()

         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `_to_pandas()` on the dataframe to show more."
+                "rows. Call `_to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

     if is_pyspark_data_object(data):
         data = data.limit(max_unevaluated_rows).toPandas()
         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `toPandas()` on the dataframe to show more."
+                "rows. Call `toPandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

+    if is_snowpandas_data_object(data):
+        data = data[:max_unevaluated_rows].to_pandas()
+
+        if isinstance(data, (pd.Series, pd.Index)):
+            data = data.to_frame()
+
+        if data.shape[0] == max_unevaluated_rows:
+            _show_data_information(
+                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
+                "rows. Call `to_pandas()` on the data object to show more."
+            )
+        return cast(pd.DataFrame, data)
+
     if is_snowpark_data_object(data):
         data = data.limit(max_unevaluated_rows).to_pandas()
         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `to_pandas()` on the dataframe to show more."
+                "rows. Call `to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

-    if is_snowpandas_data_object(data):
-        data = data[:max_unevaluated_rows].to_pandas()
-
-        if isinstance(data, pd.Series):
-            data = data.to_frame()
-
-        if data.shape[0] == max_unevaluated_rows:
-            st.caption(
-                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `to_pandas()` on the dataframe to show more."
-            )
-        return cast(pd.DataFrame, data)
-
     if is_snowpark_row_list(data):
         return pd.DataFrame([row.as_dict() for row in data])

-    if hasattr(data, "to_pandas"):
+    if has_callable_attr(data, "to_pandas"):
         return pd.DataFrame(data.to_pandas())

+    # Support for generator functions
+    if inspect.isgeneratorfunction(data):
+        data = _fix_column_naming(
+            pd.DataFrame(_iterable_to_list(data(), max_iterations=max_unevaluated_rows))
+        )
+
+        if data.shape[0] == max_unevaluated_rows:
+            _show_data_information(
+                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
+                "rows. Convert the data to a list to show more."
+            )
+        return data
+
+    if isinstance(data, EnumMeta):
+        # Support for enum classes
+        return _fix_column_naming(pd.DataFrame([c.value for c in data]))  # type: ignore
+
+    # Support for some list like objects
+    if isinstance(data, (deque, map, array.ArrayType)):
+        return _fix_column_naming(pd.DataFrame(list(data)))
+
+    # Support for Streamlit's custom dict-like objects
+    if is_custom_dict(data):
+        return _dict_to_pandas_df(data.to_dict())
+
+    # Support for named tuples
+    if is_namedtuple(data):
+        return _dict_to_pandas_df(data._asdict())
+
+    # Support for dataclass instances
+    if is_dataclass_instance(data):
+        return _dict_to_pandas_df(dataclasses.asdict(data))
+
+    # Support for dict-like objects
+    if isinstance(data, (ChainMap, MappingProxyType, UserDict)):
+        return _dict_to_pandas_df(dict(data))

     # Try to convert to pandas.DataFrame. This will raise an error is df is not
     # compatible with the pandas.DataFrame constructor.
     try:
-        return pd.DataFrame(data)
-
+        return _fix_column_naming(pd.DataFrame(data))
     except ValueError as ex:
         if isinstance(data, dict):
             with contextlib.suppress(ValueError):
                 # Try to use index orient as back-up to support key-value dicts
-                return pd.DataFrame.from_dict(data, orient="index")
+                return _dict_to_pandas_df(data)
         raise errors.StreamlitAPIException(
             f"""
 Unable to convert object of type `{type(data)}` to `pandas.DataFrame`.
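With these branches, `convert_anything_to_pandas_df` now accepts generator functions, enums, deques, maps, array.array values, namedtuples, dataclass instances, and dict-like wrappers. A hedged usage sketch, with inputs chosen to exercise the branches above (output shapes follow from `_dict_to_pandas_df` and `_fix_column_naming`):

import dataclasses
from collections import ChainMap

from streamlit.dataframe_util import convert_anything_to_pandas_df

@dataclasses.dataclass
class Point:
    x: int
    y: int

# Dataclass instance -> one row per field (via dataclasses.asdict)
print(convert_anything_to_pandas_df(Point(1, 2)))

# Generator function -> materialized lazily, capped at max_unevaluated_rows
def numbers():
    yield from range(5)

print(convert_anything_to_pandas_df(numbers))

# ChainMap -> treated as a key-value dict
print(convert_anything_to_pandas_df(ChainMap({"a": 1}, {"b": 2})))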
@@ -419,6 +555,14 @@ def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
     return reader.read_pandas()


+def _show_data_information(msg: str) -> None:
+    """Show a message to the user with important information
+    about the processed dataset."""
+    from streamlit.delta_generator import main_dg
+
+    main_dg.caption(msg)
+
+
 def convert_anything_to_arrow_bytes(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
@@ -449,8 +593,16 @@ def convert_anything_to_arrow_bytes(
     if isinstance(data, pa.Table):
         return convert_arrow_table_to_arrow_bytes(data)

+    if is_pandas_data_object(data):
+        # All pandas data objects should be handled via our pandas
+        # conversion logic. We are already calling it here
+        # to ensure that its not handled via the interchange
+        # protocol support below.
+        df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
+        return convert_pandas_df_to_arrow_bytes(df)
+
     # Fallback: try to convert to pandas DataFrame
-    # and then to Arrow bytes
+    # and then to Arrow bytes.
     df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
     return convert_pandas_df_to_arrow_bytes(df)
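The pandas fast path keeps pandas inputs on the dedicated conversion route instead of the dataframe interchange protocol fallback. A round-trip sketch using the two public helpers from this module (behavior as shown in this diff):

import pandas as pd

from streamlit.dataframe_util import (
    convert_anything_to_arrow_bytes,
    convert_arrow_bytes_to_pandas_df,
)

df = pd.DataFrame({"a": [1, 2, 3]})
arrow_bytes = convert_anything_to_arrow_bytes(df)  # pandas route, not interchange protocol
roundtrip = convert_arrow_bytes_to_pandas_df(arrow_bytes)
assert roundtrip.equals(df)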
@@ -475,7 +627,9 @@ def convert_anything_to_sequence(obj: OptionSequence[V_co]) -> Sequence[V_co]:
     if obj is None:
         return []  # type: ignore

-    if isinstance(obj, (str, list, tuple, set, range, EnumMeta)):
+    if isinstance(
+        obj, (str, list, tuple, set, range, EnumMeta, deque, map)
+    ) and not is_snowpark_row_list(obj):
         # This also ensures that the sequence is copied to prevent
         # potential mutations to the original object.
         return list(obj)
@@ -569,8 +723,7 @@ def _maybe_truncate_table(
         # we just display the exact numbers.
         displayed_rows = str(table.num_rows)
         total_rows = str(table.num_rows + truncated_rows)
-
-        st.caption(
+        _show_data_information(
             f"⚠️ Showing {displayed_rows} out of {total_rows} "
             "rows due to data size limitations."
         )
@@ -579,7 +732,8 @@ def _maybe_truncate_table(


 def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
-    """Return True if the column type is known to cause issues during Arrow conversion."""
+    """Return True if the column type is known to cause issues during
+    Arrow conversion."""
     from pandas.api.types import infer_dtype, is_dict_like, is_list_like

     if column.dtype.kind in [
@@ -610,7 +764,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
     ]:
         return True
     elif inferred_type == "mixed":
-        # This includes most of the more complex/custom types (objects, dicts, lists, ...)
+        # This includes most of the more complex/custom types (objects, dicts,
+        # lists, ...)
         if len(column) == 0 or not hasattr(column, "iloc"):
             # The column seems to be invalid, so we assume it is incompatible.
             # But this would most likely never happen since empty columns
@@ -622,7 +777,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:

         if (
             not is_list_like(first_value)
-            # dicts are list-like, but have issues in Arrow JS (see comments in Quiver.ts)
+            # dicts are list-like, but have issues in Arrow JS (see comments in
+            # Quiver.ts)
             or is_dict_like(first_value)
             # Frozensets are list-like, but are not compatible with pyarrow.
             or isinstance(first_value, frozenset)
@@ -684,15 +840,6 @@ def fix_arrow_incompatible_column_types(
     return df_copy if df_copy is not None else df


-def _is_list_of_scalars(data: Iterable[Any]) -> bool:
-    """Check if the list only contains scalar values."""
-    from pandas.api.types import infer_dtype
-
-    # Overview on all value that are interpreted as scalar:
-    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
-    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
-
-
 def determine_data_format(input_data: Any) -> DataFormat:
     """Determine the data format of the input data.

@@ -706,6 +853,8 @@ def determine_data_format(input_data: Any) -> DataFormat:
     DataFormat
         The data format of the input data.
     """
+    import array
+
     import numpy as np
     import pandas as pd
     import pyarrow as pa
@@ -722,26 +871,43 @@ def determine_data_format(input_data: Any) -> DataFormat:
         return DataFormat.NUMPY_MATRIX
     elif isinstance(input_data, pa.Table):
         return DataFormat.PYARROW_TABLE
+    elif isinstance(input_data, pa.Array):
+        return DataFormat.PYARROW_ARRAY
     elif isinstance(input_data, pd.Series):
         return DataFormat.PANDAS_SERIES
     elif isinstance(input_data, pd.Index):
         return DataFormat.PANDAS_INDEX
     elif is_pandas_styler(input_data):
         return DataFormat.PANDAS_STYLER
-    elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
-        return DataFormat.SNOWPARK_OBJECT
+    elif isinstance(input_data, pd.api.extensions.ExtensionArray):
+        return DataFormat.PANDAS_ARRAY
     elif is_modin_data_object(input_data):
         return DataFormat.MODIN_OBJECT
     elif is_snowpandas_data_object(input_data):
         return DataFormat.SNOWPANDAS_OBJECT
     elif is_pyspark_data_object(input_data):
         return DataFormat.PYSPARK_OBJECT
-    elif isinstance(input_data, (list, tuple, set)):
+    elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
+        return DataFormat.SNOWPARK_OBJECT
+    elif isinstance(
+        input_data, (range, EnumMeta, KeysView, ValuesView, deque, map, array.ArrayType)
+    ):
+        return DataFormat.LIST_OF_VALUES
+    elif (
+        isinstance(input_data, (ChainMap, MappingProxyType, UserDict))
+        or is_dataclass_instance(input_data)
+        or is_namedtuple(input_data)
+        or is_custom_dict(input_data)
+    ):
+        return DataFormat.KEY_VALUE_DICT
+    elif isinstance(input_data, (ItemsView, enumerate)):
+        return DataFormat.LIST_OF_ROWS
+    elif isinstance(input_data, (list, tuple, set, frozenset)):
         if _is_list_of_scalars(input_data):
             # -> one-dimensional data structure
             if isinstance(input_data, tuple):
                 return DataFormat.TUPLE_OF_VALUES
-            if isinstance(input_data, set):
+            if isinstance(input_data, (set, frozenset)):
                 return DataFormat.SET_OF_VALUES
             return DataFormat.LIST_OF_VALUES
         else:
@@ -751,23 +917,23 @@ def determine_data_format(input_data: Any) -> DataFormat:
             first_element = next(iter(input_data))
             if isinstance(first_element, dict):
                 return DataFormat.LIST_OF_RECORDS
-            if isinstance(first_element, (list, tuple, set)):
+            if isinstance(first_element, (list, tuple, set, frozenset)):
                 return DataFormat.LIST_OF_ROWS
     elif isinstance(input_data, dict):
         if not input_data:
             return DataFormat.KEY_VALUE_DICT
         if len(input_data) > 0:
             first_value = next(iter(input_data.values()))
+            # In the future, we could potentially also support tight & split formats
             if isinstance(first_value, dict):
                 return DataFormat.COLUMN_INDEX_MAPPING
             if isinstance(first_value, (list, tuple)):
                 return DataFormat.COLUMN_VALUE_MAPPING
             if isinstance(first_value, pd.Series):
                 return DataFormat.COLUMN_SERIES_MAPPING
-            # Use key-value dict as fallback. However, if the values of the dict
-            # contains mixed types, it will become non-editable in the frontend.
-
-            return DataFormat.KEY_VALUE_DICT
+            # Use key-value dict as fallback. However, if the values of the dict
+            # contains mixed types, it will become non-editable in the frontend.
+            return DataFormat.KEY_VALUE_DICT
     return DataFormat.UNKNOWN

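The new branches map directly to observable return values. A hedged sketch of a few calls, with expected results inferred from the branch order above:

from collections import deque
from types import MappingProxyType

import pandas as pd

from streamlit.dataframe_util import determine_data_format

print(determine_data_format(pd.array([1, 2, 3])))         # DataFormat.PANDAS_ARRAY
print(determine_data_format(deque([1, 2, 3])))            # DataFormat.LIST_OF_VALUES
print(determine_data_format(MappingProxyType({"a": 1})))  # DataFormat.KEY_VALUE_DICT
print(determine_data_format(enumerate(["a", "b"])))       # DataFormat.LIST_OF_ROWS
print(determine_data_format(frozenset({1, 2})))           # DataFormat.SET_OF_VALUES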
@@ -783,12 +949,30 @@ def _unify_missing_values(df: DataFrame) -> DataFrame:
     return df.fillna(np.nan).replace([np.nan], [None])


+def _pandas_df_to_series(df: DataFrame) -> Series[Any]:
+    """Convert a Pandas DataFrame to a Pandas Series by selecting the first column.
+
+    Raises
+    ------
+    ValueError
+        If the DataFrame has more than one column.
+    """
+    # Select first column in dataframe and create a new series based on the values
+    if len(df.columns) != 1:
+        raise ValueError(
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
+        )
+    return df[df.columns[0]]
+
+
 def convert_pandas_df_to_data_format(
     df: DataFrame, data_format: DataFormat
 ) -> (
     DataFrame
     | Series[Any]
     | pa.Table
+    | pa.Array
     | np.ndarray[Any, np.dtype[Any]]
     | tuple[Any]
     | list[Any]
@@ -818,6 +1002,7 @@ def convert_pandas_df_to_data_format(
         DataFormat.PYSPARK_OBJECT,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
     ]:
@@ -838,13 +1023,12 @@ def convert_pandas_df_to_data_format(
         import pyarrow as pa

         return pa.Table.from_pandas(df)
+    elif data_format == DataFormat.PYARROW_ARRAY:
+        import pyarrow as pa
+
+        return pa.Array.from_pandas(_pandas_df_to_series(df))
     elif data_format == DataFormat.PANDAS_SERIES:
-        # Select first column in dataframe and create a new series based on the values
-        if len(df.columns) != 1:
-            raise ValueError(
-                f"DataFrame is expected to have a single column but has {len(df.columns)}."
-            )
-        return df[df.columns[0]]
+        return _pandas_df_to_series(df)
     elif data_format == DataFormat.LIST_OF_RECORDS:
         return _unify_missing_values(df).to_dict(orient="records")
     elif data_format == DataFormat.LIST_OF_ROWS:
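The new `PYARROW_ARRAY` branch reduces the single-column DataFrame to a Series and hands it to pyarrow. A minimal sketch of that step in plain pandas/pyarrow:

import pandas as pd
import pyarrow as pa

# Mirrors the PYARROW_ARRAY branch: one-column DataFrame -> Series -> pa.Array
df = pd.DataFrame({"value": [1, 2, 3]})
arr = pa.Array.from_pandas(df[df.columns[0]])
print(arr.type)         # int64
print(arr.to_pylist())  # [1, 2, 3]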
@@ -868,7 +1052,8 @@ def convert_pandas_df_to_data_format(
         return_list = df[df.columns[0]].tolist()
     elif len(df.columns) >= 1:
         raise ValueError(
-            f"DataFrame is expected to have a single column but has {len(df.columns)}."
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
         )
     if data_format == DataFormat.TUPLE_OF_VALUES:
         return tuple(return_list)
streamlit/delta_generator.py
CHANGED
@@ -708,7 +708,8 @@ def _prep_data_for_add_rows(
 ) -> tuple[Data, AddRowsMetadata | None]:
     if not add_rows_metadata:
         if dataframe_util.is_pandas_styler(data):
-            # When calling add_rows on st.table or st.dataframe we want styles to pass through.
+            # When calling add_rows on st.table or st.dataframe we want styles to
+            # pass through.
             return data, None
         return dataframe_util.convert_anything_to_pandas_df(data), None

streamlit/elements/arrow.py
CHANGED
@@ -513,12 +513,7 @@ class ArrowMixin:
         data_df = dataframe_util.convert_anything_to_pandas_df(
             data, ensure_copy=False
         )
-        apply_data_specific_configs(
-            column_config_mapping,
-            data_df,
-            data_format,
-            check_arrow_compatibility=False,
-        )
+        apply_data_specific_configs(column_config_mapping, data_format)
         # Serialize the data to bytes:
         proto.data = dataframe_util.convert_pandas_df_to_arrow_bytes(data_df)

@@ -599,8 +594,10 @@ class ArrowMixin:

     """

-    # Check if data is uncollected, and collect it but with 100 rows max, instead of
-    # 10k rows, which is done in all other cases.
+    # Check if data is uncollected, and collect it but with 100 rows max, instead of
+    # 10k rows, which is done in all other cases.
+    # We use 100 rows in st.table, because large tables render slowly,
+    # take too much screen space, and can crush the app.
     if dataframe_util.is_unevaluated_data_object(data):
         data = dataframe_util.convert_anything_to_pandas_df(
             data, max_unevaluated_rows=100
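From app code the cap is transparent: handing `st.table` an unevaluated object collects at most 100 rows and shows a caption pointing at how to display more. A hedged sketch of a Streamlit app (run with `streamlit run app.py`), using a generator function since those now count as unevaluated data objects per this diff:

import streamlit as st

# A generator function is an unevaluated data object after this change:
def rows():
    for i in range(1_000_000):
        yield {"n": i}

# st.table collects at most 100 rows from unevaluated objects; a caption
# tells the user how to show the rest (e.g. by converting to a list).
st.table(rows)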