streamlit-nightly 1.37.2.dev20240805__py2.py3-none-any.whl → 1.37.2.dev20240806__py2.py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (38)
  1. streamlit/dataframe_util.py +254 -69
  2. streamlit/delta_generator.py +2 -1
  3. streamlit/elements/arrow.py +5 -8
  4. streamlit/elements/json.py +13 -13
  5. streamlit/elements/lib/column_config_utils.py +1 -35
  6. streamlit/elements/widgets/data_editor.py +28 -9
  7. streamlit/elements/write.py +20 -27
  8. streamlit/runtime/caching/cache_utils.py +12 -9
  9. streamlit/static/asset-manifest.json +16 -15
  10. streamlit/static/index.html +1 -1
  11. streamlit/static/static/js/{1451.e9542cc9.chunk.js → 1451.913b0f90.chunk.js} +1 -1
  12. streamlit/static/static/js/{1792.8bd6ce2a.chunk.js → 1792.eb8a836f.chunk.js} +1 -1
  13. streamlit/static/static/js/{2469.31e2695e.chunk.js → 2469.c4454803.chunk.js} +1 -1
  14. streamlit/static/static/js/3466.0cd981ca.chunk.js +2 -0
  15. streamlit/static/static/js/{3466.bb8e2e0c.chunk.js.LICENSE.txt → 3466.0cd981ca.chunk.js.LICENSE.txt} +10 -0
  16. streamlit/static/static/js/{3513.7dedbda2.chunk.js → 3513.577f3dc5.chunk.js} +1 -1
  17. streamlit/static/static/js/{4113.99983645.chunk.js → 4113.8b8c523d.chunk.js} +1 -1
  18. streamlit/static/static/js/{4335.fb48fd8e.chunk.js → 4335.2b77e051.chunk.js} +1 -1
  19. streamlit/static/static/js/{4477.b85593dd.chunk.js → 4477.edb1d80a.chunk.js} +1 -1
  20. streamlit/static/static/js/{5106.e53c485c.chunk.js → 5106.656a5db4.chunk.js} +1 -1
  21. streamlit/static/static/js/5267.b73f42da.chunk.js +2 -0
  22. streamlit/static/static/js/5267.b73f42da.chunk.js.LICENSE.txt +1 -0
  23. streamlit/static/static/js/{6853.93dd1c4c.chunk.js → 6853.e7b24972.chunk.js} +1 -1
  24. streamlit/static/static/js/8148.89abd285.chunk.js +1 -0
  25. streamlit/static/static/js/{8427.e051b59f.chunk.js → 8427.4594845a.chunk.js} +1 -1
  26. streamlit/static/static/js/{8477.4d2a23c2.chunk.js → 8477.90b06bd9.chunk.js} +1 -1
  27. streamlit/static/static/js/main.5d7956c8.js +2 -0
  28. streamlit/static/static/js/{main.95daee38.js.LICENSE.txt → main.5d7956c8.js.LICENSE.txt} +23 -1
  29. streamlit/type_util.py +46 -14
  30. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/METADATA +1 -1
  31. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/RECORD +35 -33
  32. streamlit/static/static/js/3466.bb8e2e0c.chunk.js +0 -2
  33. streamlit/static/static/js/8148.c7db8490.chunk.js +0 -1
  34. streamlit/static/static/js/main.95daee38.js +0 -2
  35. {streamlit_nightly-1.37.2.dev20240805.data → streamlit_nightly-1.37.2.dev20240806.data}/scripts/streamlit.cmd +0 -0
  36. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/WHEEL +0 -0
  37. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/entry_points.txt +0 -0
  38. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240806.dist-info}/top_level.txt +0 -0
streamlit/dataframe_util.py

@@ -17,8 +17,14 @@
 from __future__ import annotations

 import contextlib
+import dataclasses
+import inspect
 import math
+import re
+from collections import ChainMap, UserDict, deque
+from collections.abc import ItemsView, KeysView, ValuesView
 from enum import Enum, EnumMeta, auto
+from types import MappingProxyType
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -34,9 +40,14 @@ from typing import (

 from typing_extensions import TypeAlias, TypeGuard

-import streamlit as st
 from streamlit import config, errors, logger, string_util
-from streamlit.type_util import is_type
+from streamlit.type_util import (
+    has_callable_attr,
+    is_custom_dict,
+    is_dataclass_instance,
+    is_namedtuple,
+    is_type,
+)

 if TYPE_CHECKING:
     import numpy as np
@@ -51,6 +62,7 @@ _LOGGER: Final = logger.get_logger(__name__)
 # Maximum number of rows to request from an unevaluated (out-of-core) dataframe
 _MAX_UNEVALUATED_DF_ROWS = 10000

+_PANDAS_DATA_OBJECT_TYPE_RE: Final = re.compile(r"^pandas.*$")
 _PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
 _SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
 _SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
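
Note: the new _PANDAS_DATA_OBJECT_TYPE_RE pattern matches any fully qualified type name that starts with "pandas". A minimal sketch of the regex-based check, assuming is_type compares a compiled pattern against the object's "module.ClassName" string (the helper _full_type_name below is a hypothetical stand-in for that logic):

import re

_PANDAS_DATA_OBJECT_TYPE_RE = re.compile(r"^pandas.*$")

def _full_type_name(obj: object) -> str:
    # Build the fully qualified "module.ClassName" string for obj's type.
    t = type(obj)
    return f"{t.__module__}.{t.__qualname__}"

def looks_like_pandas_object(obj: object) -> bool:
    # "pandas.core.frame.DataFrame" matches; "builtins.list" does not.
    return _PANDAS_DATA_OBJECT_TYPE_RE.match(_full_type_name(obj)) is not None
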
@@ -61,7 +73,6 @@ _MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
 _SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
 _SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"

-
 V_co = TypeVar(
     "V_co",
     covariant=True,  # https://peps.python.org/pep-0484/#covariance-and-contravariance
@@ -111,9 +122,11 @@ class DataFormat(Enum):
     PANDAS_DATAFRAME = auto()  # pd.DataFrame
     PANDAS_SERIES = auto()  # pd.Series
     PANDAS_INDEX = auto()  # pd.Index
+    PANDAS_ARRAY = auto()  # pd.array
     NUMPY_LIST = auto()  # np.array[Scalar]
     NUMPY_MATRIX = auto()  # np.array[List[Scalar]]
     PYARROW_TABLE = auto()  # pyarrow.Table
+    PYARROW_ARRAY = auto()  # pyarrow.Array
     SNOWPARK_OBJECT = auto()  # Snowpark DataFrame, Table, List[Row]
     PYSPARK_OBJECT = auto()  # pyspark.DataFrame
     MODIN_OBJECT = auto()  # Modin DataFrame, Series
@@ -136,9 +149,9 @@ def is_dataframe_like(obj: object) -> bool:
     This does not include basic collection types like list, dict, tuple, etc.
     """

-    if obj is None or isinstance(
-        obj, (list, tuple, set, dict, str, bytes, int, float, bool)
-    ):
+    # We exclude list and dict here since there are some cases where a list or dict is
+    # considered a dataframe-like object.
+    if obj is None or isinstance(obj, (tuple, set, str, bytes, int, float, bool)):
         # Basic types are not considered dataframe-like, so we can
         # return False early to avoid unnecessary checks.
         return False
@@ -148,13 +161,16 @@ def is_dataframe_like(obj: object) -> bool:
         DataFormat.PANDAS_SERIES,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.NUMPY_LIST,
         DataFormat.NUMPY_MATRIX,
         DataFormat.PYARROW_TABLE,
+        DataFormat.PYARROW_ARRAY,
         DataFormat.SNOWPARK_OBJECT,
         DataFormat.PYSPARK_OBJECT,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
+        DataFormat.COLUMN_SERIES_MAPPING,
     ]

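
Note: adding DataFormat.COLUMN_SERIES_MAPPING to this list is what motivates the relaxed early exit above; a plain dict that maps column names to pd.Series now counts as dataframe-like. A quick illustration of that shape in plain pandas:

import pandas as pd

# A dict of column name -> Series is classified as COLUMN_SERIES_MAPPING:
column_series = {"a": pd.Series([1, 2]), "b": pd.Series([3, 4])}
print(pd.DataFrame(column_series))  # renders as a two-column dataframe
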
@@ -166,6 +182,7 @@ def is_unevaluated_data_object(obj: object) -> bool:
     - PySpark DataFrame
     - Modin DataFrame / Series
     - Snowpandas DataFrame / Series
+    - Generator functions

     Unevaluated means that the data is not yet in the local memory.
     Unevaluated data objects are treated differently from other data objects by only
@@ -176,9 +193,15 @@ def is_unevaluated_data_object(obj: object) -> bool:
         or is_pyspark_data_object(obj)
         or is_snowpandas_data_object(obj)
         or is_modin_data_object(obj)
+        or inspect.isgeneratorfunction(obj)
     )


+def is_pandas_data_object(obj: object) -> bool:
+    """True if obj is a Pandas object (e.g. DataFrame, Series, Index, Styler, ...)."""
+    return is_type(obj, _PANDAS_DATA_OBJECT_TYPE_RE)
+
+
 def is_snowpark_data_object(obj: object) -> bool:
     """True if obj is a Snowpark DataFrame or Table."""
     return is_type(obj, _SNOWPARK_TABLE_TYPE_STR) or is_type(obj, _SNOWPARK_DF_TYPE_STR)
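
Note: together with the matching branch added to convert_anything_to_pandas_df below, this lets callers hand a generator function (not a generator object) to data elements; at most max_unevaluated_rows rows are consumed. A hypothetical usage sketch:

import streamlit as st

def fetch_rows():
    # Rows are produced lazily; Streamlit consumes at most
    # _MAX_UNEVALUATED_DF_ROWS (10k) of them.
    for i in range(1_000_000):
        yield {"id": i, "square": i * i}

st.dataframe(fetch_rows)  # pass the function itself, do not call it
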
@@ -186,13 +209,12 @@ def is_snowpark_data_object(obj: object) -> bool:

 def is_snowpark_row_list(obj: object) -> bool:
     """True if obj is a list of snowflake.snowpark.row.Row."""
-    if not isinstance(obj, list):
-        return False
-    if len(obj) < 1:
-        return False
-    if not hasattr(obj[0], "__class__"):
-        return False
-    return is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+    return (
+        isinstance(obj, list)
+        and len(obj) > 0
+        and is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
+        and has_callable_attr(obj[0], "as_dict")
+    )


 def is_pyspark_data_object(obj: object) -> bool:
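
Note: the extra has_callable_attr(obj[0], "as_dict") guard matches the row.as_dict() call added in convert_anything_to_pandas_df below. The helper lives in streamlit.type_util; a plausible one-liner for it (an assumption, not the verified implementation):

def has_callable_attr(obj: object, name: str) -> bool:
    # True if obj exposes an attribute `name` and that attribute is callable.
    return hasattr(obj, name) and callable(getattr(obj, name))
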
@@ -221,6 +243,78 @@ def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
     return is_type(obj, _PANDAS_STYLER_TYPE_STR)


+def _is_list_of_scalars(data: Iterable[Any]) -> bool:
+    """Check if the list only contains scalar values."""
+    from pandas.api.types import infer_dtype
+
+    # Overview on all value that are interpreted as scalar:
+    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
+    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
+
+
+def _iterable_to_list(
+    iterable: Iterable[Any], max_iterations: int | None = None
+) -> list[Any]:
+    """Convert an iterable to a list.
+
+    Parameters
+    ----------
+    iterable : Iterable
+        The iterable to convert to a list.
+
+    max_iterations : int or None
+        The maximum number of iterations to perform. If None, all iterations are performed.
+
+    Returns
+    -------
+    list
+        The converted list.
+    """
+    if max_iterations is None:
+        return list(iterable)
+
+    result = []
+    for i, item in enumerate(iterable):
+        if i >= max_iterations:
+            break
+        result.append(item)
+    return result
+
+
+def _fix_column_naming(data_df: DataFrame) -> DataFrame:
+    """Rename the first column to "value" if it is not named
+    and if there is only one column in the dataframe.
+
+    The default name of the first column is 0 if it is not named
+    which is not very descriptive.
+    """
+
+    if len(data_df.columns) == 1 and data_df.columns[0] == 0:
+        # Pandas automatically names the first column with 0 if it is not named.
+        # We rename it to "value" to make it more descriptive if there is only
+        # one column in the dataframe.
+        data_df.rename(columns={0: "value"}, inplace=True)
+    return data_df
+
+
+def _dict_to_pandas_df(data: dict[Any, Any]) -> DataFrame:
+    """Convert a key-value dict to a Pandas DataFrame.
+
+    Parameters
+    ----------
+    data : dict
+        The dict to convert to a Pandas DataFrame.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The converted Pandas DataFrame.
+    """
+    import pandas as pd
+
+    return _fix_column_naming(pd.DataFrame.from_dict(data, orient="index"))
+
+
 def convert_anything_to_pandas_df(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
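
Note: _dict_to_pandas_df plus _fix_column_naming turn a key-value dict into a one-column frame with a readable column name. The plain-pandas equivalent of the pair:

import pandas as pd

# orient="index" puts the dict keys on the row index and the values in column 0 ...
df = pd.DataFrame.from_dict({"a": 1, "b": 2}, orient="index")
# ... and the unnamed column 0 is then renamed to "value":
df = df.rename(columns={0: "value"})
print(df)
#    value
# a      1
# b      2
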
@@ -246,81 +340,123 @@ def convert_anything_to_pandas_df(
     pandas.DataFrame

     """
+    import array
+
     import numpy as np
     import pandas as pd

     if isinstance(data, pd.DataFrame):
         return data.copy() if ensure_copy else cast(pd.DataFrame, data)

-    if isinstance(data, (pd.Series, pd.Index)):
+    if isinstance(data, (pd.Series, pd.Index, pd.api.extensions.ExtensionArray)):
         return pd.DataFrame(data)

     if is_pandas_styler(data):
         return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)

     if isinstance(data, np.ndarray):
-        return pd.DataFrame([]) if len(data.shape) == 0 else pd.DataFrame(data)
+        return (
+            pd.DataFrame([])
+            if len(data.shape) == 0
+            else _fix_column_naming(pd.DataFrame(data))
+        )

     if is_modin_data_object(data):
         data = data.head(max_unevaluated_rows)._to_pandas()

-        if isinstance(data, pd.Series):
+        if isinstance(data, (pd.Series, pd.Index)):
             data = data.to_frame()

         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `_to_pandas()` on the dataframe to show more."
+                "rows. Call `_to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

     if is_pyspark_data_object(data):
         data = data.limit(max_unevaluated_rows).toPandas()
         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
+                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
+                "rows. Call `toPandas()` on the data object to show more."
+            )
+        return cast(pd.DataFrame, data)
+
+    if is_snowpandas_data_object(data):
+        data = data[:max_unevaluated_rows].to_pandas()
+
+        if isinstance(data, (pd.Series, pd.Index)):
+            data = data.to_frame()
+
+        if data.shape[0] == max_unevaluated_rows:
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `toPandas()` on the dataframe to show more."
+                "rows. Call `to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

     if is_snowpark_data_object(data):
         data = data.limit(max_unevaluated_rows).to_pandas()
         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `to_pandas()` on the dataframe to show more."
+                "rows. Call `to_pandas()` on the data object to show more."
             )
         return cast(pd.DataFrame, data)

-    if is_snowpandas_data_object(data):
-        data = data.head(max_unevaluated_rows).to_pandas()
+    if is_snowpark_row_list(data):
+        return pd.DataFrame([row.as_dict() for row in data])

-        if isinstance(data, pd.Series):
-            data = data.to_frame()
+    if has_callable_attr(data, "to_pandas"):
+        return pd.DataFrame(data.to_pandas())
+
+    # Support for generator functions
+    if inspect.isgeneratorfunction(data):
+        data = _fix_column_naming(
+            pd.DataFrame(_iterable_to_list(data(), max_iterations=max_unevaluated_rows))
+        )

         if data.shape[0] == max_unevaluated_rows:
-            st.caption(
+            _show_data_information(
                 f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
-                "rows. Call `to_pandas()` on the dataframe to show more."
+                "rows. Convert the data to a list to show more."
             )
-        return cast(pd.DataFrame, data)
+        return data

-    # This is inefficient when data is a pyarrow.Table as it will be converted
-    # back to Arrow when marshalled to protobuf, but area/bar/line charts need
-    # DataFrame magic to generate the correct output.
-    if hasattr(data, "to_pandas"):
-        return pd.DataFrame(data.to_pandas())
+    if isinstance(data, EnumMeta):
+        # Support for enum classes
+        return _fix_column_naming(pd.DataFrame([c.value for c in data]))  # type: ignore
+
+    # Support for some list like objects
+    if isinstance(data, (deque, map, array.ArrayType)):
+        return _fix_column_naming(pd.DataFrame(list(data)))
+
+    # Support for Streamlit's custom dict-like objects
+    if is_custom_dict(data):
+        return _dict_to_pandas_df(data.to_dict())
+
+    # Support for named tuples
+    if is_namedtuple(data):
+        return _dict_to_pandas_df(data._asdict())
+
+    # Support for dataclass instances
+    if is_dataclass_instance(data):
+        return _dict_to_pandas_df(dataclasses.asdict(data))
+
+    # Support for dict-like objects
+    if isinstance(data, (ChainMap, MappingProxyType, UserDict)):
+        return _dict_to_pandas_df(dict(data))

     # Try to convert to pandas.DataFrame. This will raise an error is df is not
     # compatible with the pandas.DataFrame constructor.
     try:
-        return pd.DataFrame(data)
-
+        return _fix_column_naming(pd.DataFrame(data))
     except ValueError as ex:
         if isinstance(data, dict):
             with contextlib.suppress(ValueError):
                 # Try to use index orient as back-up to support key-value dicts
-                return pd.DataFrame.from_dict(data, orient="index")
+                return _dict_to_pandas_df(data)
         raise errors.StreamlitAPIException(
             f"""
 Unable to convert object of type `{type(data)}` to `pandas.DataFrame`.
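
Note: the new branches route dataclass instances, namedtuples, and the stdlib mapping types through _dict_to_pandas_df. Pure-pandas equivalents of what those branches produce:

import dataclasses
from collections import namedtuple

import pandas as pd

@dataclasses.dataclass
class Point:
    x: int
    y: int

# Dataclass branch: dataclasses.asdict() feeds the key-value conversion.
print(pd.DataFrame.from_dict(dataclasses.asdict(Point(1, 2)), orient="index"))

# Namedtuple branch: _asdict() feeds the same helper.
Row = namedtuple("Row", ["x", "y"])
print(pd.DataFrame.from_dict(Row(3, 4)._asdict(), orient="index"))
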
@@ -419,6 +555,14 @@ def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
     return reader.read_pandas()


+def _show_data_information(msg: str) -> None:
+    """Show a message to the user with important information
+    about the processed dataset."""
+    from streamlit.delta_generator import main_dg
+
+    main_dg.caption(msg)
+
+
 def convert_anything_to_arrow_bytes(
     data: Any,
     max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
@@ -449,8 +593,16 @@ def convert_anything_to_arrow_bytes(
     if isinstance(data, pa.Table):
         return convert_arrow_table_to_arrow_bytes(data)

+    if is_pandas_data_object(data):
+        # All pandas data objects should be handled via our pandas
+        # conversion logic. We are already calling it here
+        # to ensure that its not handled via the interchange
+        # protocol support below.
+        df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
+        return convert_pandas_df_to_arrow_bytes(df)
+
     # Fallback: try to convert to pandas DataFrame
-    # and then to Arrow bytes
+    # and then to Arrow bytes.
     df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
     return convert_pandas_df_to_arrow_bytes(df)

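
Note: the new is_pandas_data_object fast path only decides which converter runs; the serialization itself is unchanged. For reference, the Arrow IPC round trip that convert_pandas_df_to_arrow_bytes is assumed to perform looks roughly like this:

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"a": [1, 2, 3]})
table = pa.Table.from_pandas(df)

# Serialize the table into Arrow IPC stream bytes:
sink = pa.BufferOutputStream()
with pa.ipc.new_stream(sink, table.schema) as writer:
    writer.write_table(table)
arrow_bytes = sink.getvalue().to_pybytes()
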
@@ -475,7 +627,9 @@ def convert_anything_to_sequence(obj: OptionSequence[V_co]) -> Sequence[V_co]:
     if obj is None:
         return []  # type: ignore

-    if isinstance(obj, (str, list, tuple, set, range, EnumMeta)):
+    if isinstance(
+        obj, (str, list, tuple, set, range, EnumMeta, deque, map)
+    ) and not is_snowpark_row_list(obj):
         # This also ensures that the sequence is copied to prevent
         # potential mutations to the original object.
         return list(obj)
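
Note: deque and map objects now take the simple list() copy path, while the is_snowpark_row_list exclusion keeps Snowpark row lists on the dataframe path. For example:

from collections import deque

assert list(deque(["a", "b", "c"])) == ["a", "b", "c"]
assert list(map(str.upper, ["a", "b"])) == ["A", "B"]
# Caveat: a map object is a single-use iterator, so copying it also consumes it.
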
@@ -569,8 +723,7 @@ def _maybe_truncate_table(
     # we just display the exact numbers.
     displayed_rows = str(table.num_rows)
     total_rows = str(table.num_rows + truncated_rows)
-
-    st.caption(
+    _show_data_information(
         f"⚠️ Showing {displayed_rows} out of {total_rows} "
         "rows due to data size limitations."
     )
@@ -579,7 +732,8 @@


 def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
-    """Return True if the column type is known to cause issues during Arrow conversion."""
+    """Return True if the column type is known to cause issues during
+    Arrow conversion."""
     from pandas.api.types import infer_dtype, is_dict_like, is_list_like

     if column.dtype.kind in [
@@ -610,7 +764,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
     ]:
         return True
     elif inferred_type == "mixed":
-        # This includes most of the more complex/custom types (objects, dicts, lists, ...)
+        # This includes most of the more complex/custom types (objects, dicts,
+        # lists, ...)
         if len(column) == 0 or not hasattr(column, "iloc"):
             # The column seems to be invalid, so we assume it is incompatible.
             # But this would most likely never happen since empty columns
@@ -622,7 +777,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:

         if (
             not is_list_like(first_value)
-            # dicts are list-like, but have issues in Arrow JS (see comments in Quiver.ts)
+            # dicts are list-like, but have issues in Arrow JS (see comments in
+            # Quiver.ts)
             or is_dict_like(first_value)
             # Frozensets are list-like, but are not compatible with pyarrow.
             or isinstance(first_value, frozenset)
@@ -684,15 +840,6 @@ def fix_arrow_incompatible_column_types(
     return df_copy if df_copy is not None else df


-def _is_list_of_scalars(data: Iterable[Any]) -> bool:
-    """Check if the list only contains scalar values."""
-    from pandas.api.types import infer_dtype
-
-    # Overview on all value that are interpreted as scalar:
-    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
-    return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
-
-
 def determine_data_format(input_data: Any) -> DataFormat:
     """Determine the data format of the input data.

@@ -706,6 +853,8 @@ def determine_data_format(input_data: Any) -> DataFormat:
     DataFormat
         The data format of the input data.
     """
+    import array
+
     import numpy as np
     import pandas as pd
     import pyarrow as pa
@@ -722,26 +871,43 @@ def determine_data_format(input_data: Any) -> DataFormat:
         return DataFormat.NUMPY_MATRIX
     elif isinstance(input_data, pa.Table):
         return DataFormat.PYARROW_TABLE
+    elif isinstance(input_data, pa.Array):
+        return DataFormat.PYARROW_ARRAY
     elif isinstance(input_data, pd.Series):
         return DataFormat.PANDAS_SERIES
     elif isinstance(input_data, pd.Index):
         return DataFormat.PANDAS_INDEX
     elif is_pandas_styler(input_data):
         return DataFormat.PANDAS_STYLER
-    elif is_snowpark_data_object(input_data):
-        return DataFormat.SNOWPARK_OBJECT
+    elif isinstance(input_data, pd.api.extensions.ExtensionArray):
+        return DataFormat.PANDAS_ARRAY
     elif is_modin_data_object(input_data):
         return DataFormat.MODIN_OBJECT
     elif is_snowpandas_data_object(input_data):
         return DataFormat.SNOWPANDAS_OBJECT
     elif is_pyspark_data_object(input_data):
         return DataFormat.PYSPARK_OBJECT
-    elif isinstance(input_data, (list, tuple, set)):
+    elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
+        return DataFormat.SNOWPARK_OBJECT
+    elif isinstance(
+        input_data, (range, EnumMeta, KeysView, ValuesView, deque, map, array.ArrayType)
+    ):
+        return DataFormat.LIST_OF_VALUES
+    elif (
+        isinstance(input_data, (ChainMap, MappingProxyType, UserDict))
+        or is_dataclass_instance(input_data)
+        or is_namedtuple(input_data)
+        or is_custom_dict(input_data)
+    ):
+        return DataFormat.KEY_VALUE_DICT
+    elif isinstance(input_data, (ItemsView, enumerate)):
+        return DataFormat.LIST_OF_ROWS
+    elif isinstance(input_data, (list, tuple, set, frozenset)):
         if _is_list_of_scalars(input_data):
             # -> one-dimensional data structure
             if isinstance(input_data, tuple):
                 return DataFormat.TUPLE_OF_VALUES
-            if isinstance(input_data, set):
+            if isinstance(input_data, (set, frozenset)):
                 return DataFormat.SET_OF_VALUES
             return DataFormat.LIST_OF_VALUES
         else:
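
Note: spot checks for the new classification branches, assuming the module is importable as streamlit.dataframe_util (per this diff):

import array
from collections import ChainMap

from streamlit import dataframe_util as du

assert du.determine_data_format(array.array("i", [1, 2])) == du.DataFormat.LIST_OF_VALUES
assert du.determine_data_format({"a": 1}.items()) == du.DataFormat.LIST_OF_ROWS
assert du.determine_data_format(enumerate("ab")) == du.DataFormat.LIST_OF_ROWS
assert du.determine_data_format(ChainMap({"a": 1})) == du.DataFormat.KEY_VALUE_DICT
assert du.determine_data_format(frozenset({1, 2})) == du.DataFormat.SET_OF_VALUES
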
@@ -751,23 +917,23 @@
             first_element = next(iter(input_data))
             if isinstance(first_element, dict):
                 return DataFormat.LIST_OF_RECORDS
-            if isinstance(first_element, (list, tuple, set)):
+            if isinstance(first_element, (list, tuple, set, frozenset)):
                 return DataFormat.LIST_OF_ROWS
     elif isinstance(input_data, dict):
         if not input_data:
             return DataFormat.KEY_VALUE_DICT
         if len(input_data) > 0:
             first_value = next(iter(input_data.values()))
+            # In the future, we could potentially also support tight & split formats
             if isinstance(first_value, dict):
                 return DataFormat.COLUMN_INDEX_MAPPING
             if isinstance(first_value, (list, tuple)):
                 return DataFormat.COLUMN_VALUE_MAPPING
             if isinstance(first_value, pd.Series):
                 return DataFormat.COLUMN_SERIES_MAPPING
-            # In the future, we could potentially also support the tight & split formats here
-            if _is_list_of_scalars(input_data.values()):
-                # Only use the key-value dict format if the values are only scalar values
-                return DataFormat.KEY_VALUE_DICT
+            # Use key-value dict as fallback. However, if the values of the dict
+            # contains mixed types, it will become non-editable in the frontend.
+            return DataFormat.KEY_VALUE_DICT
     return DataFormat.UNKNOWN

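
Note: the practical effect of the new fallback is that a dict with mixed scalar value types no longer falls through to DataFormat.UNKNOWN; it renders as a key-value table (read-only in editable views, per the comment above). For example:

import streamlit as st

st.dataframe({"name": "Mary", "age": 30, "active": True})
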
@@ -783,12 +949,30 @@ def _unify_missing_values(df: DataFrame) -> DataFrame:
     return df.fillna(np.nan).replace([np.nan], [None])


+def _pandas_df_to_series(df: DataFrame) -> Series[Any]:
+    """Convert a Pandas DataFrame to a Pandas Series by selecting the first column.
+
+    Raises
+    ------
+    ValueError
+        If the DataFrame has more than one column.
+    """
+    # Select first column in dataframe and create a new series based on the values
+    if len(df.columns) != 1:
+        raise ValueError(
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
+        )
+    return df[df.columns[0]]
+
+
 def convert_pandas_df_to_data_format(
     df: DataFrame, data_format: DataFormat
 ) -> (
     DataFrame
     | Series[Any]
     | pa.Table
+    | pa.Array
     | np.ndarray[Any, np.dtype[Any]]
     | tuple[Any]
     | list[Any]
@@ -818,6 +1002,7 @@
         DataFormat.PYSPARK_OBJECT,
         DataFormat.PANDAS_INDEX,
         DataFormat.PANDAS_STYLER,
+        DataFormat.PANDAS_ARRAY,
         DataFormat.MODIN_OBJECT,
         DataFormat.SNOWPANDAS_OBJECT,
     ]:
@@ -838,13 +1023,12 @@
         import pyarrow as pa

         return pa.Table.from_pandas(df)
+    elif data_format == DataFormat.PYARROW_ARRAY:
+        import pyarrow as pa
+
+        return pa.Array.from_pandas(_pandas_df_to_series(df))
     elif data_format == DataFormat.PANDAS_SERIES:
-        # Select first column in dataframe and create a new series based on the values
-        if len(df.columns) != 1:
-            raise ValueError(
-                f"DataFrame is expected to have a single column but has {len(df.columns)}."
-            )
-        return df[df.columns[0]]
+        return _pandas_df_to_series(df)
     elif data_format == DataFormat.LIST_OF_RECORDS:
         return _unify_missing_values(df).to_dict(orient="records")
     elif data_format == DataFormat.LIST_OF_ROWS:
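
Note: the PYARROW_ARRAY branch leans on the single-column invariant enforced by _pandas_df_to_series. The round trip in isolation:

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"value": [1, 2, 3]})
# _pandas_df_to_series(df) reduces to df[df.columns[0]] after the column-count check:
arr = pa.Array.from_pandas(df[df.columns[0]])
print(arr.type)  # int64
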
@@ -868,7 +1052,8 @@
         return_list = df[df.columns[0]].tolist()
     elif len(df.columns) >= 1:
         raise ValueError(
-            f"DataFrame is expected to have a single column but has {len(df.columns)}."
+            "DataFrame is expected to have a single column but "
+            f"has {len(df.columns)}."
         )
     if data_format == DataFormat.TUPLE_OF_VALUES:
         return tuple(return_list)
streamlit/delta_generator.py

@@ -708,7 +708,8 @@ def _prep_data_for_add_rows(
 ) -> tuple[Data, AddRowsMetadata | None]:
     if not add_rows_metadata:
         if dataframe_util.is_pandas_styler(data):
-            # When calling add_rows on st.table or st.dataframe we want styles to pass through.
+            # When calling add_rows on st.table or st.dataframe we want styles to
+            # pass through.
             return data, None
         return dataframe_util.convert_anything_to_pandas_df(data), None

streamlit/elements/arrow.py

@@ -513,12 +513,7 @@ class ArrowMixin:
         data_df = dataframe_util.convert_anything_to_pandas_df(
             data, ensure_copy=False
         )
-        apply_data_specific_configs(
-            column_config_mapping,
-            data_df,
-            data_format,
-            check_arrow_compatibility=False,
-        )
+        apply_data_specific_configs(column_config_mapping, data_format)
         # Serialize the data to bytes:
         proto.data = dataframe_util.convert_pandas_df_to_arrow_bytes(data_df)

@@ -599,8 +594,10 @@

         """

-        # Check if data is uncollected, and collect it but with 100 rows max, instead of 10k rows, which is done in all other cases.
-        # Avoid this and use 100 rows in st.table, because large tables render slowly, take too much screen space, and can crush the app.
+        # Check if data is uncollected, and collect it but with 100 rows max, instead of
+        # 10k rows, which is done in all other cases.
+        # We use 100 rows in st.table, because large tables render slowly,
+        # take too much screen space, and can crush the app.
         if dataframe_util.is_unevaluated_data_object(data):
             data = dataframe_util.convert_anything_to_pandas_df(
                 data, max_unevaluated_rows=100