streamlit-nightly 1.37.2.dev20240805__py2.py3-none-any.whl → 1.37.2.dev20240807__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. streamlit/dataframe_util.py +328 -72
  2. streamlit/delta_generator.py +2 -1
  3. streamlit/elements/arrow.py +5 -8
  4. streamlit/elements/json.py +13 -13
  5. streamlit/elements/lib/column_config_utils.py +1 -35
  6. streamlit/elements/widgets/data_editor.py +28 -9
  7. streamlit/elements/write.py +21 -28
  8. streamlit/runtime/caching/cache_utils.py +12 -9
  9. streamlit/static/asset-manifest.json +16 -15
  10. streamlit/static/index.html +1 -1
  11. streamlit/static/static/js/{1451.e9542cc9.chunk.js → 1451.913b0f90.chunk.js} +1 -1
  12. streamlit/static/static/js/{1792.8bd6ce2a.chunk.js → 1792.eb8a836f.chunk.js} +1 -1
  13. streamlit/static/static/js/{2469.31e2695e.chunk.js → 2469.c4454803.chunk.js} +1 -1
  14. streamlit/static/static/js/3466.0cd981ca.chunk.js +2 -0
  15. streamlit/static/static/js/{3466.bb8e2e0c.chunk.js.LICENSE.txt → 3466.0cd981ca.chunk.js.LICENSE.txt} +10 -0
  16. streamlit/static/static/js/{3513.7dedbda2.chunk.js → 3513.577f3dc5.chunk.js} +1 -1
  17. streamlit/static/static/js/{4113.99983645.chunk.js → 4113.8b8c523d.chunk.js} +1 -1
  18. streamlit/static/static/js/{4335.fb48fd8e.chunk.js → 4335.2b77e051.chunk.js} +1 -1
  19. streamlit/static/static/js/{4477.b85593dd.chunk.js → 4477.edb1d80a.chunk.js} +1 -1
  20. streamlit/static/static/js/{5106.e53c485c.chunk.js → 5106.656a5db4.chunk.js} +1 -1
  21. streamlit/static/static/js/5267.b73f42da.chunk.js +2 -0
  22. streamlit/static/static/js/5267.b73f42da.chunk.js.LICENSE.txt +1 -0
  23. streamlit/static/static/js/{6853.93dd1c4c.chunk.js → 6853.e7b24972.chunk.js} +1 -1
  24. streamlit/static/static/js/8148.539ddabe.chunk.js +1 -0
  25. streamlit/static/static/js/{8427.e051b59f.chunk.js → 8427.4594845a.chunk.js} +1 -1
  26. streamlit/static/static/js/{8477.4d2a23c2.chunk.js → 8477.90b06bd9.chunk.js} +1 -1
  27. streamlit/static/static/js/main.80efcd23.js +2 -0
  28. streamlit/static/static/js/{main.95daee38.js.LICENSE.txt → main.80efcd23.js.LICENSE.txt} +23 -1
  29. streamlit/type_util.py +46 -14
  30. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/METADATA +2 -2
  31. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/RECORD +35 -33
  32. streamlit/static/static/js/3466.bb8e2e0c.chunk.js +0 -2
  33. streamlit/static/static/js/8148.c7db8490.chunk.js +0 -1
  34. streamlit/static/static/js/main.95daee38.js +0 -2
  35. {streamlit_nightly-1.37.2.dev20240805.data → streamlit_nightly-1.37.2.dev20240807.data}/scripts/streamlit.cmd +0 -0
  36. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/WHEEL +0 -0
  37. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/entry_points.txt +0 -0
  38. {streamlit_nightly-1.37.2.dev20240805.dist-info → streamlit_nightly-1.37.2.dev20240807.dist-info}/top_level.txt +0 -0
@@ -17,8 +17,14 @@
17
17
  from __future__ import annotations
18
18
 
19
19
  import contextlib
20
+ import dataclasses
21
+ import inspect
20
22
  import math
23
+ import re
24
+ from collections import ChainMap, UserDict, deque
25
+ from collections.abc import ItemsView, KeysView, ValuesView
21
26
  from enum import Enum, EnumMeta, auto
27
+ from types import MappingProxyType
22
28
  from typing import (
23
29
  TYPE_CHECKING,
24
30
  Any,
@@ -34,9 +40,14 @@ from typing import (
34
40
 
35
41
  from typing_extensions import TypeAlias, TypeGuard
36
42
 
37
- import streamlit as st
38
43
  from streamlit import config, errors, logger, string_util
39
- from streamlit.type_util import is_type
44
+ from streamlit.type_util import (
45
+ has_callable_attr,
46
+ is_custom_dict,
47
+ is_dataclass_instance,
48
+ is_namedtuple,
49
+ is_type,
50
+ )
40
51
 
41
52
  if TYPE_CHECKING:
42
53
  import numpy as np
@@ -51,6 +62,7 @@ _LOGGER: Final = logger.get_logger(__name__)
51
62
  # Maximum number of rows to request from an unevaluated (out-of-core) dataframe
52
63
  _MAX_UNEVALUATED_DF_ROWS = 10000
53
64
 
65
+ _PANDAS_DATA_OBJECT_TYPE_RE: Final = re.compile(r"^pandas.*$")
54
66
  _PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
55
67
  _SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
56
68
  _SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
@@ -60,7 +72,12 @@ _MODIN_DF_TYPE_STR: Final = "modin.pandas.dataframe.DataFrame"
60
72
  _MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
61
73
  _SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
62
74
  _SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"
63
-
75
+ _SNOWPANDAS_INDEX_TYPE_STR: Final = (
76
+ "snowflake.snowpark.modin.plugin.extensions.index.Index"
77
+ )
78
+ _POLARS_DATAFRAME: Final = "polars.dataframe.frame.DataFrame"
79
+ _POLARS_SERIES: Final = "polars.series.series.Series"
80
+ _POLARS_LAZYFRAME: Final = "polars.lazyframe.frame.LazyFrame"
64
81
 
65
82
  V_co = TypeVar(
66
83
  "V_co",
@@ -111,14 +128,19 @@ class DataFormat(Enum):
111
128
  PANDAS_DATAFRAME = auto() # pd.DataFrame
112
129
  PANDAS_SERIES = auto() # pd.Series
113
130
  PANDAS_INDEX = auto() # pd.Index
131
+ PANDAS_ARRAY = auto() # pd.array
114
132
  NUMPY_LIST = auto() # np.array[Scalar]
115
133
  NUMPY_MATRIX = auto() # np.array[List[Scalar]]
116
134
  PYARROW_TABLE = auto() # pyarrow.Table
135
+ PYARROW_ARRAY = auto() # pyarrow.Array
117
136
  SNOWPARK_OBJECT = auto() # Snowpark DataFrame, Table, List[Row]
118
137
  PYSPARK_OBJECT = auto() # pyspark.DataFrame
119
138
  MODIN_OBJECT = auto() # Modin DataFrame, Series
120
139
  SNOWPANDAS_OBJECT = auto() # Snowpandas DataFrame, Series
121
140
  PANDAS_STYLER = auto() # pandas Styler
141
+ POLARS_DATAFRAME = auto() # polars.dataframe.frame.DataFrame
142
+ POLARS_LAZYFRAME = auto() # polars.lazyframe.frame.LazyFrame
143
+ POLARS_SERIES = auto() # polars.series.series.Series
122
144
  LIST_OF_RECORDS = auto() # List[Dict[str, Scalar]]
123
145
  LIST_OF_ROWS = auto() # List[List[Scalar]]
124
146
  LIST_OF_VALUES = auto() # List[Scalar]
@@ -136,9 +158,9 @@ def is_dataframe_like(obj: object) -> bool:
136
158
  This does not include basic collection types like list, dict, tuple, etc.
137
159
  """
138
160
 
139
- if obj is None or isinstance(
140
- obj, (list, tuple, set, dict, str, bytes, int, float, bool)
141
- ):
161
+ # We exclude list and dict here since there are some cases where a list or dict is
162
+ # considered a dataframe-like object.
163
+ if obj is None or isinstance(obj, (tuple, set, str, bytes, int, float, bool)):
142
164
  # Basic types are not considered dataframe-like, so we can
143
165
  # return False early to avoid unnecessary checks.
144
166
  return False
@@ -148,13 +170,19 @@ def is_dataframe_like(obj: object) -> bool:
148
170
  DataFormat.PANDAS_SERIES,
149
171
  DataFormat.PANDAS_INDEX,
150
172
  DataFormat.PANDAS_STYLER,
173
+ DataFormat.PANDAS_ARRAY,
151
174
  DataFormat.NUMPY_LIST,
152
175
  DataFormat.NUMPY_MATRIX,
153
176
  DataFormat.PYARROW_TABLE,
177
+ DataFormat.PYARROW_ARRAY,
154
178
  DataFormat.SNOWPARK_OBJECT,
155
179
  DataFormat.PYSPARK_OBJECT,
156
180
  DataFormat.MODIN_OBJECT,
157
181
  DataFormat.SNOWPANDAS_OBJECT,
182
+ DataFormat.POLARS_SERIES,
183
+ DataFormat.POLARS_DATAFRAME,
184
+ DataFormat.POLARS_LAZYFRAME,
185
+ DataFormat.COLUMN_SERIES_MAPPING,
158
186
  ]
159
187
 
160
188
 
@@ -165,7 +193,9 @@ def is_unevaluated_data_object(obj: object) -> bool:
165
193
  - Snowpark DataFrame / Table
166
194
  - PySpark DataFrame
167
195
  - Modin DataFrame / Series
168
- - Snowpandas DataFrame / Series
196
+ - Snowpandas DataFrame / Series / Index
197
+ - Polars LazyFrame
198
+ - Generator functions
169
199
 
170
200
  Unevaluated means that the data is not yet in the local memory.
171
201
  Unevaluated data objects are treated differently from other data objects by only
@@ -176,9 +206,16 @@ def is_unevaluated_data_object(obj: object) -> bool:
176
206
  or is_pyspark_data_object(obj)
177
207
  or is_snowpandas_data_object(obj)
178
208
  or is_modin_data_object(obj)
209
+ or is_polars_lazyframe(obj)
210
+ or inspect.isgeneratorfunction(obj)
179
211
  )
180
212
 
181
213
 
214
+ def is_pandas_data_object(obj: object) -> bool:
215
+ """True if obj is a Pandas object (e.g. DataFrame, Series, Index, Styler, ...)."""
216
+ return is_type(obj, _PANDAS_DATA_OBJECT_TYPE_RE)
217
+
218
+
182
219
  def is_snowpark_data_object(obj: object) -> bool:
183
220
  """True if obj is a Snowpark DataFrame or Table."""
184
221
  return is_type(obj, _SNOWPARK_TABLE_TYPE_STR) or is_type(obj, _SNOWPARK_DF_TYPE_STR)
@@ -186,13 +223,12 @@ def is_snowpark_data_object(obj: object) -> bool:
186
223
 
187
224
  def is_snowpark_row_list(obj: object) -> bool:
188
225
  """True if obj is a list of snowflake.snowpark.row.Row."""
189
- if not isinstance(obj, list):
190
- return False
191
- if len(obj) < 1:
192
- return False
193
- if not hasattr(obj[0], "__class__"):
194
- return False
195
- return is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
226
+ return (
227
+ isinstance(obj, list)
228
+ and len(obj) > 0
229
+ and is_type(obj[0], _SNOWPARK_DF_ROW_TYPE_STR)
230
+ and has_callable_attr(obj[0], "as_dict")
231
+ )
196
232
 
197
233
 
198
234
  def is_pyspark_data_object(obj: object) -> bool:
@@ -211,16 +247,105 @@ def is_modin_data_object(obj: object) -> bool:
211
247
 
212
248
  def is_snowpandas_data_object(obj: object) -> bool:
213
249
  """True if obj is a Snowpark Pandas DataFrame or Series."""
214
- return is_type(obj, _SNOWPANDAS_DF_TYPE_STR) or is_type(
215
- obj, _SNOWPANDAS_SERIES_TYPE_STR
250
+ return (
251
+ is_type(obj, _SNOWPANDAS_DF_TYPE_STR)
252
+ or is_type(obj, _SNOWPANDAS_SERIES_TYPE_STR)
253
+ or is_type(obj, _SNOWPANDAS_INDEX_TYPE_STR)
216
254
  )
217
255
 
218
256
 
257
+ def is_polars_dataframe(obj: object) -> bool:
258
+ """True if obj is a Polars Dataframe."""
259
+ return is_type(obj, _POLARS_DATAFRAME)
260
+
261
+
262
+ def is_polars_series(obj: object) -> bool:
263
+ """True if obj is a Polars Series."""
264
+ return is_type(obj, _POLARS_SERIES)
265
+
266
+
267
+ def is_polars_lazyframe(obj: object) -> bool:
268
+ """True if obj is a Polars Lazyframe."""
269
+ return is_type(obj, _POLARS_LAZYFRAME)
270
+
271
+
219
272
  def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
220
273
  """True if obj is a pandas Styler."""
221
274
  return is_type(obj, _PANDAS_STYLER_TYPE_STR)
222
275
 
223
276
 
277
+ def _is_list_of_scalars(data: Iterable[Any]) -> bool:
278
+ """Check if the list only contains scalar values."""
279
+ from pandas.api.types import infer_dtype
280
+
281
+ # Overview on all value that are interpreted as scalar:
282
+ # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
283
+ return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
284
+
285
+
286
+ def _iterable_to_list(
287
+ iterable: Iterable[Any], max_iterations: int | None = None
288
+ ) -> list[Any]:
289
+ """Convert an iterable to a list.
290
+
291
+ Parameters
292
+ ----------
293
+ iterable : Iterable
294
+ The iterable to convert to a list.
295
+
296
+ max_iterations : int or None
297
+ The maximum number of iterations to perform. If None, all iterations are performed.
298
+
299
+ Returns
300
+ -------
301
+ list
302
+ The converted list.
303
+ """
304
+ if max_iterations is None:
305
+ return list(iterable)
306
+
307
+ result = []
308
+ for i, item in enumerate(iterable):
309
+ if i >= max_iterations:
310
+ break
311
+ result.append(item)
312
+ return result
313
+
314
+
315
+ def _fix_column_naming(data_df: DataFrame) -> DataFrame:
316
+ """Rename the first column to "value" if it is not named
317
+ and if there is only one column in the dataframe.
318
+
319
+ The default name of the first column is 0 if it is not named
320
+ which is not very descriptive.
321
+ """
322
+
323
+ if len(data_df.columns) == 1 and data_df.columns[0] == 0:
324
+ # Pandas automatically names the first column with 0 if it is not named.
325
+ # We rename it to "value" to make it more descriptive if there is only
326
+ # one column in the dataframe.
327
+ data_df.rename(columns={0: "value"}, inplace=True)
328
+ return data_df
329
+
330
+
331
+ def _dict_to_pandas_df(data: dict[Any, Any]) -> DataFrame:
332
+ """Convert a key-value dict to a Pandas DataFrame.
333
+
334
+ Parameters
335
+ ----------
336
+ data : dict
337
+ The dict to convert to a Pandas DataFrame.
338
+
339
+ Returns
340
+ -------
341
+ pandas.DataFrame
342
+ The converted Pandas DataFrame.
343
+ """
344
+ import pandas as pd
345
+
346
+ return _fix_column_naming(pd.DataFrame.from_dict(data, orient="index"))
347
+
348
+
224
349
  def convert_anything_to_pandas_df(
225
350
  data: Any,
226
351
  max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
@@ -246,81 +371,140 @@ def convert_anything_to_pandas_df(
246
371
  pandas.DataFrame
247
372
 
248
373
  """
374
+ import array
375
+
249
376
  import numpy as np
250
377
  import pandas as pd
251
378
 
252
379
  if isinstance(data, pd.DataFrame):
253
380
  return data.copy() if ensure_copy else cast(pd.DataFrame, data)
254
381
 
255
- if isinstance(data, (pd.Series, pd.Index)):
382
+ if isinstance(data, (pd.Series, pd.Index, pd.api.extensions.ExtensionArray)):
256
383
  return pd.DataFrame(data)
257
384
 
258
385
  if is_pandas_styler(data):
259
386
  return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)
260
387
 
261
388
  if isinstance(data, np.ndarray):
262
- return pd.DataFrame([]) if len(data.shape) == 0 else pd.DataFrame(data)
389
+ return (
390
+ pd.DataFrame([])
391
+ if len(data.shape) == 0
392
+ else _fix_column_naming(pd.DataFrame(data))
393
+ )
394
+
395
+ if is_polars_dataframe(data):
396
+ data = data.clone() if ensure_copy else data
397
+ return data.to_pandas()
398
+
399
+ if is_polars_series(data):
400
+ data = data.clone() if ensure_copy else data
401
+ return data.to_pandas().to_frame()
402
+
403
+ if is_polars_lazyframe(data):
404
+ data = data.limit(max_unevaluated_rows).collect().to_pandas()
405
+ if data.shape[0] == max_unevaluated_rows:
406
+ _show_data_information(
407
+ f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
408
+ "rows. Call `collect()` on the dataframe to show more."
409
+ )
410
+ return cast(pd.DataFrame, data)
263
411
 
264
412
  if is_modin_data_object(data):
265
413
  data = data.head(max_unevaluated_rows)._to_pandas()
266
414
 
267
- if isinstance(data, pd.Series):
415
+ if isinstance(data, (pd.Series, pd.Index)):
268
416
  data = data.to_frame()
269
417
 
270
418
  if data.shape[0] == max_unevaluated_rows:
271
- st.caption(
419
+ _show_data_information(
272
420
  f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
273
- "rows. Call `_to_pandas()` on the dataframe to show more."
421
+ "rows. Call `_to_pandas()` on the data object to show more."
274
422
  )
275
423
  return cast(pd.DataFrame, data)
276
424
 
277
425
  if is_pyspark_data_object(data):
278
426
  data = data.limit(max_unevaluated_rows).toPandas()
279
427
  if data.shape[0] == max_unevaluated_rows:
280
- st.caption(
428
+ _show_data_information(
429
+ f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
430
+ "rows. Call `toPandas()` on the data object to show more."
431
+ )
432
+ return cast(pd.DataFrame, data)
433
+
434
+ if is_snowpandas_data_object(data):
435
+ data = data[:max_unevaluated_rows].to_pandas()
436
+
437
+ if isinstance(data, (pd.Series, pd.Index)):
438
+ data = data.to_frame()
439
+
440
+ if data.shape[0] == max_unevaluated_rows:
441
+ _show_data_information(
281
442
  f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
282
- "rows. Call `toPandas()` on the dataframe to show more."
443
+ "rows. Call `to_pandas()` on the data object to show more."
283
444
  )
284
445
  return cast(pd.DataFrame, data)
285
446
 
286
447
  if is_snowpark_data_object(data):
287
448
  data = data.limit(max_unevaluated_rows).to_pandas()
288
449
  if data.shape[0] == max_unevaluated_rows:
289
- st.caption(
450
+ _show_data_information(
290
451
  f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
291
- "rows. Call `to_pandas()` on the dataframe to show more."
452
+ "rows. Call `to_pandas()` on the data object to show more."
292
453
  )
293
454
  return cast(pd.DataFrame, data)
294
455
 
295
- if is_snowpandas_data_object(data):
296
- data = data.head(max_unevaluated_rows).to_pandas()
456
+ if is_snowpark_row_list(data):
457
+ return pd.DataFrame([row.as_dict() for row in data])
297
458
 
298
- if isinstance(data, pd.Series):
299
- data = data.to_frame()
459
+ if has_callable_attr(data, "to_pandas"):
460
+ return pd.DataFrame(data.to_pandas())
461
+
462
+ # Support for generator functions
463
+ if inspect.isgeneratorfunction(data):
464
+ data = _fix_column_naming(
465
+ pd.DataFrame(_iterable_to_list(data(), max_iterations=max_unevaluated_rows))
466
+ )
300
467
 
301
468
  if data.shape[0] == max_unevaluated_rows:
302
- st.caption(
469
+ _show_data_information(
303
470
  f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
304
- "rows. Call `to_pandas()` on the dataframe to show more."
471
+ "rows. Convert the data to a list to show more."
305
472
  )
306
- return cast(pd.DataFrame, data)
473
+ return data
307
474
 
308
- # This is inefficient when data is a pyarrow.Table as it will be converted
309
- # back to Arrow when marshalled to protobuf, but area/bar/line charts need
310
- # DataFrame magic to generate the correct output.
311
- if hasattr(data, "to_pandas"):
312
- return pd.DataFrame(data.to_pandas())
475
+ if isinstance(data, EnumMeta):
476
+ # Support for enum classes
477
+ return _fix_column_naming(pd.DataFrame([c.value for c in data])) # type: ignore
478
+
479
+ # Support for some list like objects
480
+ if isinstance(data, (deque, map, array.ArrayType)):
481
+ return _fix_column_naming(pd.DataFrame(list(data)))
482
+
483
+ # Support for Streamlit's custom dict-like objects
484
+ if is_custom_dict(data):
485
+ return _dict_to_pandas_df(data.to_dict())
486
+
487
+ # Support for named tuples
488
+ if is_namedtuple(data):
489
+ return _dict_to_pandas_df(data._asdict())
490
+
491
+ # Support for dataclass instances
492
+ if is_dataclass_instance(data):
493
+ return _dict_to_pandas_df(dataclasses.asdict(data))
494
+
495
+ # Support for dict-like objects
496
+ if isinstance(data, (ChainMap, MappingProxyType, UserDict)):
497
+ return _dict_to_pandas_df(dict(data))
313
498
 
314
499
  # Try to convert to pandas.DataFrame. This will raise an error if df is not
315
500
  # compatible with the pandas.DataFrame constructor.
316
501
  try:
317
- return pd.DataFrame(data)
318
-
502
+ return _fix_column_naming(pd.DataFrame(data))
319
503
  except ValueError as ex:
320
504
  if isinstance(data, dict):
321
505
  with contextlib.suppress(ValueError):
322
506
  # Try to use index orient as back-up to support key-value dicts
323
- return pd.DataFrame.from_dict(data, orient="index")
507
+ return _dict_to_pandas_df(data)
324
508
  raise errors.StreamlitAPIException(
325
509
  f"""
326
510
  Unable to convert object of type `{type(data)}` to `pandas.DataFrame`.
@@ -419,6 +603,14 @@ def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
419
603
  return reader.read_pandas()
420
604
 
421
605
 
606
+ def _show_data_information(msg: str) -> None:
607
+ """Show a message to the user with important information
608
+ about the processed dataset."""
609
+ from streamlit.delta_generator import main_dg
610
+
611
+ main_dg.caption(msg)
612
+
613
+
422
614
  def convert_anything_to_arrow_bytes(
423
615
  data: Any,
424
616
  max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
@@ -449,8 +641,22 @@ def convert_anything_to_arrow_bytes(
449
641
  if isinstance(data, pa.Table):
450
642
  return convert_arrow_table_to_arrow_bytes(data)
451
643
 
644
+ if is_pandas_data_object(data):
645
+ # All pandas data objects should be handled via our pandas
646
+ # conversion logic. We are already calling it here
647
+ # to ensure that it's not handled via the interchange
648
+ # protocol support below.
649
+ df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
650
+ return convert_pandas_df_to_arrow_bytes(df)
651
+
652
+ if is_polars_dataframe(data):
653
+ return convert_arrow_table_to_arrow_bytes(data.to_arrow())
654
+
655
+ if is_polars_series(data):
656
+ return convert_arrow_table_to_arrow_bytes(data.to_frame().to_arrow())
657
+
452
658
  # Fallback: try to convert to pandas DataFrame
453
- # and then to Arrow bytes
659
+ # and then to Arrow bytes.
454
660
  df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
455
661
  return convert_pandas_df_to_arrow_bytes(df)
456
662
 
@@ -475,7 +681,9 @@ def convert_anything_to_sequence(obj: OptionSequence[V_co]) -> Sequence[V_co]:
475
681
  if obj is None:
476
682
  return [] # type: ignore
477
683
 
478
- if isinstance(obj, (str, list, tuple, set, range, EnumMeta)):
684
+ if isinstance(
685
+ obj, (str, list, tuple, set, range, EnumMeta, deque, map)
686
+ ) and not is_snowpark_row_list(obj):
479
687
  # This also ensures that the sequence is copied to prevent
480
688
  # potential mutations to the original object.
481
689
  return list(obj)
@@ -569,8 +777,7 @@ def _maybe_truncate_table(
569
777
  # we just display the exact numbers.
570
778
  displayed_rows = str(table.num_rows)
571
779
  total_rows = str(table.num_rows + truncated_rows)
572
-
573
- st.caption(
780
+ _show_data_information(
574
781
  f"⚠️ Showing {displayed_rows} out of {total_rows} "
575
782
  "rows due to data size limitations."
576
783
  )
@@ -579,7 +786,8 @@ def _maybe_truncate_table(
579
786
 
580
787
 
581
788
  def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
582
- """Return True if the column type is known to cause issues during Arrow conversion."""
789
+ """Return True if the column type is known to cause issues during
790
+ Arrow conversion."""
583
791
  from pandas.api.types import infer_dtype, is_dict_like, is_list_like
584
792
 
585
793
  if column.dtype.kind in [
@@ -610,7 +818,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
610
818
  ]:
611
819
  return True
612
820
  elif inferred_type == "mixed":
613
- # This includes most of the more complex/custom types (objects, dicts, lists, ...)
821
+ # This includes most of the more complex/custom types (objects, dicts,
822
+ # lists, ...)
614
823
  if len(column) == 0 or not hasattr(column, "iloc"):
615
824
  # The column seems to be invalid, so we assume it is incompatible.
616
825
  # But this would most likely never happen since empty columns
@@ -622,7 +831,8 @@ def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
622
831
 
623
832
  if (
624
833
  not is_list_like(first_value)
625
- # dicts are list-like, but have issues in Arrow JS (see comments in Quiver.ts)
834
+ # dicts are list-like, but have issues in Arrow JS (see comments in
835
+ # Quiver.ts)
626
836
  or is_dict_like(first_value)
627
837
  # Frozensets are list-like, but are not compatible with pyarrow.
628
838
  or isinstance(first_value, frozenset)
@@ -684,15 +894,6 @@ def fix_arrow_incompatible_column_types(
684
894
  return df_copy if df_copy is not None else df
685
895
 
686
896
 
687
- def _is_list_of_scalars(data: Iterable[Any]) -> bool:
688
- """Check if the list only contains scalar values."""
689
- from pandas.api.types import infer_dtype
690
-
691
- # Overview on all value that are interpreted as scalar:
692
- # https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
693
- return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
694
-
695
-
696
897
  def determine_data_format(input_data: Any) -> DataFormat:
697
898
  """Determine the data format of the input data.
698
899
 
@@ -706,6 +907,8 @@ def determine_data_format(input_data: Any) -> DataFormat:
706
907
  DataFormat
707
908
  The data format of the input data.
708
909
  """
910
+ import array
911
+
709
912
  import numpy as np
710
913
  import pandas as pd
711
914
  import pyarrow as pa
@@ -722,26 +925,49 @@ def determine_data_format(input_data: Any) -> DataFormat:
722
925
  return DataFormat.NUMPY_MATRIX
723
926
  elif isinstance(input_data, pa.Table):
724
927
  return DataFormat.PYARROW_TABLE
928
+ elif isinstance(input_data, pa.Array):
929
+ return DataFormat.PYARROW_ARRAY
725
930
  elif isinstance(input_data, pd.Series):
726
931
  return DataFormat.PANDAS_SERIES
727
932
  elif isinstance(input_data, pd.Index):
728
933
  return DataFormat.PANDAS_INDEX
729
934
  elif is_pandas_styler(input_data):
730
935
  return DataFormat.PANDAS_STYLER
731
- elif is_snowpark_data_object(input_data):
732
- return DataFormat.SNOWPARK_OBJECT
936
+ elif isinstance(input_data, pd.api.extensions.ExtensionArray):
937
+ return DataFormat.PANDAS_ARRAY
938
+ elif is_polars_series(input_data):
939
+ return DataFormat.POLARS_SERIES
940
+ elif is_polars_dataframe(input_data):
941
+ return DataFormat.POLARS_DATAFRAME
942
+ elif is_polars_lazyframe(input_data):
943
+ return DataFormat.POLARS_LAZYFRAME
733
944
  elif is_modin_data_object(input_data):
734
945
  return DataFormat.MODIN_OBJECT
735
946
  elif is_snowpandas_data_object(input_data):
736
947
  return DataFormat.SNOWPANDAS_OBJECT
737
948
  elif is_pyspark_data_object(input_data):
738
949
  return DataFormat.PYSPARK_OBJECT
739
- elif isinstance(input_data, (list, tuple, set)):
950
+ elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
951
+ return DataFormat.SNOWPARK_OBJECT
952
+ elif isinstance(
953
+ input_data, (range, EnumMeta, KeysView, ValuesView, deque, map, array.ArrayType)
954
+ ):
955
+ return DataFormat.LIST_OF_VALUES
956
+ elif (
957
+ isinstance(input_data, (ChainMap, MappingProxyType, UserDict))
958
+ or is_dataclass_instance(input_data)
959
+ or is_namedtuple(input_data)
960
+ or is_custom_dict(input_data)
961
+ ):
962
+ return DataFormat.KEY_VALUE_DICT
963
+ elif isinstance(input_data, (ItemsView, enumerate)):
964
+ return DataFormat.LIST_OF_ROWS
965
+ elif isinstance(input_data, (list, tuple, set, frozenset)):
740
966
  if _is_list_of_scalars(input_data):
741
967
  # -> one-dimensional data structure
742
968
  if isinstance(input_data, tuple):
743
969
  return DataFormat.TUPLE_OF_VALUES
744
- if isinstance(input_data, set):
970
+ if isinstance(input_data, (set, frozenset)):
745
971
  return DataFormat.SET_OF_VALUES
746
972
  return DataFormat.LIST_OF_VALUES
747
973
  else:
@@ -751,23 +977,23 @@ def determine_data_format(input_data: Any) -> DataFormat:
751
977
  first_element = next(iter(input_data))
752
978
  if isinstance(first_element, dict):
753
979
  return DataFormat.LIST_OF_RECORDS
754
- if isinstance(first_element, (list, tuple, set)):
980
+ if isinstance(first_element, (list, tuple, set, frozenset)):
755
981
  return DataFormat.LIST_OF_ROWS
756
982
  elif isinstance(input_data, dict):
757
983
  if not input_data:
758
984
  return DataFormat.KEY_VALUE_DICT
759
985
  if len(input_data) > 0:
760
986
  first_value = next(iter(input_data.values()))
987
+ # In the future, we could potentially also support tight & split formats
761
988
  if isinstance(first_value, dict):
762
989
  return DataFormat.COLUMN_INDEX_MAPPING
763
990
  if isinstance(first_value, (list, tuple)):
764
991
  return DataFormat.COLUMN_VALUE_MAPPING
765
992
  if isinstance(first_value, pd.Series):
766
993
  return DataFormat.COLUMN_SERIES_MAPPING
767
- # In the future, we could potentially also support the tight & split formats here
768
- if _is_list_of_scalars(input_data.values()):
769
- # Only use the key-value dict format if the values are only scalar values
770
- return DataFormat.KEY_VALUE_DICT
994
+ # Use key-value dict as fallback. However, if the values of the dict
995
+ # contain mixed types, it will become non-editable in the frontend.
996
+ return DataFormat.KEY_VALUE_DICT
771
997
  return DataFormat.UNKNOWN
772
998
 
773
999
 
@@ -783,12 +1009,30 @@ def _unify_missing_values(df: DataFrame) -> DataFrame:
783
1009
  return df.fillna(np.nan).replace([np.nan], [None])
784
1010
 
785
1011
 
1012
+ def _pandas_df_to_series(df: DataFrame) -> Series[Any]:
1013
+ """Convert a Pandas DataFrame to a Pandas Series by selecting the first column.
1014
+
1015
+ Raises
1016
+ ------
1017
+ ValueError
1018
+ If the DataFrame has more than one column.
1019
+ """
1020
+ # Select first column in dataframe and create a new series based on the values
1021
+ if len(df.columns) != 1:
1022
+ raise ValueError(
1023
+ "DataFrame is expected to have a single column but "
1024
+ f"has {len(df.columns)}."
1025
+ )
1026
+ return df[df.columns[0]]
1027
+
1028
+
786
1029
  def convert_pandas_df_to_data_format(
787
1030
  df: DataFrame, data_format: DataFormat
788
1031
  ) -> (
789
1032
  DataFrame
790
1033
  | Series[Any]
791
1034
  | pa.Table
1035
+ | pa.Array
792
1036
  | np.ndarray[Any, np.dtype[Any]]
793
1037
  | tuple[Any]
794
1038
  | list[Any]
@@ -818,6 +1062,7 @@ def convert_pandas_df_to_data_format(
818
1062
  DataFormat.PYSPARK_OBJECT,
819
1063
  DataFormat.PANDAS_INDEX,
820
1064
  DataFormat.PANDAS_STYLER,
1065
+ DataFormat.PANDAS_ARRAY,
821
1066
  DataFormat.MODIN_OBJECT,
822
1067
  DataFormat.SNOWPANDAS_OBJECT,
823
1068
  ]:
@@ -838,13 +1083,23 @@ def convert_pandas_df_to_data_format(
838
1083
  import pyarrow as pa
839
1084
 
840
1085
  return pa.Table.from_pandas(df)
1086
+ elif data_format == DataFormat.PYARROW_ARRAY:
1087
+ import pyarrow as pa
1088
+
1089
+ return pa.Array.from_pandas(_pandas_df_to_series(df))
841
1090
  elif data_format == DataFormat.PANDAS_SERIES:
842
- # Select first column in dataframe and create a new series based on the values
843
- if len(df.columns) != 1:
844
- raise ValueError(
845
- f"DataFrame is expected to have a single column but has {len(df.columns)}."
846
- )
847
- return df[df.columns[0]]
1091
+ return _pandas_df_to_series(df)
1092
+ elif (
1093
+ data_format == DataFormat.POLARS_DATAFRAME
1094
+ or data_format == DataFormat.POLARS_LAZYFRAME
1095
+ ):
1096
+ import polars as pl
1097
+
1098
+ return pl.from_pandas(df)
1099
+ elif data_format == DataFormat.POLARS_SERIES:
1100
+ import polars as pl
1101
+
1102
+ return pl.from_pandas(_pandas_df_to_series(df))
848
1103
  elif data_format == DataFormat.LIST_OF_RECORDS:
849
1104
  return _unify_missing_values(df).to_dict(orient="records")
850
1105
  elif data_format == DataFormat.LIST_OF_ROWS:
@@ -868,7 +1123,8 @@ def convert_pandas_df_to_data_format(
868
1123
  return_list = df[df.columns[0]].tolist()
869
1124
  elif len(df.columns) >= 1:
870
1125
  raise ValueError(
871
- f"DataFrame is expected to have a single column but has {len(df.columns)}."
1126
+ "DataFrame is expected to have a single column but "
1127
+ f"has {len(df.columns)}."
872
1128
  )
873
1129
  if data_format == DataFormat.TUPLE_OF_VALUES:
874
1130
  return tuple(return_list)