streamlit-nightly 1.36.1.dev20240703__py2.py3-none-any.whl → 1.36.1.dev20240704__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- streamlit/components/v1/component_arrow.py +6 -6
- streamlit/dataframe_util.py +181 -132
- streamlit/delta_generator.py +1 -1
- streamlit/elements/arrow.py +14 -37
- streamlit/elements/lib/built_in_chart_utils.py +3 -10
- streamlit/elements/lib/pandas_styler_utils.py +3 -1
- streamlit/elements/lib/policies.py +41 -7
- streamlit/elements/map.py +2 -13
- streamlit/elements/plotly_chart.py +10 -11
- streamlit/elements/vega_charts.py +16 -29
- streamlit/elements/widgets/button.py +16 -14
- streamlit/elements/widgets/camera_input.py +8 -8
- streamlit/elements/widgets/chat.py +8 -10
- streamlit/elements/widgets/checkbox.py +6 -9
- streamlit/elements/widgets/color_picker.py +7 -8
- streamlit/elements/widgets/data_editor.py +10 -12
- streamlit/elements/widgets/file_uploader.py +8 -8
- streamlit/elements/widgets/multiselect.py +7 -8
- streamlit/elements/widgets/number_input.py +6 -9
- streamlit/elements/widgets/radio.py +7 -8
- streamlit/elements/widgets/select_slider.py +7 -6
- streamlit/elements/widgets/selectbox.py +7 -8
- streamlit/elements/widgets/slider.py +7 -8
- streamlit/elements/widgets/text_widgets.py +13 -12
- streamlit/elements/widgets/time_widgets.py +11 -14
- streamlit/elements/write.py +4 -12
- streamlit/runtime/caching/cache_utils.py +2 -5
- streamlit/testing/v1/element_tree.py +2 -2
- {streamlit_nightly-1.36.1.dev20240703.dist-info → streamlit_nightly-1.36.1.dev20240704.dist-info}/METADATA +1 -1
- {streamlit_nightly-1.36.1.dev20240703.dist-info → streamlit_nightly-1.36.1.dev20240704.dist-info}/RECORD +34 -34
- {streamlit_nightly-1.36.1.dev20240703.data → streamlit_nightly-1.36.1.dev20240704.data}/scripts/streamlit.cmd +0 -0
- {streamlit_nightly-1.36.1.dev20240703.dist-info → streamlit_nightly-1.36.1.dev20240704.dist-info}/WHEEL +0 -0
- {streamlit_nightly-1.36.1.dev20240703.dist-info → streamlit_nightly-1.36.1.dev20240704.dist-info}/entry_points.txt +0 -0
- {streamlit_nightly-1.36.1.dev20240703.dist-info → streamlit_nightly-1.36.1.dev20240704.dist-info}/top_level.txt +0 -0
streamlit/components/v1/component_arrow.py
CHANGED
@@ -74,7 +74,7 @@ def _marshall_index(proto: ArrowTableProto, index: Index) -> None:
     index = map(_maybe_tuple_to_list, index.values)
     index_df = pd.DataFrame(index)
-    proto.index = dataframe_util.data_frame_to_bytes(index_df)
+    proto.index = dataframe_util.convert_pandas_df_to_arrow_bytes(index_df)


 def _marshall_columns(proto: ArrowTableProto, columns: Series) -> None:
@@ -94,7 +94,7 @@ def _marshall_columns(proto: ArrowTableProto, columns: Series) -> None:
     columns = map(_maybe_tuple_to_list, columns.values)
     columns_df = pd.DataFrame(columns)
-    proto.columns = dataframe_util.data_frame_to_bytes(columns_df)
+    proto.columns = dataframe_util.convert_pandas_df_to_arrow_bytes(columns_df)


 def _marshall_data(proto: ArrowTableProto, df: DataFrame) -> None:
@@ -109,7 +109,7 @@ def _marshall_data(proto: ArrowTableProto, df: DataFrame) -> None:
         A dataframe to marshall.

     """
-    proto.data = dataframe_util.data_frame_to_bytes(df)
+    proto.data = dataframe_util.convert_pandas_df_to_arrow_bytes(df)


 def arrow_proto_to_dataframe(proto: ArrowTableProto) -> DataFrame:
@@ -130,9 +130,9 @@ def arrow_proto_to_dataframe(proto: ArrowTableProto) -> DataFrame:

     import pandas as pd

-    data = dataframe_util.bytes_to_data_frame(proto.data)
-    index = dataframe_util.bytes_to_data_frame(proto.index)
-    columns = dataframe_util.bytes_to_data_frame(proto.columns)
+    data = dataframe_util.convert_arrow_bytes_to_pandas_df(proto.data)
+    index = dataframe_util.convert_arrow_bytes_to_pandas_df(proto.index)
+    columns = dataframe_util.convert_arrow_bytes_to_pandas_df(proto.columns)

     return pd.DataFrame(
         data.values, index=index.values.T.tolist(), columns=columns.values.T.tolist()
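The renames above keep the old round-trip behavior under clearer names. A minimal sketch (assuming pandas and this nightly build are installed; `dataframe_util` is an internal module, so the import path may change):

import pandas as pd

from streamlit import dataframe_util

df = pd.DataFrame({"a": [1, 2, 3]})
# pandas.DataFrame -> Arrow IPC bytes (previously data_frame_to_bytes)
ipc_bytes = dataframe_util.convert_pandas_df_to_arrow_bytes(df)
# Arrow IPC bytes -> pandas.DataFrame (previously bytes_to_data_frame)
restored = dataframe_util.convert_arrow_bytes_to_pandas_df(ipc_bytes)
assert df.equals(restored)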
streamlit/dataframe_util.py
CHANGED
@@ -22,11 +22,11 @@ from enum import Enum, EnumMeta, auto
 from typing import (
     TYPE_CHECKING,
     Any,
+    Dict,
     Final,
     Iterable,
     Protocol,
     Sequence,
-    Tuple,
     TypeVar,
     Union,
     cast,
@@ -49,13 +49,9 @@ _LOGGER: Final = logger.get_logger(__name__)


 # Maximum number of rows to request from an unevaluated (out-of-core) dataframe
-
+_MAX_UNEVALUATED_DF_ROWS = 10000

-_PANDAS_DF_TYPE_STR: Final = "pandas.core.frame.DataFrame"
-_PANDAS_INDEX_TYPE_STR: Final = "pandas.core.indexes.base.Index"
-_PANDAS_SERIES_TYPE_STR: Final = "pandas.core.series.Series"
 _PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
-_NUMPY_ARRAY_TYPE_STR: Final = "numpy.ndarray"
 _SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
 _SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
 _SNOWPARK_TABLE_TYPE_STR: Final = "snowflake.snowpark.table.Table"
@@ -66,39 +62,6 @@ _SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.Data
 _SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"


-_DATAFRAME_LIKE_TYPES: Final[tuple[str, ...]] = (
-    _PANDAS_DF_TYPE_STR,
-    _PANDAS_INDEX_TYPE_STR,
-    _PANDAS_SERIES_TYPE_STR,
-    _PANDAS_STYLER_TYPE_STR,
-    _NUMPY_ARRAY_TYPE_STR,
-)
-
-# We show a special "UnevaluatedDataFrame" warning for cached funcs
-# that attempt to return one of these unserializable types:
-UNEVALUATED_DATAFRAME_TYPES = (
-    _MODIN_DF_TYPE_STR,
-    _MODIN_SERIES_TYPE_STR,
-    _PYSPARK_DF_TYPE_STR,
-    _SNOWPANDAS_DF_TYPE_STR,
-    _SNOWPANDAS_SERIES_TYPE_STR,
-    _SNOWPARK_DF_TYPE_STR,
-    _SNOWPARK_TABLE_TYPE_STR,
-)
-
-DataFrameLike: TypeAlias = "Union[DataFrame, Index, Series, Styler]"
-
-_DATAFRAME_COMPATIBLE_TYPES: Final[tuple[type, ...]] = (
-    dict,
-    list,
-    set,
-    tuple,
-    type(None),
-)
-
-_DataFrameCompatible: TypeAlias = Union[dict, list, set, Tuple[Any], None]
-DataFrameCompatible: TypeAlias = Union[_DataFrameCompatible, DataFrameLike]
-
 V_co = TypeVar(
     "V_co",
     covariant=True,  # https://peps.python.org/pep-0484/#covariance-and-contravariance
@@ -124,6 +87,21 @@ OptionSequence: TypeAlias = Union[
     DataFrameGenericAlias[V_co],
 ]

+# Various data types supported by our dataframe processing
+# used for commands like `st.dataframe`, `st.table`, `st.map`,
+# `st.line_chart`...
+Data: TypeAlias = Union[
+    "DataFrame",
+    "Series",
+    "Styler",
+    "Index",
+    "pa.Table",
+    "np.ndarray",
+    Iterable[Any],
+    Dict[Any, Any],
+    None,
+]
+

 class DataFormat(Enum):
     """DataFormat is used to determine the format of the data."""
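For reference, the new `Data` alias is meant to span the input shapes that commands like `st.dataframe` already accept. A hedged sketch of values that should all be valid `Data` (the `None` case rendering an empty table is an assumption based on current behavior):

import numpy as np
import pandas as pd
import pyarrow as pa
import streamlit as st

st.dataframe(pd.DataFrame({"a": [1, 2]}))  # DataFrame
st.dataframe(pd.Series([1, 2], name="a"))  # Series
st.dataframe(pa.table({"a": [1, 2]}))      # pa.Table
st.dataframe(np.arange(6).reshape(3, 2))   # np.ndarray
st.dataframe({"a": [1, 2]})                # Dict
st.dataframe(None)                         # None (assumed: empty table)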
@@ -152,12 +130,32 @@ class DataFormat(Enum):
     KEY_VALUE_DICT = auto()  # {index: value}


-def
-
+def is_dataframe_like(obj: object) -> bool:
+    """True if the object is a dataframe-like object.

+    This does not include basic collection types like list, dict, tuple, etc.
+    """

-
-
+    if obj is None or isinstance(
+        obj, (list, tuple, set, dict, str, bytes, int, float, bool)
+    ):
+        # Basic types are not considered dataframe-like, so we can
+        # return False early to avoid unnecessary checks.
+        return False
+
+    return determine_data_format(obj) in [
+        DataFormat.PANDAS_DATAFRAME,
+        DataFormat.PANDAS_SERIES,
+        DataFormat.PANDAS_INDEX,
+        DataFormat.PANDAS_STYLER,
+        DataFormat.NUMPY_LIST,
+        DataFormat.NUMPY_MATRIX,
+        DataFormat.PYARROW_TABLE,
+        DataFormat.SNOWPARK_OBJECT,
+        DataFormat.PYSPARK_OBJECT,
+        DataFormat.MODIN_OBJECT,
+        DataFormat.SNOWPANDAS_OBJECT,
+    ]


 def is_unevaluated_data_object(obj: object) -> bool:
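The predicate's behavior on a few inputs, as a sketch grounded in the implementation above (internal API, subject to change):

import pandas as pd
import pyarrow as pa

from streamlit import dataframe_util

dataframe_util.is_dataframe_like(pd.DataFrame({"a": [1]}))  # True (PANDAS_DATAFRAME)
dataframe_util.is_dataframe_like(pa.table({"a": [1]}))      # True (PYARROW_TABLE)
dataframe_util.is_dataframe_like([1, 2, 3])                 # False: basic collections exit early
dataframe_util.is_dataframe_like(None)                      # False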
@@ -218,18 +216,14 @@ def is_snowpandas_data_object(obj: object) -> bool:
     )


-def is_dataframe_compatible(obj: object) -> TypeGuard[DataFrameCompatible]:
-    """True if type that can be passed to convert_anything_to_pandas_df."""
-    return is_dataframe_like(obj) or type(obj) in _DATAFRAME_COMPATIBLE_TYPES
-
-
 def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
+    """True if obj is a pandas Styler."""
     return is_type(obj, _PANDAS_STYLER_TYPE_STR)


 def convert_anything_to_pandas_df(
     data: Any,
-    max_unevaluated_rows: int =
+    max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
     ensure_copy: bool = False,
 ) -> DataFrame:
     """Try to convert different formats to a Pandas Dataframe.
@@ -241,7 +235,7 @@ def convert_anything_to_pandas_df(

     max_unevaluated_rows: int
         If unevaluated data is detected this func will evaluate it,
-        taking max_unevaluated_rows, defaults to 10k
+        taking max_unevaluated_rows, defaults to 10k.

     ensure_copy: bool
         If True, make sure to always return a copy of the data. If False, it depends on
@@ -252,16 +246,21 @@ def convert_anything_to_pandas_df(
     pandas.DataFrame

     """
+    import numpy as np
     import pandas as pd

-    if
+    if isinstance(data, pd.DataFrame):
         return data.copy() if ensure_copy else cast(pd.DataFrame, data)

+    if isinstance(data, (pd.Series, pd.Index)):
+        return pd.DataFrame(data)
+
     if is_pandas_styler(data):
-        return cast(
+        return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)

-    if
+    if isinstance(data, np.ndarray):
         return pd.DataFrame([]) if len(data.shape) == 0 else pd.DataFrame(data)
+
     if is_modin_data_object(data):
         data = data.head(max_unevaluated_rows)._to_pandas()
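A hedged sketch of the fast paths added above (again an internal API; exact copy semantics are as documented in the function's docstring):

import numpy as np
import pandas as pd

from streamlit.dataframe_util import convert_anything_to_pandas_df

df = pd.DataFrame({"a": [1, 2]})
convert_anything_to_pandas_df(df)                    # returned as-is
convert_anything_to_pandas_df(df, ensure_copy=True)  # always a copy
convert_anything_to_pandas_df(pd.Series([1, 2]))     # new Series/Index fast path
convert_anything_to_pandas_df(np.array([[1, 2], [3, 4]]))  # ndarray fast path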
@@ -332,6 +331,130 @@ Offending object:
     ) from ex


+def convert_arrow_table_to_arrow_bytes(table: pa.Table) -> bytes:
+    """Serialize pyarrow.Table to Arrow IPC bytes.
+
+    Parameters
+    ----------
+    table : pyarrow.Table
+        A table to convert.
+
+    Returns
+    -------
+    bytes
+        The serialized Arrow IPC bytes.
+    """
+    try:
+        table = _maybe_truncate_table(table)
+    except RecursionError as err:
+        # This is a very unlikely edge case, but we want to make sure that
+        # it doesn't lead to unexpected behavior.
+        # If there is a recursion error, we just return the table as-is
+        # which will lead to the normal message limit exceed error.
+        _LOGGER.warning(
+            "Recursion error while truncating Arrow table. This is not "
+            "supposed to happen.",
+            exc_info=err,
+        )
+
+    import pyarrow as pa
+
+    # Convert table to bytes
+    sink = pa.BufferOutputStream()
+    writer = pa.RecordBatchStreamWriter(sink, table.schema)
+    writer.write_table(table)
+    writer.close()
+    return cast(bytes, sink.getvalue().to_pybytes())
+
+
+def convert_pandas_df_to_arrow_bytes(df: DataFrame) -> bytes:
+    """Serialize pandas.DataFrame to Arrow IPC bytes.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        A dataframe to convert.
+
+    Returns
+    -------
+    bytes
+        The serialized Arrow IPC bytes.
+    """
+    import pyarrow as pa
+
+    try:
+        table = pa.Table.from_pandas(df)
+    except (pa.ArrowTypeError, pa.ArrowInvalid, pa.ArrowNotImplementedError) as ex:
+        _LOGGER.info(
+            "Serialization of dataframe to Arrow table was unsuccessful due to: %s. "
+            "Applying automatic fixes for column types to make the dataframe "
+            "Arrow-compatible.",
+            ex,
+        )
+        df = fix_arrow_incompatible_column_types(df)
+        table = pa.Table.from_pandas(df)
+    return convert_arrow_table_to_arrow_bytes(table)
+
+
+def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
+    """Convert Arrow bytes (IPC format) to pandas.DataFrame.
+
+    Using this function in production needs to make sure that
+    the pyarrow version >= 14.0.1, because of a critical
+    security vulnerability in pyarrow < 14.0.1.
+
+    Parameters
+    ----------
+    source : bytes
+        A bytes object to convert.
+
+    Returns
+    -------
+    pandas.DataFrame
+        The converted dataframe.
+    """
+    import pyarrow as pa
+
+    reader = pa.RecordBatchStreamReader(source)
+    return reader.read_pandas()
+
+
+def convert_anything_to_arrow_bytes(
+    data: Any,
+    max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
+) -> bytes:
+    """Try to convert different formats to Arrow IPC format (bytes).
+
+    This method tries to directly convert the input data to Arrow bytes
+    for some supported formats, but falls back to conversion to a Pandas
+    DataFrame and then to Arrow bytes.
+
+    Parameters
+    ----------
+    data : any
+        The data to convert to Arrow bytes.
+
+    max_unevaluated_rows: int
+        If unevaluated data is detected this func will evaluate it,
+        taking max_unevaluated_rows, defaults to 10k.
+
+    Returns
+    -------
+    bytes
+        The serialized Arrow IPC bytes.
+    """
+
+    import pyarrow as pa
+
+    if isinstance(data, pa.Table):
+        return convert_arrow_table_to_arrow_bytes(data)
+
+    # Fallback: try to convert to pandas DataFrame
+    # and then to Arrow bytes
+    df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
+    return convert_pandas_df_to_arrow_bytes(df)
+
+
 def convert_anything_to_sequence(obj: OptionSequence[V_co]) -> Sequence[V_co]:
     """Try to convert different formats to an indexable Sequence.

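Taken together, `convert_anything_to_arrow_bytes` gives callers a single entry point: pyarrow Tables are serialized directly, everything else goes through `convert_anything_to_pandas_df` first. A hedged usage sketch:

import pyarrow as pa

from streamlit.dataframe_util import (
    convert_anything_to_arrow_bytes,
    convert_arrow_bytes_to_pandas_df,
)

# Direct path: a pyarrow.Table is serialized without a pandas detour
table_bytes = convert_anything_to_arrow_bytes(pa.table({"a": [1, 2]}))

# Fallback path: a plain dict is first converted to a pandas.DataFrame
dict_bytes = convert_anything_to_arrow_bytes({"a": [1, 2]})

# Both payloads should round-trip back to equivalent dataframes
print(convert_arrow_bytes_to_pandas_df(table_bytes))
print(convert_arrow_bytes_to_pandas_df(dict_bytes))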
@@ -455,38 +578,6 @@ def _maybe_truncate_table(
     return table


-def pyarrow_table_to_bytes(table: pa.Table) -> bytes:
-    """Serialize pyarrow.Table to bytes using Apache Arrow.
-
-    Parameters
-    ----------
-    table : pyarrow.Table
-        A table to convert.
-
-    """
-    try:
-        table = _maybe_truncate_table(table)
-    except RecursionError as err:
-        # This is a very unlikely edge case, but we want to make sure that
-        # it doesn't lead to unexpected behavior.
-        # If there is a recursion error, we just return the table as-is
-        # which will lead to the normal message limit exceed error.
-        _LOGGER.warning(
-            "Recursion error while truncating Arrow table. This is not "
-            "supposed to happen.",
-            exc_info=err,
-        )
-
-    import pyarrow as pa
-
-    # Convert table to bytes
-    sink = pa.BufferOutputStream()
-    writer = pa.RecordBatchStreamWriter(sink, table.schema)
-    writer.write_table(table)
-    writer.close()
-    return cast(bytes, sink.getvalue().to_pybytes())
-
-
 def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
     """Return True if the column type is known to cause issues during Arrow conversion."""
     from pandas.api.types import infer_dtype, is_dict_like, is_list_like
@@ -593,49 +684,7 @@ def fix_arrow_incompatible_column_types(
     return df_copy if df_copy is not None else df


-def data_frame_to_bytes(df: DataFrame) -> bytes:
-    """Serialize pandas.DataFrame to bytes using Apache Arrow.
-
-    Parameters
-    ----------
-    df : pandas.DataFrame
-        A dataframe to convert.
-
-    """
-    import pyarrow as pa
-
-    try:
-        table = pa.Table.from_pandas(df)
-    except (pa.ArrowTypeError, pa.ArrowInvalid, pa.ArrowNotImplementedError) as ex:
-        _LOGGER.info(
-            "Serialization of dataframe to Arrow table was unsuccessful due to: %s. "
-            "Applying automatic fixes for column types to make the dataframe Arrow-compatible.",
-            ex,
-        )
-        df = fix_arrow_incompatible_column_types(df)
-        table = pa.Table.from_pandas(df)
-    return pyarrow_table_to_bytes(table)
-
-
-def bytes_to_data_frame(source: bytes) -> DataFrame:
-    """Convert bytes to pandas.DataFrame.
-
-    Using this function in production needs to make sure that
-    the pyarrow version >= 14.0.1.
-
-    Parameters
-    ----------
-    source : bytes
-        A bytes object to convert.
-
-    """
-    import pyarrow as pa
-
-    reader = pa.RecordBatchStreamReader(source)
-    return reader.read_pandas()
-
-
-def is_list_of_scalars(data: Iterable[Any]) -> bool:
+def _is_list_of_scalars(data: Iterable[Any]) -> bool:
     """Check if the list only contains scalar values."""
     from pandas.api.types import infer_dtype

@@ -688,7 +737,7 @@ def determine_data_format(input_data: Any) -> DataFormat:
     elif is_pyspark_data_object(input_data):
         return DataFormat.PYSPARK_OBJECT
     elif isinstance(input_data, (list, tuple, set)):
-        if is_list_of_scalars(input_data):
+        if _is_list_of_scalars(input_data):
             # -> one-dimensional data structure
             if isinstance(input_data, tuple):
                 return DataFormat.TUPLE_OF_VALUES
@@ -716,7 +765,7 @@ def determine_data_format(input_data: Any) -> DataFormat:
         if isinstance(first_value, pd.Series):
             return DataFormat.COLUMN_SERIES_MAPPING
         # In the future, we could potentially also support the tight & split formats here
-        if is_list_of_scalars(input_data.values()):
+        if _is_list_of_scalars(input_data.values()):
             # Only use the key-value dict format if the values are only scalar values
             return DataFormat.KEY_VALUE_DICT
     return DataFormat.UNKNOWN
@@ -734,7 +783,7 @@ def _unify_missing_values(df: DataFrame) -> DataFrame:
     return df.fillna(np.nan).replace([np.nan], [None])


-def convert_df_to_data_format(
+def convert_pandas_df_to_data_format(
     df: DataFrame, data_format: DataFormat
 ) -> (
     DataFrame
@@ -746,7 +795,7 @@ def convert_df_to_data_format(
     | set[Any]
     | dict[str, Any]
 ):
-    """Convert a
+    """Convert a Pandas DataFrame to the specified data format.

     Parameters
     ----------
streamlit/delta_generator.py
CHANGED
@@ -100,7 +100,7 @@ if TYPE_CHECKING:
     from pandas import DataFrame

     from streamlit.cursor import Cursor
-    from streamlit.elements.arrow import Data
+    from streamlit.dataframe_util import Data
     from streamlit.elements.lib.built_in_chart_utils import AddRowsMetadata

streamlit/elements/arrow.py
CHANGED
@@ -18,14 +18,10 @@ import json
 from dataclasses import dataclass
 from typing import (
     TYPE_CHECKING,
-    Any,
-    Dict,
     Final,
     Iterable,
-    List,
     Literal,
     TypedDict,
-    Union,
     cast,
     overload,
 )
@@ -43,11 +39,7 @@ from streamlit.elements.lib.column_config_utils import (
 )
 from streamlit.elements.lib.event_utils import AttributeDictionary
 from streamlit.elements.lib.pandas_styler_utils import marshall_styler
-from streamlit.elements.lib.policies import (
-    check_cache_replay_rules,
-    check_callback_rules,
-    check_session_state_rules,
-)
+from streamlit.elements.lib.policies import check_widget_policies
 from streamlit.elements.lib.utils import Key, to_key
 from streamlit.errors import StreamlitAPIException
 from streamlit.proto.Arrow_pb2 import Arrow as ArrowProto
@@ -57,24 +49,9 @@ from streamlit.runtime.state import WidgetCallback, register_widget
 from streamlit.runtime.state.common import compute_widget_id

 if TYPE_CHECKING:
-
-    from numpy import ndarray
-    from pandas import DataFrame, Index, Series
-    from pandas.io.formats.style import Styler
-
+    from streamlit.dataframe_util import Data
     from streamlit.delta_generator import DeltaGenerator

-    Data: TypeAlias = Union[
-        "DataFrame",
-        "Series",
-        "Styler",
-        "Index",
-        "pa.Table",
-        "ndarray",
-        Iterable,
-        Dict[str, List[Any]],
-        None,
-    ]

 SelectionMode: TypeAlias = Literal[
     "single-row", "multi-row", "single-column", "multi-column"
@@ -489,10 +466,15 @@ class ArrowMixin:

         if is_selection_activated:
             # Run some checks that are only relevant when selections are activated
-
-
-
-
+            is_callback = callable(on_select)
+            check_widget_policies(
+                self.dg,
+                key,
+                on_change=cast(WidgetCallback, on_select) if is_callback else None,
+                default_value=None,
+                writes_allowed=False,
+                enable_check_callback_rules=is_callback,
+            )

         # Convert the user provided column config into the frontend compatible format:
         column_config_mapping = process_config_mapping(column_config)
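These consolidated checks run whenever selections are activated on `st.dataframe`. A hedged example of app code that takes this path (using the documented selection API; exact event shape may differ by version):

import pandas as pd
import streamlit as st

# Passing "rerun" (or a callback) for on_select activates selections,
# which triggers the check_widget_policies call above.
event = st.dataframe(
    pd.DataFrame({"a": [1, 2, 3]}),
    key="my_df",
    on_select="rerun",
    selection_mode="multi-row",
)
st.write(event.selection)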
@@ -511,7 +493,7 @@ class ArrowMixin:

         if isinstance(data, pa.Table):
             # For pyarrow tables, we can just serialize the table directly
-            proto.data = dataframe_util.pyarrow_table_to_bytes(data)
+            proto.data = dataframe_util.convert_arrow_table_to_arrow_bytes(data)
         else:
             # For all other data formats, we need to convert them to a pandas.DataFrame
             # thereby, we also apply some data specific configs
@@ -538,7 +520,7 @@ class ArrowMixin:
             check_arrow_compatibility=False,
         )
         # Serialize the data to bytes:
-        proto.data = dataframe_util.data_frame_to_bytes(data_df)
+        proto.data = dataframe_util.convert_pandas_df_to_arrow_bytes(data_df)

         if hide_index is not None:
             update_column_config(
@@ -709,7 +691,6 @@ def marshall(proto: ArrowProto, data: Data, default_uuid: str | None = None) ->
     (e.g. charts) can ignore it.

     """
-    import pyarrow as pa

     if dataframe_util.is_pandas_styler(data):
         # default_uuid is a string only if the data is a `Styler`,
@@ -719,8 +700,4 @@ def marshall(proto: ArrowProto, data: Data, default_uuid: str | None = None) ->
         ), "Default UUID must be a string for Styler data."
         marshall_styler(proto, data, default_uuid)

-    if isinstance(data, pa.Table):
-        proto.data = dataframe_util.pyarrow_table_to_bytes(data)
-    else:
-        df = dataframe_util.convert_anything_to_pandas_df(data)
-        proto.data = dataframe_util.data_frame_to_bytes(df)
+    proto.data = dataframe_util.convert_anything_to_arrow_bytes(data)
streamlit/elements/lib/built_in_chart_utils.py
CHANGED
@@ -47,8 +47,7 @@ if TYPE_CHECKING:
     import altair as alt
     import pandas as pd

-    from streamlit.dataframe_util import
-    from streamlit.elements.arrow import Data
+    from streamlit.dataframe_util import Data

 VegaLiteType: TypeAlias = Literal["quantitative", "ordinal", "temporal", "nominal"]
 ChartStackType: TypeAlias = Literal["normalize", "center", "layered"]
@@ -391,15 +390,9 @@ def _prep_data(


 def _last_index_for_melted_dataframes(
-    data:
+    data: pd.DataFrame,
 ) -> Hashable | None:
-
-    data = dataframe_util.convert_anything_to_pandas_df(data)
-
-    if data.index.size > 0:
-        return cast(Hashable, data.index[-1])
-
-    return None
+    return cast(Hashable, data.index[-1]) if data.index.size > 0 else None


 def _is_date_column(df: pd.DataFrame, name: str | None) -> bool:
streamlit/elements/lib/pandas_styler_utils.py
CHANGED
@@ -236,7 +236,9 @@ def _marshall_display_values(

     """
     new_df = _use_display_values(df, styles)
-    proto.styler.display_values = dataframe_util.data_frame_to_bytes(new_df)
+    proto.styler.display_values = dataframe_util.convert_pandas_df_to_arrow_bytes(
+        new_df
+    )


 def _use_display_values(df: DataFrame, styles: Mapping[str, Any]) -> DataFrame: