streamlit-nightly 1.33.1.dev20240501__py2.py3-none-any.whl → 1.34.1.dev20240503__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. streamlit/components/v1/custom_component.py +3 -9
  2. streamlit/delta_generator.py +32 -208
  3. streamlit/elements/lib/built_in_chart_utils.py +920 -0
  4. streamlit/elements/utils.py +1 -14
  5. streamlit/elements/{arrow_altair.py → vega_charts.py} +301 -836
  6. streamlit/static/asset-manifest.json +5 -5
  7. streamlit/static/index.html +1 -1
  8. streamlit/static/static/js/5441.71804c26.chunk.js +1 -0
  9. streamlit/static/static/js/7483.64f23be7.chunk.js +2 -0
  10. streamlit/static/static/js/{main.af77b7ba.js → main.3b0201f6.js} +2 -2
  11. {streamlit_nightly-1.33.1.dev20240501.dist-info → streamlit_nightly-1.34.1.dev20240503.dist-info}/METADATA +1 -1
  12. {streamlit_nightly-1.33.1.dev20240501.dist-info → streamlit_nightly-1.34.1.dev20240503.dist-info}/RECORD +19 -20
  13. streamlit/elements/altair_utils.py +0 -40
  14. streamlit/elements/arrow_vega_lite.py +0 -229
  15. streamlit/static/static/js/43.c6749504.chunk.js +0 -1
  16. streamlit/static/static/js/656.7150a933.chunk.js +0 -2
  17. /streamlit/static/static/css/{43.e3b876c5.chunk.css → 5441.e3b876c5.chunk.css} +0 -0
  18. /streamlit/static/static/js/{656.7150a933.chunk.js.LICENSE.txt → 7483.64f23be7.chunk.js.LICENSE.txt} +0 -0
  19. /streamlit/static/static/js/{main.af77b7ba.js.LICENSE.txt → main.3b0201f6.js.LICENSE.txt} +0 -0
  20. {streamlit_nightly-1.33.1.dev20240501.data → streamlit_nightly-1.34.1.dev20240503.data}/scripts/streamlit.cmd +0 -0
  21. {streamlit_nightly-1.33.1.dev20240501.dist-info → streamlit_nightly-1.34.1.dev20240503.dist-info}/WHEEL +0 -0
  22. {streamlit_nightly-1.33.1.dev20240501.dist-info → streamlit_nightly-1.34.1.dev20240503.dist-info}/entry_points.txt +0 -0
  23. {streamlit_nightly-1.33.1.dev20240501.dist-info → streamlit_nightly-1.34.1.dev20240503.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,920 @@
1
+ # Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024)
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Utilities for our built-in charts commands."""
16
+
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+ from datetime import date
21
+ from enum import Enum
22
+ from typing import (
23
+ TYPE_CHECKING,
24
+ Any,
25
+ Collection,
26
+ Final,
27
+ Hashable,
28
+ Sequence,
29
+ TypedDict,
30
+ cast,
31
+ )
32
+
33
+ from streamlit import type_util
34
+ from streamlit.color_util import (
35
+ Color,
36
+ is_color_like,
37
+ is_color_tuple_like,
38
+ is_hex_color_like,
39
+ to_css_color,
40
+ )
41
+ from streamlit.errors import Error, StreamlitAPIException
42
+
43
+ if TYPE_CHECKING:
44
+ import altair as alt
45
+ import pandas as pd
46
+
47
+ from streamlit.elements.arrow import Data
48
+ from streamlit.type_util import DataFrameCompatible
49
+
50
+
51
class PrepDataColumns(TypedDict):
    """Columns used for the prep_data step in Altair Arrow charts.

    The keys match the keyword parameters of ``_prep_data`` (everything except
    the dataframe), so an instance can be unpacked directly into that call.
    """

    # Name of the column used for the x axis, or None if no column was selected.
    x_column: str | None
    # Names of the columns to draw on the y axis.
    y_column_list: list[str]
    # Name of the column used for the color encoding, or None.
    color_column: str | None
    # Name of the column used for the size encoding, or None.
    size_column: str | None
58
+
59
+
60
@dataclass
class AddRowsMetadata:
    """Metadata needed by add_rows on native charts.

    This class is used to pass some important info to add_rows.
    """

    # Name of the st command that generated the chart (the "command" entry of
    # the ChartType value).
    chart_command: str
    # Last index label of the data charted so far; None when no last index could
    # be determined (for example, when the data had no rows).
    last_index: Hashable | None
    # Column selection that is passed to _prep_data again for the added rows.
    columns: PrepDataColumns
70
+
71
+
72
class ChartType(Enum):
    """The built-in chart kinds.

    Each member's value is a dict with two entries: "mark_type", which is used as
    the mark of the generated chart, and "command", the name of the st command
    that draws this chart.
    """

    AREA = {"mark_type": "area", "command": "area_chart"}
    BAR = {"mark_type": "bar", "command": "bar_chart"}
    LINE = {"mark_type": "line", "command": "line_chart"}
    SCATTER = {"mark_type": "circle", "command": "scatter_chart"}
77
+
78
+
79
# Legend settings for the color and size encodings. The two legends use different
# title paddings so that they end up vertically aligned.
#
# NOTE: Perfect alignment of the size and color legends is probably not possible
# in every case: the "size" circles vary in size with the data, and their
# container is top-aligned with the color container. The values below were found
# by trial and error to be a good enough middle ground.
# See e2e/scripts/st_arrow_scatter_chart.py for some alignment tests.
#
# NOTE #2: In theory, _COLOR_LEGEND_SETTINGS could be moved into
# ArrowVegaLiteChart/CustomTheme.tsx, but that would impact existing behavior.
# (See https://github.com/streamlit/streamlit/pull/7164#discussion_r1307707345)
_COLOR_LEGEND_SETTINGS: Final = {"titlePadding": 5, "offset": 5, "orient": "bottom"}
_SIZE_LEGEND_SETTINGS: Final = {"titlePadding": 0.5, "offset": 5, "orient": "bottom"}

# Titles shown to the user for the index column and for the melted columns.
_SEPARATED_INDEX_COLUMN_TITLE: Final = "index"
_MELTED_Y_COLUMN_TITLE: Final = "value"
_MELTED_COLOR_COLUMN_TITLE: Final = "color"

# Internal (never user-visible) names for the index and melted columns. They carry
# a randomly generated suffix so that they are far less likely to collide with a
# real column name than a human-readable name (like "--streamlit-fake-field")
# would be.
_PROTECTION_SUFFIX: Final = "--p5bJXXpQgvPz6yvQMFiy"
_SEPARATED_INDEX_COLUMN_NAME: Final = f"{_SEPARATED_INDEX_COLUMN_TITLE}{_PROTECTION_SUFFIX}"
_MELTED_Y_COLUMN_NAME: Final = f"{_MELTED_Y_COLUMN_TITLE}{_PROTECTION_SUFFIX}"
_MELTED_COLOR_COLUMN_NAME: Final = f"{_MELTED_COLOR_COLUMN_TITLE}{_PROTECTION_SUFFIX}"

# Name of a column that is known not to exist in the data. It works around a
# Vega-Lite rendering bug where empty charts need x and y encodings set in order
# to take up space.
_NON_EXISTENT_COLUMN_NAME: Final = f"DOES_NOT_EXIST{_PROTECTION_SUFFIX}"
112
+
113
+
114
def generate_chart(
    chart_type: ChartType,
    data: Data | None,
    x_from_user: str | None = None,
    y_from_user: str | Sequence[str] | None = None,
    color_from_user: str | Color | list[Color] | None = None,
    size_from_user: str | float | None = None,
    width: int = 0,
    height: int = 0,
) -> tuple[alt.Chart, AddRowsMetadata]:
    """Function to use the chart's type, data columns and indices to figure out the chart's spec.

    Parameters
    ----------
    chart_type : ChartType
        The built-in chart to build. Its value supplies the mark type of the
        chart and the command name stored in the returned metadata.
    data : Data or None
        The data to chart. It is converted to a dataframe (as a copy) first.
    x_from_user : str or None
        The x argument as given by the user.
    y_from_user : str, sequence of str, or None
        The y argument as given by the user.
    color_from_user : str, Color, list of Color, or None
        The color argument as given by the user. It is treated as a column name
        if it is a string naming a column of the data, and as a constant value
        otherwise.
    size_from_user : str, float, or None
        The size argument as given by the user. It is resolved the same way as
        color_from_user.
    width : int
        Passed to the Altair chart as its width.
    height : int
        Passed to the Altair chart as its height.

    Returns
    -------
    tuple[alt.Chart, AddRowsMetadata]
        The interactive chart, and the metadata add_rows needs. The metadata
        holds the column selection as it was before the data was prepared.

    """
    import altair as alt

    df = type_util.convert_anything_to_df(data, ensure_copy=True)

    # From now on, use "df" instead of "data". Deleting "data" to guarantee we follow this.
    del data

    # Convert arguments received from the user to things Vega-Lite understands.
    # Get name of column to use for x.
    x_column = _parse_x_column(df, x_from_user)
    # Get name of columns to use for y.
    y_column_list = _parse_y_columns(df, y_from_user, x_column)
    # Get name of column to use for color, or constant value to use. Any/both could be None.
    color_column, color_value = _parse_generic_column(df, color_from_user)
    # Get name of column to use for size, or constant value to use. Any/both could be None.
    size_column, size_value = _parse_generic_column(df, size_from_user)

    # Store some info so we can use it in add_rows.
    # NOTE: this must happen before _prep_data below, which modifies df in place
    # and returns new column names.
    add_rows_metadata = AddRowsMetadata(
        # The st command that was used to generate this chart.
        chart_command=chart_type.value["command"],
        # The last index of df so we can adjust the input df in add_rows:
        last_index=_last_index_for_melted_dataframes(df),
        # This is the input to prep_data (except for the df):
        columns=dict(
            x_column=x_column,
            y_column_list=y_column_list,
            color_column=color_column,
            size_column=size_column,
        ),
    )

    # At this point, all foo_column variables are either None/empty or contain actual
    # columns that are guaranteed to exist.

    df, x_column, y_column, color_column, size_column = _prep_data(
        df, x_column, y_column_list, color_column, size_column
    )

    # At this point, x_column is only None if user did not provide one AND df is empty.

    # Create a Chart with x and y encodings.
    chart = alt.Chart(
        data=df,
        mark=chart_type.value["mark_type"],
        width=width,
        height=height,
    ).encode(
        x=_get_x_encoding(df, x_column, x_from_user, chart_type),
        y=_get_y_encoding(df, y_column, y_from_user),
    )

    # Set up opacity encoding.
    opacity_enc = _get_opacity_encoding(chart_type, color_column)
    if opacity_enc is not None:
        chart = chart.encode(opacity=opacity_enc)

    # Set up color encoding.
    # y_column_list is still the pre-melt list here; it is used to check the
    # number of colors given by the user.
    color_enc = _get_color_encoding(
        df, color_value, color_column, y_column_list, color_from_user
    )
    if color_enc is not None:
        chart = chart.encode(color=color_enc)

    # Set up size encoding.
    size_enc = _get_size_encoding(chart_type, size_column, size_value)
    if size_enc is not None:
        chart = chart.encode(size=size_enc)

    # Set up tooltip encoding.
    if x_column is not None and y_column is not None:
        chart = chart.encode(
            tooltip=_get_tooltip_encoding(
                x_column,
                y_column,
                size_column,
                color_column,
                color_enc,
            )
        )

    return chart.interactive(), add_rows_metadata
207
+
208
+
209
def prep_chart_data_for_add_rows(
    data: Data,
    add_rows_metadata: AddRowsMetadata,
) -> tuple[Data, AddRowsMetadata]:
    """Prepares the data for add_rows on our built-in charts.

    This includes aspects like conversion of the data to Pandas DataFrame,
    changes to the index, and melting the data if needed.

    Parameters
    ----------
    data : Data
        The rows to add.
    add_rows_metadata : AddRowsMetadata
        The metadata of the chart the rows are added to. When the new data has a
        range index, ``last_index`` is updated in place; the arithmetic below
        assumes it is an integer in that case.

    Returns
    -------
    tuple[Data, AddRowsMetadata]
        The prepared data and the same (possibly updated) metadata object.

    Raises
    ------
    StreamlitAPIException
        If the step or stop of the range index cannot be read.

    """
    import pandas as pd

    df = cast(pd.DataFrame, type_util.convert_anything_to_df(data))

    # Make range indices continue right after last_index.
    if isinstance(df.index, pd.RangeIndex):
        old_step = _get_pandas_index_attr(df, "step")

        # We have to drop the predefined index
        df = df.reset_index(drop=True)

        # After the reset the index counts from 0 in steps of 1, so its stop is
        # the number of rows.
        row_count = _get_pandas_index_attr(df, "stop")

        if old_step is None or row_count is None:
            raise StreamlitAPIException("'RangeIndex' object has no attribute 'step'")

        start = add_rows_metadata.last_index + old_step
        # Scale the row count by the step. Without this the new index only has
        # row_count labels when the step is 1, and assigning it fails with a
        # length mismatch for any other step.
        stop = start + old_step * row_count

        df.index = pd.RangeIndex(start=start, stop=stop, step=old_step)
        # The last label in the new index (stop itself is exclusive). When no
        # rows were added this leaves last_index as it was.
        add_rows_metadata.last_index = stop - old_step

    out_data, *_ = _prep_data(df, **add_rows_metadata.columns)

    return out_data, add_rows_metadata
243
+
244
+
245
def _get_pandas_index_attr(
    data: pd.DataFrame | pd.Series,
    attr: str,
) -> Any | None:
    """Return attribute ``attr`` of the index of ``data``, or None if it has none."""
    index = data.index
    return getattr(index, attr, None)
250
+
251
+
252
def _prep_data(
    df: pd.DataFrame,
    x_column: str | None,
    y_column_list: list[str],
    color_column: str | None,
    size_column: str | None,
) -> tuple[pd.DataFrame, str | None, str | None, str | None, str | None]:
    """Prepares the data for charting. This is also used in add_rows.

    The steps are, in order: move the index into a column if it is needed as x,
    keep only the columns in use, convert the color column if needed, turn all
    column names into strings, and melt the data if there are several y columns.

    Returns the prepared dataframe followed by the names to use for the x, y,
    color, and size columns.
    """
    # With y columns but no x column, the index becomes the x axis, so it has to
    # live in a column of its own. This modifies df in place.
    x_name = _maybe_reset_index_in_place(df, x_column, y_column_list)

    # Keep only the columns that take part in the chart.
    trimmed = _drop_unused_columns(
        df, x_name, color_column, size_column, *y_column_list
    )

    # Color values may need converting to something Vega understands.
    _maybe_convert_color_column_in_place(trimmed, color_column)

    # Vega-Lite needs string column names.
    x_name, y_names, color_name, size_name = _convert_col_names_to_str_in_place(
        trimmed, x_name, y_column_list, color_column, size_column
    )

    # Several y columns are melted from wide into long format.
    long_data, y_name, color_name = _maybe_melt(
        trimmed, x_name, y_names, color_name, size_name
    )

    return long_data, x_name, y_name, color_name, size_name
294
+
295
+
296
def _last_index_for_melted_dataframes(
    data: DataFrameCompatible | Any,
) -> Hashable | None:
    """Return the last index label of ``data`` once converted to a dataframe.

    Returns None if ``data`` is not dataframe-compatible or if the resulting
    index is empty.
    """
    if not type_util.is_dataframe_compatible(data):
        return None

    index = type_util.convert_anything_to_df(data).index
    if index.size > 0:
        return cast(Hashable, index[-1])

    return None
306
+
307
+
308
def _is_date_column(df: pd.DataFrame, name: str | None) -> bool:
    """Tell whether the named column holds datetime.date values.

    Only the first value of the column is inspected, so the answer is meaningful
    only for columns whose values all have the same type. The check is an
    isinstance test, so subclasses of datetime.date (such as datetime.datetime)
    count as well.

    Parameters
    ----------
    df : pd.DataFrame
    name : str or None
        The column name. None yields False.

    Returns
    -------
    bool
        False as well when the column is empty.

    """
    if name is None:
        return False

    values = df[name]
    if values.size == 0:
        return False

    first_value = values.iloc[0]
    return isinstance(first_value, date)
333
+
334
+
335
def _melt_data(
    df: pd.DataFrame,
    columns_to_leave_alone: list[str],
    columns_to_melt: list[str] | None,
    new_y_column_name: str,
    new_color_column_name: str,
) -> pd.DataFrame:
    """Turn a wide-format dataframe into a long-format one.

    See the Pandas documentation for more about melting:
    https://pandas.pydata.org/docs/reference/api/pandas.melt.html

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to melt.
    columns_to_leave_alone : list[str]
        The columns to keep as they are (the id columns).
    columns_to_melt : list[str] or None
        The columns to melt.
    new_y_column_name : str
        Name of the new column that receives the values of the melted columns.
    new_color_column_name : str
        Name of the new column that receives the original column names.

    Returns
    -------
    pd.DataFrame
        The melted dataframe.

    Raises
    ------
    StreamlitAPIException
        If the melted values are of mixed types and there are more than 100
        unique values.

    Examples
    --------

    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     "a": [1, 2, 3],
    ...     "b": [4, 5, 6],
    ...     "c": [7, 8, 9],
    ... })
    >>> _melt_data(df, ["a"], ["b", "c"], "value", "color")
       a color  value
    0  1     b      4
    1  2     b      5
    2  3     b      6
    ...

    """
    import pandas as pd
    from pandas.api.types import infer_dtype

    long_df = pd.melt(
        df,
        id_vars=columns_to_leave_alone,
        value_vars=columns_to_melt,
        var_name=new_color_column_name,
        value_name=new_y_column_name,
    )

    values = long_df[new_y_column_name]
    # The checks run from cheapest to most expensive; each one runs only if the
    # previous one held.
    if values.dtype == "object" and "mixed" in infer_dtype(values):
        if len(values.unique()) > 100:
            raise StreamlitAPIException(
                "The columns used for rendering the chart contain too many values with mixed types. Please select the columns manually via the y parameter."
            )

    # Arrow has problems with object types after melting two different dtypes
    # pyarrow.lib.ArrowTypeError: "Expected a <TYPE> object, got a object"
    columns_to_fix = [
        *columns_to_leave_alone,
        new_color_column_name,
        new_y_column_name,
    ]
    return type_util.fix_arrow_incompatible_column_types(
        long_df,
        selected_columns=columns_to_fix,
    )
416
+
417
+
418
def _maybe_reset_index_in_place(
    df: pd.DataFrame, x_column: str | None, y_column_list: list[str]
) -> str | None:
    """Move the index of ``df`` into a column when it is needed as the x column.

    This only happens when no x column is given and there is at least one y
    column; ``df`` is then modified in place. Returns the name of the x column
    to use, which is ``x_column`` unchanged when nothing was done.
    """
    if x_column is not None or len(y_column_list) == 0:
        return x_column

    index_name = df.index.name
    # A named index keeps its name as the column name. An unnamed one gets a
    # name that is unlikely to collide with user-given names.
    x_column = _SEPARATED_INDEX_COLUMN_NAME if index_name is None else index_name

    df.index.name = x_column
    df.reset_index(inplace=True)

    return x_column
433
+
434
+
435
def _drop_unused_columns(df: pd.DataFrame, *column_names: str | None) -> pd.DataFrame:
    """Return the subset of ``df`` made of the given columns.

    None entries are skipped and a name given more than once is kept only at its
    first position.
    """
    # A dict keeps insertion order, so this removes duplicates while keeping the
    # order stable (a plain set would not, and tests depend on the ordering).
    wanted = list(dict.fromkeys(name for name in column_names if name is not None))

    return df[wanted]
454
+
455
+
456
def _maybe_convert_color_column_in_place(df: pd.DataFrame, color_column: str | None):
    """Convert the color column of ``df`` in place if Vega needs another format.

    The decision is based on the first value of the column only. Nothing happens
    when there is no color column or when it is empty.
    """
    if color_column is None:
        return

    colors = df[color_column]
    if len(colors) == 0:
        return

    sample = colors.iat[0]

    # Hex strings are already valid CSS, and other kinds of values (plain numbers
    # or nominal strings) are left for Vega-Lite to treat as sequential or
    # categorical colors. Only color tuples need to be converted.
    if not is_hex_color_like(sample) and is_color_tuple_like(sample):
        df[color_column] = colors.map(to_css_color)
473
+
474
+
475
def _convert_col_names_to_str_in_place(
    df: pd.DataFrame,
    x_column: str | None,
    y_column_list: list[str],
    color_column: str | None,
    size_column: str | None,
) -> tuple[str | None, list[str], str | None, str | None]:
    """Rename every column of ``df`` to its string form, in place.

    Vega-Lite does not accept ints, etc. as column names. Returns the given x,
    y, color, and size names converted the same way (None stays None).
    """
    import pandas as pd

    def _as_str(name: Any) -> str | None:
        # None means "no column" and must stay None.
        return None if name is None else str(name)

    # Iterating the columns yields plain labels whatever the index type is.
    df.columns = pd.Index([str(label) for label in df.columns])

    return (
        _as_str(x_column),
        list(map(str, y_column_list)),
        _as_str(color_column),
        _as_str(size_column),
    )
495
+
496
+
497
def _parse_generic_column(
    df: pd.DataFrame, column_or_value: Any
) -> tuple[str | None, Any]:
    """Split an argument that is either a column name or a constant value.

    Returns ``(column_name, None)`` when the argument is a string naming a column
    of ``df``, and ``(None, value)`` otherwise.
    """
    names_a_column = isinstance(column_or_value, str) and column_or_value in df.columns
    if names_a_column:
        return column_or_value, None

    return None, column_or_value
508
+
509
+
510
def _parse_x_column(df: pd.DataFrame, x_from_user: str | None) -> str | None:
    """Validate the x argument given by the user and return the column name.

    None is passed through (it means the index of the dataframe is used).

    Raises
    ------
    StreamlitAPIException
        If the argument is neither None nor a string.
    StreamlitColumnNotFoundError
        If the argument is a string that names no column of ``df``.

    """
    if x_from_user is None:
        return None

    if not isinstance(x_from_user, str):
        raise StreamlitAPIException(
            "x parameter should be a column name (str) or None to use the "
            f" dataframe's index. Value given: {x_from_user} "
            f"(type {type(x_from_user)})"
        )

    if x_from_user not in df.columns:
        raise StreamlitColumnNotFoundError(df, x_from_user)

    return x_from_user
526
+
527
+
528
def _parse_y_columns(
    df: pd.DataFrame,
    y_from_user: str | Sequence[str] | None,
    x_column: str | None,
) -> list[str]:
    """Work out the list of y columns from the y argument given by the user.

    None selects every column of ``df``, a string selects that one column, and a
    sequence selects its items (converted to strings). The x column is taken out
    of the result unless the user asked for it explicitly.

    Raises
    ------
    StreamlitAPIException
        If the argument is none of the accepted kinds.
    StreamlitColumnNotFoundError
        If a selected column does not exist in ``df``.

    """
    selected: list[str]

    if y_from_user is None:
        selected = list(df.columns)
    elif isinstance(y_from_user, str):
        selected = [y_from_user]
    elif type_util.is_sequence(y_from_user):
        selected = [str(col) for col in y_from_user]
    else:
        raise StreamlitAPIException(
            "y parameter should be a column name (str) or list thereof. "
            f"Value given: {y_from_user} (type {type(y_from_user)})"
        )

    for col in selected:
        if col not in df.columns:
            raise StreamlitColumnNotFoundError(df, col)

    # The result should only include x_column when the user explicitly asked for
    # it. The inner test must run only when x_column was actually selected.
    if x_column in selected:
        explicitly_requested = bool(y_from_user) and x_column in y_from_user
        if not explicitly_requested:
            selected.remove(x_column)

    return selected
559
+
560
+
561
def _get_opacity_encoding(
    chart_type: ChartType, color_column: str | None
) -> alt.OpacityValue | None:
    """Return the opacity encoding for the chart, or None if it needs none.

    Only area charts that have a color column get one (a constant 0.7).
    """
    import altair as alt

    wants_opacity = bool(color_column) and chart_type == ChartType.AREA

    return alt.OpacityValue(0.7) if wants_opacity else None
570
+
571
+
572
def _get_axis_config(df: pd.DataFrame, column_name: str | None, grid: bool) -> alt.Axis:
    """Build the axis configuration for the given column.

    ``grid`` is passed through to the axis. Integer columns additionally get a
    minimum tick step of 1.
    """
    import altair as alt
    from pandas.api.types import is_integer_dtype

    has_integer_values = column_name is not None and is_integer_dtype(df[column_name])

    if not has_integer_values:
        return alt.Axis(grid=grid)

    # A minimum tick step of 1 prevents zooming into float numbers on an axis
    # that only holds integers.
    return alt.Axis(tickMinStep=1, grid=grid)
582
+
583
+
584
def _maybe_melt(
    df: pd.DataFrame,
    x_column: str | None,
    y_column_list: list[str],
    color_column: str | None,
    size_column: str | None,
) -> tuple[pd.DataFrame, str | None, str | None]:
    """If multiple columns are set for y, melt the dataframe into long format.

    Parameters
    ----------
    df : pd.DataFrame
        The data to chart.
    x_column : str or None
        The x column; it is kept as it is when melting.
    y_column_list : list[str]
        The y columns. Melting only happens when there are at least two.
    color_column : str or None
        The color column. It is replaced by the melted color column when melting.
    size_column : str or None
        The size column; it is kept as it is when melting.

    Returns
    -------
    tuple[pd.DataFrame, str | None, str | None]
        The (possibly melted) dataframe, the name of the single y column to use
        (None when there are no y columns), and the name of the color column.

    """
    # Zero or one y column: there is nothing to melt.
    if len(y_column_list) == 0:
        return df, None, color_column
    if len(y_column_list) == 1:
        return df, y_column_list[0], color_column

    # Pick column names that are unlikely to collide with user-given names.
    y_column = _MELTED_Y_COLUMN_NAME
    color_column = _MELTED_COLOR_COLUMN_NAME

    # Previously a missing x column with several y columns matched no branch and
    # left y_column unbound, so the return raised UnboundLocalError. Melt anyway,
    # keeping only the id columns that exist.
    columns_to_leave_alone: list[str] = []
    if x_column is not None:
        columns_to_leave_alone.append(x_column)
    if size_column:
        columns_to_leave_alone.append(size_column)

    df = _melt_data(
        df=df,
        columns_to_leave_alone=columns_to_leave_alone,
        columns_to_melt=y_column_list,
        new_y_column_name=y_column,
        new_color_column_name=color_column,
    )

    return df, y_column, color_column
616
+
617
+
618
def _get_x_encoding(
    df: pd.DataFrame,
    x_column: str | None,
    x_from_user: str | None,
    chart_type: ChartType,
) -> alt.X:
    """Build the x encoding of the chart.

    The axis only gets a title when the user passed the x column explicitly.
    """
    import altair as alt

    # If no field is specified, the full axis disappears when no data is present
    # (maybe a bug in vega-lite?), so a field that doesn't exist is passed then.
    x_field = _NON_EXISTENT_COLUMN_NAME if x_column is None else x_column

    # No label is shown when x comes from the index, so that the internal
    # anti-collision name is never displayed (not even a nice label like
    # SEPARATED_INDEX_COLUMN_TITLE is used). For any other column, the label is
    # shown only if the user named the column; this keeps the existing behavior.
    title_is_visible = (
        x_column is not None
        and x_column != _SEPARATED_INDEX_COLUMN_NAME
        and x_from_user is not None
    )
    x_title = x_column if title_is_visible else ""

    return alt.X(
        x_field,
        title=x_title,
        type=_get_x_encoding_type(df, chart_type, x_column),
        scale=alt.Scale(),
        axis=_get_axis_config(df, x_column, grid=False),
    )
656
+
657
+
658
def _get_y_encoding(
    df: pd.DataFrame,
    y_column: str | None,
    y_from_user: str | Sequence[str] | None,
) -> alt.Y:
    """Build the y encoding of the chart.

    The axis only gets a title when the user passed y explicitly and the data
    was not melted.
    """
    import altair as alt

    # If no field is specified, the full axis disappears when no data is present
    # (maybe a bug in vega-lite?), so a field that doesn't exist is passed then.
    y_field = _NON_EXISTENT_COLUMN_NAME if y_column is None else y_column

    # No label is shown for the melted y column, so that the internal
    # anti-collision name is never displayed (not even a nice label like
    # MELTED_Y_COLUMN_TITLE is used). For any other column, the label is shown
    # only if the user passed y; this keeps the existing behavior.
    title_is_visible = (
        y_column is not None
        and y_column != _MELTED_Y_COLUMN_NAME
        and y_from_user is not None
    )
    y_title = y_column if title_is_visible else ""

    return alt.Y(
        field=y_field,
        title=y_title,
        type=_get_y_encoding_type(df, y_column),
        scale=alt.Scale(),
        axis=_get_axis_config(df, y_column, grid=True),
    )
695
+
696
+
697
def _get_color_encoding(
    df: pd.DataFrame,
    color_value: Color | None,
    color_column: str | None,
    y_column_list: list[str],
    color_from_user: str | Color | list[Color] | None,
) -> alt.Color | alt.ColorValue | None:
    """Build the color encoding of the chart, or return None if there is none.

    Parameters
    ----------
    df : pd.DataFrame
        The prepared data.
    color_value : Color or None
        A constant color, or a list/tuple of colors, given by the user.
    color_column : str or None
        The name of the column that holds the colors.
    y_column_list : list[str]
        The y columns; their number must match the number of colors given.
    color_from_user : str, Color, list of Color, or None
        The color argument as given by the user; only used in the error message.

    Returns
    -------
    alt.Color, alt.ColorValue, or None
        None when there is neither a color value nor a color column.

    Raises
    ------
    StreamlitColorLengthError
        If the number of colors does not match the number of y columns.
    StreamlitInvalidColorError
        If a color value was given that is neither color-like nor a list/tuple.

    """
    import altair as alt

    # None and empty lists/tuples all count as "no color value given".
    has_color_value = color_value not in [None, [], tuple()]

    # If user passed a color value, that should win over colors coming from the
    # color column (be they manual or auto-assigned due to melting)
    if has_color_value:
        # If the color value is color-like, return that.
        if is_color_like(cast(Any, color_value)):
            # A single color can only be used for a single y column.
            if len(y_column_list) != 1:
                raise StreamlitColorLengthError([color_value], y_column_list)

            return alt.ColorValue(to_css_color(cast(Any, color_value)))

        # If the color value is a list of colors of appropriate length, return that.
        elif isinstance(color_value, (list, tuple)):
            color_values = cast(Collection[Color], color_value)

            if len(color_values) != len(y_column_list):
                raise StreamlitColorLengthError(color_values, y_column_list)

            if len(color_value) == 1:
                return alt.ColorValue(to_css_color(cast(Any, color_value[0])))
            else:
                # One color per y column: the colors become the range of the
                # color scale.
                return alt.Color(
                    field=color_column,
                    scale=alt.Scale(range=[to_css_color(c) for c in color_values]),
                    legend=_COLOR_LEGEND_SETTINGS,
                    type="nominal",
                    title=" ",
                )

        # A value was given, but it is neither a color nor a list/tuple.
        raise StreamlitInvalidColorError(df, color_from_user)

    elif color_column is not None:
        column_type: str | tuple[str, list[Any]]

        if color_column == _MELTED_COLOR_COLUMN_NAME:
            column_type = "nominal"
        else:
            column_type = type_util.infer_vegalite_type(df[color_column])

        color_enc = alt.Color(
            field=color_column, legend=_COLOR_LEGEND_SETTINGS, type=column_type
        )

        # Fix title if DF was melted
        if color_column == _MELTED_COLOR_COLUMN_NAME:
            # This has to contain an empty space, otherwise the
            # full y-axis disappears (maybe a bug in vega-lite)?
            color_enc["title"] = " "

        # If the 0th element in the color column looks like a color, we'll use the color column's
        # values as the colors in our chart.
        elif len(df[color_column]) and is_color_like(df[color_column].iat[0]):
            color_range = [to_css_color(c) for c in df[color_column].unique()]
            color_enc["scale"] = alt.Scale(range=color_range)
            # Don't show the color legend, because it will just show text with the color values,
            # like #f00, #00f, etc, which are not user-readable.
            color_enc["legend"] = None

        # Otherwise, let Vega-Lite auto-assign colors.
        # This codepath is typically reached when the color column contains numbers (in which case
        # Vega-Lite uses a color gradient to represent them) or strings (in which case Vega-Lite
        # assigns one color for each unique value).
        else:
            pass

        return color_enc

    return None
775
+
776
+
777
def _get_size_encoding(
    chart_type: ChartType,
    size_column: str | None,
    size_value: str | float | None,
) -> alt.Size | alt.SizeValue | None:
    """Build the size encoding of the chart, or return None if there is none.

    Only scatter charts have a size encoding: taken from the size column if there
    is one, else from the constant size value, else a default of 100.

    Raises
    ------
    StreamlitAPIException
        If the size value of a scatter chart is neither a number nor None.
    Error
        If a size column or value is given for a chart that is not a scatter
        chart.

    """
    import altair as alt

    if chart_type != ChartType.SCATTER:
        if size_column is None and size_value is None:
            return None

        raise Error(
            f"Chart type {chart_type.name} does not support size argument. "
            "This should never happen!"
        )

    # A size column wins over a constant size value.
    if size_column is not None:
        return alt.Size(
            size_column,
            legend=_SIZE_LEGEND_SETTINGS,
        )

    if size_value is None:
        return alt.SizeValue(100)

    if isinstance(size_value, (float, int)):
        return alt.SizeValue(size_value)

    raise StreamlitAPIException(
        f"This does not look like a valid size: {size_value!r}"
    )
807
+
808
+
809
def _get_tooltip_encoding(
    x_column: str,
    y_column: str,
    size_column: str | None,
    color_column: str | None,
    color_enc: alt.Color | alt.ColorValue | None,
) -> list[alt.Tooltip]:
    """Build the list of tooltips of the chart.

    The order is x, y, then color and size when present. Columns that carry an
    internal anti-collision name get a human-readable title, so that the
    internal name is never shown to the user.
    """
    import altair as alt

    if x_column == _SEPARATED_INDEX_COLUMN_NAME:
        x_tip = alt.Tooltip(x_column, title=_SEPARATED_INDEX_COLUMN_TITLE)
    else:
        x_tip = alt.Tooltip(x_column)

    if y_column == _MELTED_Y_COLUMN_NAME:
        y_tip = alt.Tooltip(
            y_column,
            title=_MELTED_Y_COLUMN_TITLE,
            type="quantitative",  # Just picked something random. Doesn't really matter!
        )
    else:
        y_tip = alt.Tooltip(y_column)

    entries = [x_tip, y_tip]

    # A color encoding whose legend was set to None means the user passed a color
    # column with actual color values (like "#ff0"); those values should not be
    # shown in the tooltip either.
    if color_column and getattr(color_enc, "legend", True) is not None:
        if color_column == _MELTED_COLOR_COLUMN_NAME:
            color_tip = alt.Tooltip(
                color_column,
                title=_MELTED_COLOR_COLUMN_TITLE,
                type="nominal",
            )
        else:
            color_tip = alt.Tooltip(color_column)
        entries.append(color_tip)

    if size_column:
        entries.append(alt.Tooltip(size_column))

    return entries
860
+
861
+
862
def _get_x_encoding_type(
    df: pd.DataFrame, chart_type: ChartType, x_column: str | None
) -> type_util.VegaLiteType:
    """Return the Vega-Lite type to use for the x encoding."""
    if x_column is not None:
        # Bar charts should have a discrete (ordinal) x-axis, UNLESS type is date/time
        # https://github.com/streamlit/streamlit/pull/2097#issuecomment-714802475
        is_discrete_bar = chart_type == ChartType.BAR and not _is_date_column(
            df, x_column
        )
        if is_discrete_bar:
            return "ordinal"

        return type_util.infer_vegalite_type(df[x_column])

    # Without a column any type will do. If None, Vega-Lite may hide the axis.
    return "quantitative"
874
+
875
+
876
def _get_y_encoding_type(
    df: pd.DataFrame, y_column: str | None
) -> type_util.VegaLiteType:
    """Return the Vega-Lite type to use for the y encoding."""
    if not y_column:
        # Without a column any type will do. If undefined, Vega-Lite may hide
        # the axis.
        return "quantitative"

    return type_util.infer_vegalite_type(df[y_column])
883
+
884
+
885
class StreamlitColumnNotFoundError(StreamlitAPIException):
    """Raised when a column named by the user does not exist in the data."""

    def __init__(self, df, col_name, *args):
        # List the columns that do exist, to help the user spot the mistake.
        available_columns = ", ".join(map(str, df.columns))
        message = "".join(
            [
                f'Data does not have a column named `"{col_name}"`. ',
                f"Available columns are `{available_columns}`",
            ]
        )
        super().__init__(message, *args)
893
+
894
+
895
class StreamlitInvalidColorError(StreamlitAPIException):
    """Raised when the color argument given by the user is not a valid color."""

    def __init__(self, df, color_from_user, *args):
        # ``df`` is accepted for backward compatibility but is not used. The
        # statement that joined its column names discarded the result, so it
        # has been removed.
        message = f"""
This does not look like a valid color argument: `{color_from_user}`.

The color argument can be:

* A hex string like "#ffaa00" or "#ffaa0088".
* An RGB or RGBA tuple with the red, green, blue, and alpha
  components specified as ints from 0 to 255 or floats from 0.0 to
  1.0.
* The name of a column.
* Or a list of colors, matching the number of y columns to draw.
        """
        super().__init__(message, *args)
911
+
912
+
913
class StreamlitColorLengthError(StreamlitAPIException):
    """Raised when the number of colors does not match the number of y columns."""

    def __init__(self, color_values, y_column_list, *args):
        message = "".join(
            [
                f"The list of colors `{color_values}` must have the same ",
                "length as the list of columns to be colored ",
                f"`{y_column_list}`.",
            ]
        )
        super().__init__(message, *args)