streamlit-nightly 1.43.2.dev20250307__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- streamlit/__init__.py +306 -0
- streamlit/__main__.py +20 -0
- streamlit/auth_util.py +218 -0
- streamlit/cli_util.py +105 -0
- streamlit/column_config.py +56 -0
- streamlit/commands/__init__.py +13 -0
- streamlit/commands/echo.py +126 -0
- streamlit/commands/execution_control.py +238 -0
- streamlit/commands/experimental_query_params.py +169 -0
- streamlit/commands/logo.py +189 -0
- streamlit/commands/navigation.py +385 -0
- streamlit/commands/page_config.py +311 -0
- streamlit/components/__init__.py +13 -0
- streamlit/components/lib/__init__.py +13 -0
- streamlit/components/lib/local_component_registry.py +84 -0
- streamlit/components/types/__init__.py +13 -0
- streamlit/components/types/base_component_registry.py +99 -0
- streamlit/components/types/base_custom_component.py +150 -0
- streamlit/components/v1/__init__.py +31 -0
- streamlit/components/v1/component_arrow.py +141 -0
- streamlit/components/v1/component_registry.py +130 -0
- streamlit/components/v1/components.py +38 -0
- streamlit/components/v1/custom_component.py +243 -0
- streamlit/config.py +1513 -0
- streamlit/config_option.py +311 -0
- streamlit/config_util.py +177 -0
- streamlit/connections/__init__.py +28 -0
- streamlit/connections/base_connection.py +174 -0
- streamlit/connections/snowflake_connection.py +562 -0
- streamlit/connections/snowpark_connection.py +213 -0
- streamlit/connections/sql_connection.py +425 -0
- streamlit/connections/util.py +97 -0
- streamlit/cursor.py +210 -0
- streamlit/dataframe_util.py +1416 -0
- streamlit/delta_generator.py +602 -0
- streamlit/delta_generator_singletons.py +204 -0
- streamlit/deprecation_util.py +209 -0
- streamlit/development.py +21 -0
- streamlit/elements/__init__.py +13 -0
- streamlit/elements/alert.py +234 -0
- streamlit/elements/arrow.py +962 -0
- streamlit/elements/balloons.py +47 -0
- streamlit/elements/bokeh_chart.py +133 -0
- streamlit/elements/code.py +114 -0
- streamlit/elements/deck_gl_json_chart.py +546 -0
- streamlit/elements/dialog_decorator.py +267 -0
- streamlit/elements/doc_string.py +558 -0
- streamlit/elements/empty.py +130 -0
- streamlit/elements/exception.py +331 -0
- streamlit/elements/form.py +354 -0
- streamlit/elements/graphviz_chart.py +150 -0
- streamlit/elements/heading.py +302 -0
- streamlit/elements/html.py +105 -0
- streamlit/elements/iframe.py +191 -0
- streamlit/elements/image.py +196 -0
- streamlit/elements/json.py +139 -0
- streamlit/elements/layouts.py +879 -0
- streamlit/elements/lib/__init__.py +13 -0
- streamlit/elements/lib/built_in_chart_utils.py +1157 -0
- streamlit/elements/lib/color_util.py +263 -0
- streamlit/elements/lib/column_config_utils.py +542 -0
- streamlit/elements/lib/column_types.py +2188 -0
- streamlit/elements/lib/dialog.py +147 -0
- streamlit/elements/lib/dicttools.py +154 -0
- streamlit/elements/lib/event_utils.py +37 -0
- streamlit/elements/lib/file_uploader_utils.py +66 -0
- streamlit/elements/lib/form_utils.py +77 -0
- streamlit/elements/lib/image_utils.py +441 -0
- streamlit/elements/lib/js_number.py +105 -0
- streamlit/elements/lib/mutable_status_container.py +183 -0
- streamlit/elements/lib/options_selector_utils.py +250 -0
- streamlit/elements/lib/pandas_styler_utils.py +274 -0
- streamlit/elements/lib/policies.py +194 -0
- streamlit/elements/lib/streamlit_plotly_theme.py +207 -0
- streamlit/elements/lib/subtitle_utils.py +176 -0
- streamlit/elements/lib/utils.py +250 -0
- streamlit/elements/map.py +508 -0
- streamlit/elements/markdown.py +277 -0
- streamlit/elements/media.py +793 -0
- streamlit/elements/metric.py +301 -0
- streamlit/elements/plotly_chart.py +546 -0
- streamlit/elements/progress.py +156 -0
- streamlit/elements/pyplot.py +194 -0
- streamlit/elements/snow.py +47 -0
- streamlit/elements/spinner.py +113 -0
- streamlit/elements/text.py +76 -0
- streamlit/elements/toast.py +98 -0
- streamlit/elements/vega_charts.py +1984 -0
- streamlit/elements/widgets/__init__.py +13 -0
- streamlit/elements/widgets/audio_input.py +310 -0
- streamlit/elements/widgets/button.py +1123 -0
- streamlit/elements/widgets/button_group.py +1008 -0
- streamlit/elements/widgets/camera_input.py +263 -0
- streamlit/elements/widgets/chat.py +647 -0
- streamlit/elements/widgets/checkbox.py +352 -0
- streamlit/elements/widgets/color_picker.py +265 -0
- streamlit/elements/widgets/data_editor.py +983 -0
- streamlit/elements/widgets/file_uploader.py +486 -0
- streamlit/elements/widgets/multiselect.py +338 -0
- streamlit/elements/widgets/number_input.py +545 -0
- streamlit/elements/widgets/radio.py +407 -0
- streamlit/elements/widgets/select_slider.py +437 -0
- streamlit/elements/widgets/selectbox.py +366 -0
- streamlit/elements/widgets/slider.py +880 -0
- streamlit/elements/widgets/text_widgets.py +628 -0
- streamlit/elements/widgets/time_widgets.py +970 -0
- streamlit/elements/write.py +574 -0
- streamlit/emojis.py +34 -0
- streamlit/env_util.py +61 -0
- streamlit/error_util.py +105 -0
- streamlit/errors.py +452 -0
- streamlit/external/__init__.py +13 -0
- streamlit/external/langchain/__init__.py +23 -0
- streamlit/external/langchain/streamlit_callback_handler.py +406 -0
- streamlit/file_util.py +247 -0
- streamlit/git_util.py +173 -0
- streamlit/hello/__init__.py +13 -0
- streamlit/hello/animation_demo.py +82 -0
- streamlit/hello/dataframe_demo.py +71 -0
- streamlit/hello/hello.py +37 -0
- streamlit/hello/mapping_demo.py +114 -0
- streamlit/hello/plotting_demo.py +55 -0
- streamlit/hello/streamlit_app.py +55 -0
- streamlit/hello/utils.py +28 -0
- streamlit/logger.py +130 -0
- streamlit/material_icon_names.py +25 -0
- streamlit/navigation/__init__.py +13 -0
- streamlit/navigation/page.py +302 -0
- streamlit/net_util.py +125 -0
- streamlit/platform.py +33 -0
- streamlit/proto/Alert_pb2.py +29 -0
- streamlit/proto/Alert_pb2.pyi +90 -0
- streamlit/proto/AppPage_pb2.py +27 -0
- streamlit/proto/AppPage_pb2.pyi +64 -0
- streamlit/proto/ArrowNamedDataSet_pb2.py +28 -0
- streamlit/proto/ArrowNamedDataSet_pb2.pyi +57 -0
- streamlit/proto/ArrowVegaLiteChart_pb2.py +29 -0
- streamlit/proto/ArrowVegaLiteChart_pb2.pyi +84 -0
- streamlit/proto/Arrow_pb2.py +33 -0
- streamlit/proto/Arrow_pb2.pyi +188 -0
- streamlit/proto/AudioInput_pb2.py +28 -0
- streamlit/proto/AudioInput_pb2.pyi +58 -0
- streamlit/proto/Audio_pb2.py +27 -0
- streamlit/proto/Audio_pb2.pyi +58 -0
- streamlit/proto/AuthRedirect_pb2.py +27 -0
- streamlit/proto/AuthRedirect_pb2.pyi +41 -0
- streamlit/proto/AutoRerun_pb2.py +27 -0
- streamlit/proto/AutoRerun_pb2.pyi +45 -0
- streamlit/proto/BackMsg_pb2.py +29 -0
- streamlit/proto/BackMsg_pb2.pyi +105 -0
- streamlit/proto/Balloons_pb2.py +27 -0
- streamlit/proto/Balloons_pb2.pyi +43 -0
- streamlit/proto/Block_pb2.py +53 -0
- streamlit/proto/Block_pb2.pyi +322 -0
- streamlit/proto/BokehChart_pb2.py +27 -0
- streamlit/proto/BokehChart_pb2.pyi +49 -0
- streamlit/proto/ButtonGroup_pb2.py +36 -0
- streamlit/proto/ButtonGroup_pb2.pyi +169 -0
- streamlit/proto/Button_pb2.py +27 -0
- streamlit/proto/Button_pb2.pyi +71 -0
- streamlit/proto/CameraInput_pb2.py +28 -0
- streamlit/proto/CameraInput_pb2.pyi +58 -0
- streamlit/proto/ChatInput_pb2.py +31 -0
- streamlit/proto/ChatInput_pb2.pyi +111 -0
- streamlit/proto/Checkbox_pb2.py +30 -0
- streamlit/proto/Checkbox_pb2.pyi +90 -0
- streamlit/proto/ClientState_pb2.py +30 -0
- streamlit/proto/ClientState_pb2.pyi +90 -0
- streamlit/proto/Code_pb2.py +27 -0
- streamlit/proto/Code_pb2.pyi +55 -0
- streamlit/proto/ColorPicker_pb2.py +28 -0
- streamlit/proto/ColorPicker_pb2.pyi +67 -0
- streamlit/proto/Common_pb2.py +51 -0
- streamlit/proto/Common_pb2.pyi +293 -0
- streamlit/proto/Components_pb2.py +35 -0
- streamlit/proto/Components_pb2.pyi +172 -0
- streamlit/proto/DataFrame_pb2.py +56 -0
- streamlit/proto/DataFrame_pb2.pyi +397 -0
- streamlit/proto/DateInput_pb2.py +28 -0
- streamlit/proto/DateInput_pb2.pyi +83 -0
- streamlit/proto/DeckGlJsonChart_pb2.py +29 -0
- streamlit/proto/DeckGlJsonChart_pb2.pyi +102 -0
- streamlit/proto/Delta_pb2.py +31 -0
- streamlit/proto/Delta_pb2.pyi +74 -0
- streamlit/proto/DocString_pb2.py +29 -0
- streamlit/proto/DocString_pb2.pyi +93 -0
- streamlit/proto/DownloadButton_pb2.py +27 -0
- streamlit/proto/DownloadButton_pb2.pyi +70 -0
- streamlit/proto/Element_pb2.py +78 -0
- streamlit/proto/Element_pb2.pyi +312 -0
- streamlit/proto/Empty_pb2.py +27 -0
- streamlit/proto/Empty_pb2.pyi +36 -0
- streamlit/proto/Exception_pb2.py +27 -0
- streamlit/proto/Exception_pb2.pyi +72 -0
- streamlit/proto/Favicon_pb2.py +27 -0
- streamlit/proto/Favicon_pb2.pyi +40 -0
- streamlit/proto/FileUploader_pb2.py +28 -0
- streamlit/proto/FileUploader_pb2.pyi +78 -0
- streamlit/proto/ForwardMsg_pb2.py +53 -0
- streamlit/proto/ForwardMsg_pb2.pyi +293 -0
- streamlit/proto/GitInfo_pb2.py +29 -0
- streamlit/proto/GitInfo_pb2.pyi +83 -0
- streamlit/proto/GraphVizChart_pb2.py +27 -0
- streamlit/proto/GraphVizChart_pb2.pyi +53 -0
- streamlit/proto/Heading_pb2.py +27 -0
- streamlit/proto/Heading_pb2.pyi +56 -0
- streamlit/proto/Html_pb2.py +27 -0
- streamlit/proto/Html_pb2.pyi +42 -0
- streamlit/proto/IFrame_pb2.py +27 -0
- streamlit/proto/IFrame_pb2.pyi +59 -0
- streamlit/proto/Image_pb2.py +29 -0
- streamlit/proto/Image_pb2.pyi +84 -0
- streamlit/proto/Json_pb2.py +27 -0
- streamlit/proto/Json_pb2.pyi +53 -0
- streamlit/proto/LabelVisibilityMessage_pb2.py +29 -0
- streamlit/proto/LabelVisibilityMessage_pb2.pyi +68 -0
- streamlit/proto/LinkButton_pb2.py +27 -0
- streamlit/proto/LinkButton_pb2.pyi +58 -0
- streamlit/proto/Logo_pb2.py +27 -0
- streamlit/proto/Logo_pb2.pyi +51 -0
- streamlit/proto/Markdown_pb2.py +29 -0
- streamlit/proto/Markdown_pb2.pyi +86 -0
- streamlit/proto/Metric_pb2.py +32 -0
- streamlit/proto/Metric_pb2.pyi +101 -0
- streamlit/proto/MetricsEvent_pb2.py +30 -0
- streamlit/proto/MetricsEvent_pb2.pyi +200 -0
- streamlit/proto/MultiSelect_pb2.py +28 -0
- streamlit/proto/MultiSelect_pb2.pyi +81 -0
- streamlit/proto/NamedDataSet_pb2.py +28 -0
- streamlit/proto/NamedDataSet_pb2.pyi +59 -0
- streamlit/proto/Navigation_pb2.py +30 -0
- streamlit/proto/Navigation_pb2.pyi +84 -0
- streamlit/proto/NewSession_pb2.py +51 -0
- streamlit/proto/NewSession_pb2.pyi +481 -0
- streamlit/proto/NumberInput_pb2.py +30 -0
- streamlit/proto/NumberInput_pb2.pyi +121 -0
- streamlit/proto/PageConfig_pb2.py +33 -0
- streamlit/proto/PageConfig_pb2.pyi +126 -0
- streamlit/proto/PageInfo_pb2.py +27 -0
- streamlit/proto/PageInfo_pb2.pyi +43 -0
- streamlit/proto/PageLink_pb2.py +27 -0
- streamlit/proto/PageLink_pb2.pyi +63 -0
- streamlit/proto/PageNotFound_pb2.py +27 -0
- streamlit/proto/PageNotFound_pb2.pyi +42 -0
- streamlit/proto/PageProfile_pb2.py +31 -0
- streamlit/proto/PageProfile_pb2.pyi +127 -0
- streamlit/proto/PagesChanged_pb2.py +28 -0
- streamlit/proto/PagesChanged_pb2.pyi +48 -0
- streamlit/proto/ParentMessage_pb2.py +27 -0
- streamlit/proto/ParentMessage_pb2.pyi +46 -0
- streamlit/proto/PlotlyChart_pb2.py +31 -0
- streamlit/proto/PlotlyChart_pb2.pyi +131 -0
- streamlit/proto/Progress_pb2.py +27 -0
- streamlit/proto/Progress_pb2.pyi +43 -0
- streamlit/proto/Radio_pb2.py +28 -0
- streamlit/proto/Radio_pb2.pyi +84 -0
- streamlit/proto/RootContainer_pb2.py +27 -0
- streamlit/proto/RootContainer_pb2.pyi +56 -0
- streamlit/proto/Selectbox_pb2.py +28 -0
- streamlit/proto/Selectbox_pb2.pyi +80 -0
- streamlit/proto/SessionEvent_pb2.py +28 -0
- streamlit/proto/SessionEvent_pb2.pyi +62 -0
- streamlit/proto/SessionStatus_pb2.py +27 -0
- streamlit/proto/SessionStatus_pb2.pyi +57 -0
- streamlit/proto/Skeleton_pb2.py +29 -0
- streamlit/proto/Skeleton_pb2.pyi +71 -0
- streamlit/proto/Slider_pb2.py +32 -0
- streamlit/proto/Slider_pb2.pyi +142 -0
- streamlit/proto/Snow_pb2.py +27 -0
- streamlit/proto/Snow_pb2.pyi +43 -0
- streamlit/proto/Spinner_pb2.py +27 -0
- streamlit/proto/Spinner_pb2.pyi +49 -0
- streamlit/proto/TextArea_pb2.py +28 -0
- streamlit/proto/TextArea_pb2.pyi +80 -0
- streamlit/proto/TextInput_pb2.py +30 -0
- streamlit/proto/TextInput_pb2.pyi +107 -0
- streamlit/proto/Text_pb2.py +27 -0
- streamlit/proto/Text_pb2.pyi +46 -0
- streamlit/proto/TimeInput_pb2.py +28 -0
- streamlit/proto/TimeInput_pb2.pyi +74 -0
- streamlit/proto/Toast_pb2.py +27 -0
- streamlit/proto/Toast_pb2.pyi +45 -0
- streamlit/proto/VegaLiteChart_pb2.py +29 -0
- streamlit/proto/VegaLiteChart_pb2.pyi +71 -0
- streamlit/proto/Video_pb2.py +31 -0
- streamlit/proto/Video_pb2.pyi +117 -0
- streamlit/proto/WidgetStates_pb2.py +31 -0
- streamlit/proto/WidgetStates_pb2.pyi +126 -0
- streamlit/proto/__init__.py +15 -0
- streamlit/proto/openmetrics_data_model_pb2.py +60 -0
- streamlit/proto/openmetrics_data_model_pb2.pyi +522 -0
- streamlit/py.typed +0 -0
- streamlit/runtime/__init__.py +50 -0
- streamlit/runtime/app_session.py +982 -0
- streamlit/runtime/caching/__init__.py +98 -0
- streamlit/runtime/caching/cache_data_api.py +665 -0
- streamlit/runtime/caching/cache_errors.py +142 -0
- streamlit/runtime/caching/cache_resource_api.py +527 -0
- streamlit/runtime/caching/cache_type.py +33 -0
- streamlit/runtime/caching/cache_utils.py +523 -0
- streamlit/runtime/caching/cached_message_replay.py +290 -0
- streamlit/runtime/caching/hashing.py +637 -0
- streamlit/runtime/caching/legacy_cache_api.py +169 -0
- streamlit/runtime/caching/storage/__init__.py +29 -0
- streamlit/runtime/caching/storage/cache_storage_protocol.py +239 -0
- streamlit/runtime/caching/storage/dummy_cache_storage.py +60 -0
- streamlit/runtime/caching/storage/in_memory_cache_storage_wrapper.py +145 -0
- streamlit/runtime/caching/storage/local_disk_cache_storage.py +223 -0
- streamlit/runtime/connection_factory.py +436 -0
- streamlit/runtime/context.py +280 -0
- streamlit/runtime/credentials.py +364 -0
- streamlit/runtime/forward_msg_cache.py +296 -0
- streamlit/runtime/forward_msg_queue.py +240 -0
- streamlit/runtime/fragment.py +477 -0
- streamlit/runtime/media_file_manager.py +234 -0
- streamlit/runtime/media_file_storage.py +143 -0
- streamlit/runtime/memory_media_file_storage.py +181 -0
- streamlit/runtime/memory_session_storage.py +77 -0
- streamlit/runtime/memory_uploaded_file_manager.py +138 -0
- streamlit/runtime/metrics_util.py +486 -0
- streamlit/runtime/pages_manager.py +165 -0
- streamlit/runtime/runtime.py +792 -0
- streamlit/runtime/runtime_util.py +106 -0
- streamlit/runtime/script_data.py +46 -0
- streamlit/runtime/scriptrunner/__init__.py +38 -0
- streamlit/runtime/scriptrunner/exec_code.py +159 -0
- streamlit/runtime/scriptrunner/magic.py +273 -0
- streamlit/runtime/scriptrunner/magic_funcs.py +32 -0
- streamlit/runtime/scriptrunner/script_cache.py +89 -0
- streamlit/runtime/scriptrunner/script_runner.py +756 -0
- streamlit/runtime/scriptrunner_utils/__init__.py +19 -0
- streamlit/runtime/scriptrunner_utils/exceptions.py +48 -0
- streamlit/runtime/scriptrunner_utils/script_requests.py +307 -0
- streamlit/runtime/scriptrunner_utils/script_run_context.py +287 -0
- streamlit/runtime/secrets.py +534 -0
- streamlit/runtime/session_manager.py +394 -0
- streamlit/runtime/state/__init__.py +41 -0
- streamlit/runtime/state/common.py +191 -0
- streamlit/runtime/state/query_params.py +205 -0
- streamlit/runtime/state/query_params_proxy.py +218 -0
- streamlit/runtime/state/safe_session_state.py +138 -0
- streamlit/runtime/state/session_state.py +772 -0
- streamlit/runtime/state/session_state_proxy.py +153 -0
- streamlit/runtime/state/widgets.py +135 -0
- streamlit/runtime/stats.py +109 -0
- streamlit/runtime/uploaded_file_manager.py +148 -0
- streamlit/runtime/websocket_session_manager.py +167 -0
- streamlit/source_util.py +98 -0
- streamlit/static/favicon.png +0 -0
- streamlit/static/index.html +61 -0
- streamlit/static/static/css/index.Bmkmz40k.css +1 -0
- streamlit/static/static/css/index.DpJG_94W.css +1 -0
- streamlit/static/static/css/index.DzuxGC_t.css +1 -0
- streamlit/static/static/js/FileDownload.esm.Bp9m5jrx.js +1 -0
- streamlit/static/static/js/FileHelper.D_3pbilj.js +5 -0
- streamlit/static/static/js/FormClearHelper.Ct2rwLXo.js +1 -0
- streamlit/static/static/js/Hooks.BKdzj5MJ.js +1 -0
- streamlit/static/static/js/InputInstructions.DB3QGNJP.js +1 -0
- streamlit/static/static/js/ProgressBar.D40A5xc2.js +2 -0
- streamlit/static/static/js/RenderInPortalIfExists.DLUCooTN.js +1 -0
- streamlit/static/static/js/Toolbar.BiGGIQun.js +1 -0
- streamlit/static/static/js/UploadFileInfo.C-jY39rj.js +1 -0
- streamlit/static/static/js/base-input.CQBQT24M.js +4 -0
- streamlit/static/static/js/checkbox.Buj8gd_M.js +9 -0
- streamlit/static/static/js/createDownloadLinkElement.DZMwyjvU.js +1 -0
- streamlit/static/static/js/createSuper.CesK3I23.js +1 -0
- streamlit/static/static/js/data-grid-overlay-editor.B69OOFM4.js +1 -0
- streamlit/static/static/js/downloader.BZQhlBNT.js +1 -0
- streamlit/static/static/js/es6.D9Zhqujy.js +2 -0
- streamlit/static/static/js/iframeResizer.contentWindow.CAzcBpCC.js +1 -0
- streamlit/static/static/js/index.08vcOOvb.js +1 -0
- streamlit/static/static/js/index.0uqKfJUS.js +1 -0
- streamlit/static/static/js/index.B02M5u69.js +203 -0
- streamlit/static/static/js/index.B7mcZKMx.js +1 -0
- streamlit/static/static/js/index.BAQDHFA_.js +1 -0
- streamlit/static/static/js/index.BI60cMVr.js +2 -0
- streamlit/static/static/js/index.BLug2inK.js +1 -0
- streamlit/static/static/js/index.BM6TMY8g.js +2 -0
- streamlit/static/static/js/index.BZ9p1t7G.js +1 -0
- streamlit/static/static/js/index.BZqa87a1.js +2 -0
- streamlit/static/static/js/index.BcsRUzZZ.js +1 -0
- streamlit/static/static/js/index.BgVMiY_P.js +197 -0
- streamlit/static/static/js/index.BtuGy7By.js +6 -0
- streamlit/static/static/js/index.BuDuBmrs.js +1 -0
- streamlit/static/static/js/index.BvXU2oKV.js +1 -0
- streamlit/static/static/js/index.BxcwPacT.js +73 -0
- streamlit/static/static/js/index.CWX8KB81.js +1 -0
- streamlit/static/static/js/index.CXzZTo_q.js +1 -0
- streamlit/static/static/js/index.CcRWp_KL.js +1 -0
- streamlit/static/static/js/index.Cd-_xe55.js +3 -0
- streamlit/static/static/js/index.CdG2PXln.js +4537 -0
- streamlit/static/static/js/index.CjXvXmcP.js +1 -0
- streamlit/static/static/js/index.D1HZENZx.js +776 -0
- streamlit/static/static/js/index.D21Efo64.js +1617 -0
- streamlit/static/static/js/index.D9WgGVBx.js +7 -0
- streamlit/static/static/js/index.DEcsHtvb.js +12 -0
- streamlit/static/static/js/index.DFeMfr_K.js +1 -0
- streamlit/static/static/js/index.DHFBoItz.js +1 -0
- streamlit/static/static/js/index.D_PrBKnJ.js +3 -0
- streamlit/static/static/js/index.DmuRkekN.js +3855 -0
- streamlit/static/static/js/index.Do6eY8sf.js +1 -0
- streamlit/static/static/js/index.Dz3lP2P-.js +1 -0
- streamlit/static/static/js/index.Dz_UqF-s.js +1 -0
- streamlit/static/static/js/index.GkSUsPhJ.js +1 -0
- streamlit/static/static/js/index.H1U1IC_d.js +3 -0
- streamlit/static/static/js/index.g6p_4DPr.js +1 -0
- streamlit/static/static/js/index.g9x_GKss.js +1 -0
- streamlit/static/static/js/index.zo9jm08y.js +1 -0
- streamlit/static/static/js/input.DnaFglHq.js +2 -0
- streamlit/static/static/js/inputUtils.CQWz5UKz.js +1 -0
- streamlit/static/static/js/memory.Crb9x4-F.js +1 -0
- streamlit/static/static/js/mergeWith.ouAz0sK3.js +1 -0
- streamlit/static/static/js/number-overlay-editor._UaN-O48.js +9 -0
- streamlit/static/static/js/possibleConstructorReturn.CtGjGFHz.js +1 -0
- streamlit/static/static/js/sandbox.CBybYOhV.js +1 -0
- streamlit/static/static/js/sprintf.D7DtBTRn.js +1 -0
- streamlit/static/static/js/textarea.Cb_uJt5U.js +2 -0
- streamlit/static/static/js/threshold.DjX0wlsa.js +1 -0
- streamlit/static/static/js/timepicker.DKT7pfoF.js +4 -0
- streamlit/static/static/js/timer.CAwTRJ_g.js +1 -0
- streamlit/static/static/js/toConsumableArray.05Ikp13-.js +3 -0
- streamlit/static/static/js/uniqueId.D2FMWUEI.js +1 -0
- streamlit/static/static/js/useBasicWidgetState.urnZLANY.js +1 -0
- streamlit/static/static/js/useOnInputChange.BOKIIdJ1.js +1 -0
- streamlit/static/static/js/value.CgPGBV_l.js +1 -0
- streamlit/static/static/js/withFullScreenWrapper.C_N8J0Xx.js +1 -0
- streamlit/static/static/media/KaTeX_AMS-Regular.BQhdFMY1.woff2 +0 -0
- streamlit/static/static/media/KaTeX_AMS-Regular.DMm9YOAa.woff +0 -0
- streamlit/static/static/media/KaTeX_AMS-Regular.DRggAlZN.ttf +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Bold.ATXxdsX0.ttf +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Bold.BEiXGLvX.woff +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Bold.Dq_IR9rO.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Regular.CTRA-rTL.woff +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Regular.Di6jR-x-.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Caligraphic-Regular.wX97UBjC.ttf +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Bold.BdnERNNW.ttf +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Bold.BsDP51OF.woff +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Bold.CL6g_b3V.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Regular.CB_wures.ttf +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Regular.CTYiF6lA.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Fraktur-Regular.Dxdc4cR9.woff +0 -0
- streamlit/static/static/media/KaTeX_Main-Bold.Cx986IdX.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Main-Bold.Jm3AIy58.woff +0 -0
- streamlit/static/static/media/KaTeX_Main-Bold.waoOVXN0.ttf +0 -0
- streamlit/static/static/media/KaTeX_Main-BoldItalic.DxDJ3AOS.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Main-BoldItalic.DzxPMmG6.ttf +0 -0
- streamlit/static/static/media/KaTeX_Main-BoldItalic.SpSLRI95.woff +0 -0
- streamlit/static/static/media/KaTeX_Main-Italic.3WenGoN9.ttf +0 -0
- streamlit/static/static/media/KaTeX_Main-Italic.BMLOBm91.woff +0 -0
- streamlit/static/static/media/KaTeX_Main-Italic.NWA7e6Wa.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Main-Regular.B22Nviop.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Main-Regular.Dr94JaBh.woff +0 -0
- streamlit/static/static/media/KaTeX_Main-Regular.ypZvNtVU.ttf +0 -0
- streamlit/static/static/media/KaTeX_Math-BoldItalic.B3XSjfu4.ttf +0 -0
- streamlit/static/static/media/KaTeX_Math-BoldItalic.CZnvNsCZ.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Math-BoldItalic.iY-2wyZ7.woff +0 -0
- streamlit/static/static/media/KaTeX_Math-Italic.DA0__PXp.woff +0 -0
- streamlit/static/static/media/KaTeX_Math-Italic.flOr_0UB.ttf +0 -0
- streamlit/static/static/media/KaTeX_Math-Italic.t53AETM-.woff2 +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Bold.CFMepnvq.ttf +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Bold.D1sUS0GD.woff2 +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Bold.DbIhKOiC.woff +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Italic.C3H0VqGB.woff2 +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Italic.DN2j7dab.woff +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Italic.YYjJ1zSn.ttf +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Regular.BNo7hRIc.ttf +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Regular.CS6fqUqJ.woff +0 -0
- streamlit/static/static/media/KaTeX_SansSerif-Regular.DDBCnlJ7.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Script-Regular.C5JkGWo-.ttf +0 -0
- streamlit/static/static/media/KaTeX_Script-Regular.D3wIWfF6.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Script-Regular.D5yQViql.woff +0 -0
- streamlit/static/static/media/KaTeX_Size1-Regular.C195tn64.woff +0 -0
- streamlit/static/static/media/KaTeX_Size1-Regular.Dbsnue_I.ttf +0 -0
- streamlit/static/static/media/KaTeX_Size1-Regular.mCD8mA8B.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Size2-Regular.B7gKUWhC.ttf +0 -0
- streamlit/static/static/media/KaTeX_Size2-Regular.Dy4dx90m.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Size2-Regular.oD1tc_U0.woff +0 -0
- streamlit/static/static/media/KaTeX_Size3-Regular.CTq5MqoE.woff +0 -0
- streamlit/static/static/media/KaTeX_Size3-Regular.DgpXs0kz.ttf +0 -0
- streamlit/static/static/media/KaTeX_Size4-Regular.BF-4gkZK.woff +0 -0
- streamlit/static/static/media/KaTeX_Size4-Regular.DWFBv043.ttf +0 -0
- streamlit/static/static/media/KaTeX_Size4-Regular.Dl5lxZxV.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Typewriter-Regular.C0xS9mPB.woff +0 -0
- streamlit/static/static/media/KaTeX_Typewriter-Regular.CO6r4hn1.woff2 +0 -0
- streamlit/static/static/media/KaTeX_Typewriter-Regular.D3Ib7_Hf.ttf +0 -0
- streamlit/static/static/media/MaterialSymbols-Rounded.DcZbplWk.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-Bold.CFEfr7-q.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-BoldItalic.C-LkFXxa.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-Italic.CxFOx7N-.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-Regular.CBOlD63d.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-SemiBold.CFHwW3Wd.woff2 +0 -0
- streamlit/static/static/media/SourceCodePro-SemiBoldItalic.Cg2yRu82.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-Bold.-6c9oR8J.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-BoldItalic.DmM_grLY.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-Italic.I1ipWe7Q.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-Regular.DZLUzqI4.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-SemiBold.sKQIyTMz.woff2 +0 -0
- streamlit/static/static/media/SourceSansPro-SemiBoldItalic.C0wP0icr.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-Bold.8TUnKj4x.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-BoldItalic.CBVO7Ve7.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-Italic.DkFgL2HZ.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-Regular.CNJNET2S.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-SemiBold.CHyh9GC5.woff2 +0 -0
- streamlit/static/static/media/SourceSerifPro-SemiBoldItalic.CBtz8sWN.woff2 +0 -0
- streamlit/static/static/media/balloon-0.Czj7AKwE.png +0 -0
- streamlit/static/static/media/balloon-1.CNvFFrND.png +0 -0
- streamlit/static/static/media/balloon-2.DTvC6B1t.png +0 -0
- streamlit/static/static/media/balloon-3.CgSk4tbL.png +0 -0
- streamlit/static/static/media/balloon-4.mbtFrzxf.png +0 -0
- streamlit/static/static/media/balloon-5.CSwkUfRA.png +0 -0
- streamlit/static/static/media/fireworks.B4d-_KUe.gif +0 -0
- streamlit/static/static/media/flake-0.DgWaVvm5.png +0 -0
- streamlit/static/static/media/flake-1.B2r5AHMK.png +0 -0
- streamlit/static/static/media/flake-2.BnWSExPC.png +0 -0
- streamlit/static/static/media/snowflake.JU2jBHL8.svg +11 -0
- streamlit/string_util.py +203 -0
- streamlit/temporary_directory.py +56 -0
- streamlit/testing/__init__.py +13 -0
- streamlit/testing/v1/__init__.py +17 -0
- streamlit/testing/v1/app_test.py +1050 -0
- streamlit/testing/v1/element_tree.py +2083 -0
- streamlit/testing/v1/local_script_runner.py +180 -0
- streamlit/testing/v1/util.py +53 -0
- streamlit/time_util.py +75 -0
- streamlit/type_util.py +460 -0
- streamlit/url_util.py +122 -0
- streamlit/user_info.py +519 -0
- streamlit/util.py +72 -0
- streamlit/vendor/__init__.py +0 -0
- streamlit/vendor/pympler/__init__.py +0 -0
- streamlit/vendor/pympler/asizeof.py +2869 -0
- streamlit/version.py +18 -0
- streamlit/watcher/__init__.py +28 -0
- streamlit/watcher/event_based_path_watcher.py +406 -0
- streamlit/watcher/folder_black_list.py +82 -0
- streamlit/watcher/local_sources_watcher.py +233 -0
- streamlit/watcher/path_watcher.py +185 -0
- streamlit/watcher/polling_path_watcher.py +124 -0
- streamlit/watcher/util.py +207 -0
- streamlit/web/__init__.py +13 -0
- streamlit/web/bootstrap.py +353 -0
- streamlit/web/cache_storage_manager_config.py +38 -0
- streamlit/web/cli.py +369 -0
- streamlit/web/server/__init__.py +26 -0
- streamlit/web/server/app_static_file_handler.py +93 -0
- streamlit/web/server/authlib_tornado_integration.py +60 -0
- streamlit/web/server/browser_websocket_handler.py +246 -0
- streamlit/web/server/component_request_handler.py +116 -0
- streamlit/web/server/media_file_handler.py +141 -0
- streamlit/web/server/oauth_authlib_routes.py +176 -0
- streamlit/web/server/oidc_mixin.py +108 -0
- streamlit/web/server/routes.py +295 -0
- streamlit/web/server/server.py +479 -0
- streamlit/web/server/server_util.py +161 -0
- streamlit/web/server/stats_request_handler.py +95 -0
- streamlit/web/server/upload_file_request_handler.py +137 -0
- streamlit/web/server/websocket_headers.py +56 -0
- streamlit_nightly-1.43.2.dev20250307.data/scripts/streamlit.cmd +16 -0
- streamlit_nightly-1.43.2.dev20250307.dist-info/METADATA +207 -0
- streamlit_nightly-1.43.2.dev20250307.dist-info/RECORD +563 -0
- streamlit_nightly-1.43.2.dev20250307.dist-info/WHEEL +5 -0
- streamlit_nightly-1.43.2.dev20250307.dist-info/entry_points.txt +2 -0
- streamlit_nightly-1.43.2.dev20250307.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1416 @@
|
|
1
|
+
# Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2025)
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
"""A bunch of useful utilities for dealing with dataframes."""
|
16
|
+
|
17
|
+
from __future__ import annotations
|
18
|
+
|
19
|
+
import contextlib
|
20
|
+
import dataclasses
|
21
|
+
import inspect
|
22
|
+
import math
|
23
|
+
import re
|
24
|
+
from collections import ChainMap, UserDict, UserList, deque
|
25
|
+
from collections.abc import ItemsView, Iterable, Mapping, Sequence
|
26
|
+
from enum import Enum, EnumMeta, auto
|
27
|
+
from types import MappingProxyType
|
28
|
+
from typing import (
|
29
|
+
TYPE_CHECKING,
|
30
|
+
Any,
|
31
|
+
Final,
|
32
|
+
Protocol,
|
33
|
+
TypeVar,
|
34
|
+
Union,
|
35
|
+
cast,
|
36
|
+
runtime_checkable,
|
37
|
+
)
|
38
|
+
|
39
|
+
from typing_extensions import TypeAlias, TypeGuard
|
40
|
+
|
41
|
+
from streamlit import config, errors, logger, string_util
|
42
|
+
from streamlit.type_util import (
|
43
|
+
CustomDict,
|
44
|
+
NumpyShape,
|
45
|
+
has_callable_attr,
|
46
|
+
is_custom_dict,
|
47
|
+
is_dataclass_instance,
|
48
|
+
is_list_like,
|
49
|
+
is_namedtuple,
|
50
|
+
is_pydantic_model,
|
51
|
+
is_type,
|
52
|
+
is_version_less_than,
|
53
|
+
)
|
54
|
+
|
55
|
+
if TYPE_CHECKING:
|
56
|
+
import numpy as np
|
57
|
+
import pyarrow as pa
|
58
|
+
from pandas import DataFrame, Index, Series
|
59
|
+
from pandas.core.indexing import _iLocIndexer
|
60
|
+
from pandas.io.formats.style import Styler
|
61
|
+
|
62
|
+
_LOGGER: Final = logger.get_logger(__name__)
|
63
|
+
|
64
|
+
|
65
|
+
# Maximum number of rows to request from an unevaluated (out-of-core) dataframe
_MAX_UNEVALUATED_DF_ROWS = 10000

# Matches the fully-qualified type name of any pandas object.
_PANDAS_DATA_OBJECT_TYPE_RE: Final = re.compile(r"^pandas.*$")

# Fully-qualified type names used for duck-typed detection of third-party
# dataframe libraries (checked via string comparison so the libraries don't
# need to be installed).
_DASK_DATAFRAME: Final = "dask.dataframe.dask_expr._collection.DataFrame"
_DASK_SERIES: Final = "dask.dataframe.dask_expr._collection.Series"
_DASK_INDEX: Final = "dask.dataframe.dask_expr._collection.Index"
# Dask removed the old legacy types, to support older and newer versions
# we are still supporting the old and new types.
_DASK_DATAFRAME_LEGACY: Final = "dask.dataframe.core.DataFrame"
_DASK_SERIES_LEGACY: Final = "dask.dataframe.core.Series"
_DASK_INDEX_LEGACY: Final = "dask.dataframe.core.Index"
_DUCKDB_RELATION: Final = "duckdb.duckdb.DuckDBPyRelation"
_MODIN_DF_TYPE_STR: Final = "modin.pandas.dataframe.DataFrame"
_MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
_PANDAS_STYLER_TYPE_STR: Final = "pandas.io.formats.style.Styler"
_POLARS_DATAFRAME: Final = "polars.dataframe.frame.DataFrame"
_POLARS_LAZYFRAME: Final = "polars.lazyframe.frame.LazyFrame"
_POLARS_SERIES: Final = "polars.series.series.Series"
_PYSPARK_DF_TYPE_STR: Final = "pyspark.sql.dataframe.DataFrame"
_PYSPARK_CONNECT_DF_TYPE_STR: Final = "pyspark.sql.connect.dataframe.DataFrame"
_RAY_DATASET: Final = "ray.data.dataset.Dataset"
_RAY_MATERIALIZED_DATASET: Final = "ray.data.dataset.MaterializedDataset"
_SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
_SNOWPANDAS_INDEX_TYPE_STR: Final = (
    "snowflake.snowpark.modin.plugin.extensions.index.Index"
)
_SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"
_SNOWPARK_DF_ROW_TYPE_STR: Final = "snowflake.snowpark.row.Row"
_SNOWPARK_DF_TYPE_STR: Final = "snowflake.snowpark.dataframe.DataFrame"
_SNOWPARK_TABLE_TYPE_STR: Final = "snowflake.snowpark.table.Table"
_XARRAY_DATASET_TYPE_STR: Final = "xarray.core.dataset.Dataset"
_XARRAY_DATA_ARRAY_TYPE_STR: Final = "xarray.core.dataarray.DataArray"

# Covariant element type used by OptionSequence / DataFrameGenericAlias below.
V_co = TypeVar(
    "V_co",
    covariant=True,  # https://peps.python.org/pep-0484/#covariance-and-contravariance
)
|
104
|
+
|
105
|
+
|
106
|
+
@runtime_checkable
class DBAPICursor(Protocol):
    """Protocol for DBAPI 2.0 Cursor objects (PEP 249).

    This is a simplified version of the DBAPI Cursor protocol
    that only contains the methods that are relevant or used for
    our DB API Integration.

    Specification: https://peps.python.org/pep-0249/
    Inspired by: https://github.com/python/typeshed/blob/main/stdlib/_typeshed/dbapi.pyi
    """

    # Per PEP 249: a sequence of 7-item column descriptions
    # (name, type_code, display_size, internal_size, precision, scale,
    # null_ok), or None if no result set is available.
    @property
    def description(
        self,
    ) -> (
        Sequence[
            tuple[
                str,
                Any | None,
                int | None,
                int | None,
                int | None,
                int | None,
                bool | None,
            ]
        ]
        | None
    ): ...
    def fetchmany(self, size: int = ..., /) -> Sequence[Sequence[Any]]: ...
    def fetchall(self) -> Sequence[Sequence[Any]]: ...
|
137
|
+
|
138
|
+
|
139
|
+
class DataFrameGenericAlias(Protocol[V_co]):
    """Technically not a GenericAlias, but serves the same purpose in
    OptionSequence below, in that it is a type which admits DataFrame,
    but is generic. This allows OptionSequence to be a fully generic type,
    significantly increasing its usefulness.

    We can't use types.GenericAlias, as it is only available from python>=3.9,
    and isn't easily back-ported.
    """

    # Positional indexer property — the only member needed to structurally
    # match pandas DataFrame-like objects here.
    @property
    def iloc(self) -> _iLocIndexer: ...
|
151
|
+
|
152
|
+
|
153
|
+
class PandasCompatible(Protocol):
    """Protocol for Pandas compatible objects that have a `to_pandas` method."""

    def to_pandas(self) -> DataFrame | Series: ...
|
157
|
+
|
158
|
+
|
159
|
+
class DataframeInterchangeCompatible(Protocol):
    """Protocol for objects that support the dataframe-interchange protocol.

    https://data-apis.org/dataframe-protocol/latest/index.html
    """

    def __dataframe__(self, allow_copy: bool) -> Any: ...
|
166
|
+
|
167
|
+
|
168
|
+
# Generic "sequence of options" input type: anything iterable, a DataFrame-like
# object (first column is used), or an object convertible to pandas.
OptionSequence: TypeAlias = Union[
    Iterable[V_co],
    DataFrameGenericAlias[V_co],
    PandasCompatible,
    DataframeInterchangeCompatible,
]
|
174
|
+
|
175
|
+
# Various data types supported by our dataframe processing
# used for commands like `st.dataframe`, `st.table`, `st.map`,
# `st.line_chart`...
Data: TypeAlias = Union[
    "DataFrame",
    "Series",
    "Styler",
    "Index",
    "pa.Table",
    "pa.Array",
    "np.ndarray[Any, np.dtype[Any]]",
    Iterable[Any],
    "Mapping[Any, Any]",
    DBAPICursor,
    PandasCompatible,
    DataframeInterchangeCompatible,
    CustomDict,
    None,
]
|
194
|
+
|
195
|
+
|
196
|
+
class DataFormat(Enum):
    """DataFormat is used to determine the format of the data."""

    UNKNOWN = auto()
    EMPTY = auto()  # None

    COLUMN_INDEX_MAPPING = auto()  # {column: {index: value}}
    COLUMN_SERIES_MAPPING = auto()  # {column: Series(values)}
    COLUMN_VALUE_MAPPING = auto()  # {column: List[values]}
    DASK_OBJECT = auto()  # dask.dataframe.core.DataFrame, Series, Index
    DBAPI_CURSOR = auto()  # DBAPI Cursor (PEP 249)
    DUCKDB_RELATION = auto()  # DuckDB Relation
    KEY_VALUE_DICT = auto()  # {index: value}
    LIST_OF_RECORDS = auto()  # List[Dict[str, Scalar]]
    LIST_OF_ROWS = auto()  # List[List[Scalar]]
    LIST_OF_VALUES = auto()  # List[Scalar]
    MODIN_OBJECT = auto()  # Modin DataFrame, Series
    NUMPY_LIST = auto()  # np.array[Scalar]
    NUMPY_MATRIX = auto()  # np.array[List[Scalar]]
    PANDAS_ARRAY = auto()  # pd.array
    PANDAS_DATAFRAME = auto()  # pd.DataFrame
    PANDAS_INDEX = auto()  # pd.Index
    PANDAS_SERIES = auto()  # pd.Series
    PANDAS_STYLER = auto()  # pandas Styler
    POLARS_DATAFRAME = auto()  # polars.dataframe.frame.DataFrame
    POLARS_LAZYFRAME = auto()  # polars.lazyframe.frame.LazyFrame
    POLARS_SERIES = auto()  # polars.series.series.Series
    PYARROW_ARRAY = auto()  # pyarrow.Array
    PYARROW_TABLE = auto()  # pyarrow.Table
    PYSPARK_OBJECT = auto()  # pyspark.DataFrame
    RAY_DATASET = auto()  # ray.data.dataset.Dataset, MaterializedDataset
    SET_OF_VALUES = auto()  # Set[Scalar]
    SNOWPANDAS_OBJECT = auto()  # Snowpandas DataFrame, Series
    SNOWPARK_OBJECT = auto()  # Snowpark DataFrame, Table, List[Row]
    TUPLE_OF_VALUES = auto()  # Tuple[Scalar]
    XARRAY_DATASET = auto()  # xarray.Dataset
    XARRAY_DATA_ARRAY = auto()  # xarray.DataArray
|
233
|
+
|
234
|
+
|
235
|
+
def is_pyarrow_version_less_than(v: str) -> bool:
    """Check whether the installed pyarrow version is older than ``v``.

    Parameters
    ----------
    v : str
        Version string to compare against, e.g. "0.25.0".

    Returns
    -------
    bool
        True if the installed pyarrow version is strictly less than ``v``.

    Raises
    ------
    InvalidVersion
        If either version string cannot be parsed.
    """
    import pyarrow as pa

    installed_version = pa.__version__
    return is_version_less_than(installed_version, v)
|
257
|
+
|
258
|
+
|
259
|
+
def is_pandas_version_less_than(v: str) -> bool:
    """Check whether the installed pandas version is older than ``v``.

    Parameters
    ----------
    v : str
        Version string to compare against, e.g. "0.25.0".

    Returns
    -------
    bool
        True if the installed pandas version is strictly less than ``v``.

    Raises
    ------
    InvalidVersion
        If either version string cannot be parsed.
    """
    import pandas as pd

    installed_version = pd.__version__
    return is_version_less_than(installed_version, v)
|
280
|
+
|
281
|
+
|
282
|
+
def is_dataframe_like(obj: object) -> bool:
    """Check whether ``obj`` maps to one of our dataframe-like data formats.

    Basic scalar types are rejected up front. Plain lists and dicts are not
    excluded by the early check because some list/dict shapes are still
    classified as dataframe-like by the format detection.
    """
    if obj is None or isinstance(obj, (tuple, set, str, bytes, int, float, bool)):
        # Cheap early exit for basic types — they can never be
        # dataframe-like, so skip the (more expensive) format detection.
        return False

    dataframe_formats = frozenset(
        {
            DataFormat.COLUMN_SERIES_MAPPING,
            DataFormat.DASK_OBJECT,
            DataFormat.DBAPI_CURSOR,
            DataFormat.MODIN_OBJECT,
            DataFormat.NUMPY_LIST,
            DataFormat.NUMPY_MATRIX,
            DataFormat.PANDAS_ARRAY,
            DataFormat.PANDAS_DATAFRAME,
            DataFormat.PANDAS_INDEX,
            DataFormat.PANDAS_SERIES,
            DataFormat.PANDAS_STYLER,
            DataFormat.POLARS_DATAFRAME,
            DataFormat.POLARS_LAZYFRAME,
            DataFormat.POLARS_SERIES,
            DataFormat.PYARROW_ARRAY,
            DataFormat.PYARROW_TABLE,
            DataFormat.PYSPARK_OBJECT,
            DataFormat.RAY_DATASET,
            DataFormat.SNOWPANDAS_OBJECT,
            DataFormat.SNOWPARK_OBJECT,
            DataFormat.XARRAY_DATASET,
            DataFormat.XARRAY_DATA_ARRAY,
        }
    )
    return determine_data_format(obj) in dataframe_formats
|
319
|
+
|
320
|
+
|
321
|
+
def is_unevaluated_data_object(obj: object) -> bool:
    """Check whether ``obj`` is one of the supported unevaluated data objects.

    Currently supported objects are:
    - Snowpark DataFrame / Table
    - PySpark DataFrame
    - Modin DataFrame / Series
    - Snowpandas DataFrame / Series / Index
    - Dask DataFrame / Series / Index
    - Ray Dataset
    - Polars LazyFrame
    - Generator functions
    - DB API 2.0 Cursor (PEP 249)
    - DuckDB Relation (Relational API)

    Unevaluated means that the data is not yet in the local memory.
    Unevaluated data objects are treated differently from other data objects
    by only requesting a subset of the data instead of loading all of it.
    """
    # Each predicate is cheap; any() short-circuits on the first match,
    # matching the original or-chain's evaluation order.
    checks = (
        is_snowpark_data_object,
        is_pyspark_data_object,
        is_snowpandas_data_object,
        is_modin_data_object,
        is_ray_dataset,
        is_polars_lazyframe,
        is_dask_object,
        is_duckdb_relation,
        is_dbapi_cursor,
        inspect.isgeneratorfunction,
    )
    return any(check(obj) for check in checks)
|
352
|
+
|
353
|
+
|
354
|
+
def is_pandas_data_object(obj: object) -> bool:
    """Check whether ``obj`` is any pandas object.

    Matches DataFrame, Series, Index, Styler, and every other type whose
    fully-qualified name starts with "pandas".
    """
    return is_type(obj, _PANDAS_DATA_OBJECT_TYPE_RE)
|
357
|
+
|
358
|
+
|
359
|
+
def is_snowpark_data_object(obj: object) -> bool:
    """Check whether ``obj`` is a Snowpark DataFrame or Table."""
    return any(
        is_type(obj, type_name)
        for type_name in (_SNOWPARK_TABLE_TYPE_STR, _SNOWPARK_DF_TYPE_STR)
    )
|
362
|
+
|
363
|
+
|
364
|
+
def is_snowpark_row_list(obj: object) -> bool:
    """Check whether ``obj`` is a non-empty list of snowflake.snowpark.row.Row.

    Only the first element is inspected — consistent with the original
    heuristic, a homogeneous list is assumed.
    """
    if not isinstance(obj, list) or len(obj) == 0:
        return False
    first_row = obj[0]
    return is_type(first_row, _SNOWPARK_DF_ROW_TYPE_STR) and has_callable_attr(
        first_row, "as_dict"
    )
|
372
|
+
|
373
|
+
|
374
|
+
def is_pyspark_data_object(obj: object) -> bool:
    """Check whether ``obj`` is a PySpark or PySpark Connect dataframe.

    In addition to the type-name check, the object must expose a callable
    ``toPandas`` so we can materialize it later.
    """
    matches_spark_type = is_type(obj, _PYSPARK_DF_TYPE_STR) or is_type(
        obj, _PYSPARK_CONNECT_DF_TYPE_STR
    )
    return matches_spark_type and has_callable_attr(obj, "toPandas")
|
379
|
+
|
380
|
+
|
381
|
+
def is_dask_object(obj: object) -> bool:
    """Check whether ``obj`` is a Dask DataFrame, Series, or Index.

    Both the current (dask_expr) and the legacy (dask.dataframe.core)
    type names are supported.
    """
    dask_type_names = (
        _DASK_DATAFRAME,
        _DASK_DATAFRAME_LEGACY,
        _DASK_SERIES,
        _DASK_SERIES_LEGACY,
        _DASK_INDEX,
        _DASK_INDEX_LEGACY,
    )
    return any(is_type(obj, type_name) for type_name in dask_type_names)
|
391
|
+
|
392
|
+
|
393
|
+
def is_modin_data_object(obj: object) -> bool:
    """Check whether ``obj`` is a Modin DataFrame or Series."""
    return any(
        is_type(obj, type_name)
        for type_name in (_MODIN_DF_TYPE_STR, _MODIN_SERIES_TYPE_STR)
    )
|
396
|
+
|
397
|
+
|
398
|
+
def is_snowpandas_data_object(obj: object) -> bool:
    """Check whether ``obj`` is a Snowpark Pandas DataFrame, Series, or Index."""
    snowpandas_type_names = (
        _SNOWPANDAS_DF_TYPE_STR,
        _SNOWPANDAS_SERIES_TYPE_STR,
        _SNOWPANDAS_INDEX_TYPE_STR,
    )
    return any(is_type(obj, type_name) for type_name in snowpandas_type_names)
|
405
|
+
|
406
|
+
|
407
|
+
def is_polars_dataframe(obj: object) -> bool:
    """Check whether ``obj`` is a Polars DataFrame (by type name)."""
    return is_type(obj, _POLARS_DATAFRAME)
|
410
|
+
|
411
|
+
|
412
|
+
def is_xarray_dataset(obj: object) -> bool:
    """Check whether ``obj`` is an xarray Dataset (by type name)."""
    return is_type(obj, _XARRAY_DATASET_TYPE_STR)
|
415
|
+
|
416
|
+
|
417
|
+
def is_xarray_data_array(obj: object) -> bool:
    """Check whether ``obj`` is an xarray DataArray (by type name)."""
    return is_type(obj, _XARRAY_DATA_ARRAY_TYPE_STR)
|
420
|
+
|
421
|
+
|
422
|
+
def is_polars_series(obj: object) -> bool:
    """Check whether ``obj`` is a Polars Series (by type name)."""
    return is_type(obj, _POLARS_SERIES)
|
425
|
+
|
426
|
+
|
427
|
+
def is_polars_lazyframe(obj: object) -> bool:
    """Check whether ``obj`` is a Polars LazyFrame (by type name)."""
    return is_type(obj, _POLARS_LAZYFRAME)
|
430
|
+
|
431
|
+
|
432
|
+
def is_ray_dataset(obj: object) -> bool:
    """Check whether ``obj`` is a Ray Dataset or MaterializedDataset."""
    return any(
        is_type(obj, type_name)
        for type_name in (_RAY_DATASET, _RAY_MATERIALIZED_DATASET)
    )
|
435
|
+
|
436
|
+
|
437
|
+
def is_pandas_styler(obj: object) -> TypeGuard[Styler]:
    """Check whether ``obj`` is a pandas Styler (by type name).

    Acts as a type guard so callers may access Styler attributes afterwards.
    """
    return is_type(obj, _PANDAS_STYLER_TYPE_STR)
|
440
|
+
|
441
|
+
|
442
|
+
def is_dbapi_cursor(obj: object) -> TypeGuard[DBAPICursor]:
    """True if obj looks like a DB API 2.0 Cursor.

    Structural (runtime-checkable Protocol) check — any object exposing
    ``description``, ``fetchmany``, and ``fetchall`` matches.

    https://peps.python.org/pep-0249/
    """
    return isinstance(obj, DBAPICursor)
|
448
|
+
|
449
|
+
|
450
|
+
def is_duckdb_relation(obj: object) -> bool:
    """Check whether ``obj`` is a DuckDB relation (by type name).

    https://duckdb.org/docs/api/python/relational_api
    """
    return is_type(obj, _DUCKDB_RELATION)
|
457
|
+
|
458
|
+
|
459
|
+
def _is_list_of_scalars(data: Iterable[Any]) -> bool:
|
460
|
+
"""Check if the list only contains scalar values."""
|
461
|
+
from pandas.api.types import infer_dtype
|
462
|
+
|
463
|
+
# Overview on all value that are interpreted as scalar:
|
464
|
+
# https://pandas.pydata.org/docs/reference/api/pandas.api.types.is_scalar.html
|
465
|
+
return infer_dtype(data, skipna=True) not in ["mixed", "unknown-array"]
|
466
|
+
|
467
|
+
|
468
|
+
def _iterable_to_list(
|
469
|
+
iterable: Iterable[Any], max_iterations: int | None = None
|
470
|
+
) -> list[Any]:
|
471
|
+
"""Convert an iterable to a list.
|
472
|
+
|
473
|
+
Parameters
|
474
|
+
----------
|
475
|
+
iterable : Iterable
|
476
|
+
The iterable to convert to a list.
|
477
|
+
|
478
|
+
max_iterations : int or None
|
479
|
+
The maximum number of iterations to perform. If None, all iterations are performed.
|
480
|
+
|
481
|
+
Returns
|
482
|
+
-------
|
483
|
+
list
|
484
|
+
The converted list.
|
485
|
+
"""
|
486
|
+
if max_iterations is None:
|
487
|
+
return list(iterable)
|
488
|
+
|
489
|
+
result = []
|
490
|
+
for i, item in enumerate(iterable):
|
491
|
+
if i >= max_iterations:
|
492
|
+
break
|
493
|
+
result.append(item)
|
494
|
+
return result
|
495
|
+
|
496
|
+
|
497
|
+
def _fix_column_naming(data_df: DataFrame) -> DataFrame:
|
498
|
+
"""Rename the first column to "value" if it is not named
|
499
|
+
and if there is only one column in the dataframe.
|
500
|
+
|
501
|
+
The default name of the first column is 0 if it is not named
|
502
|
+
which is not very descriptive.
|
503
|
+
"""
|
504
|
+
|
505
|
+
if len(data_df.columns) == 1 and data_df.columns[0] == 0:
|
506
|
+
# Pandas automatically names the first column with 0 if it is not named.
|
507
|
+
# We rename it to "value" to make it more descriptive if there is only
|
508
|
+
# one column in the dataframe.
|
509
|
+
data_df = data_df.rename(columns={0: "value"})
|
510
|
+
return data_df
|
511
|
+
|
512
|
+
|
513
|
+
def _dict_to_pandas_df(data: dict[Any, Any]) -> DataFrame:
    """Convert a key-value dict to a Pandas DataFrame.

    Keys become the index (orient="index") and the resulting single column,
    if unnamed, is renamed to "value".

    Parameters
    ----------
    data : dict
        The dict to convert to a Pandas DataFrame.

    Returns
    -------
    pandas.DataFrame
        The converted Pandas DataFrame.
    """
    import pandas as pd

    df = pd.DataFrame.from_dict(data, orient="index")
    return _fix_column_naming(df)
|
529
|
+
|
530
|
+
|
531
|
+
def convert_anything_to_pandas_df(
    data: Any,
    max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
    ensure_copy: bool = False,
) -> DataFrame:
    """Try to convert different formats to a Pandas Dataframe.

    Parameters
    ----------
    data : dataframe-, array-, or collections-like object
        The data to convert to a Pandas DataFrame.

    max_unevaluated_rows: int
        If unevaluated data is detected this func will evaluate it,
        taking max_unevaluated_rows, defaults to 10k.

    ensure_copy: bool
        If True, make sure to always return a copy of the data. If False, it depends on
        the type of the data. For example, a Pandas DataFrame will be returned as-is.

    Returns
    -------
    pandas.DataFrame

    """
    import array

    import numpy as np
    import pandas as pd

    # NOTE: the branch ORDER below matters — e.g. the Styler check must run
    # before the generic to_pandas fallback. Branches are ordered roughly by
    # expected frequency / specificity.

    if isinstance(data, pd.DataFrame):
        return data.copy() if ensure_copy else cast(pd.DataFrame, data)

    if isinstance(data, (pd.Series, pd.Index, pd.api.extensions.ExtensionArray)):
        return pd.DataFrame(data)

    if is_pandas_styler(data):
        # Unwrap the underlying DataFrame; styling information is dropped.
        return cast(pd.DataFrame, data.data.copy() if ensure_copy else data.data)

    if isinstance(data, np.ndarray):
        # A 0-dimensional array has no rows -> empty dataframe.
        return (
            pd.DataFrame([])
            if len(data.shape) == 0
            else _fix_column_naming(pd.DataFrame(data))
        )

    if is_polars_dataframe(data):
        data = data.clone() if ensure_copy else data
        return data.to_pandas()

    if is_polars_series(data):
        data = data.clone() if ensure_copy else data
        return data.to_pandas().to_frame()

    if is_polars_lazyframe(data):
        # Unevaluated: materialize at most max_unevaluated_rows rows.
        data = data.limit(max_unevaluated_rows).collect().to_pandas()
        if data.shape[0] == max_unevaluated_rows:
            # Hitting the limit exactly is treated as "probably truncated".
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `collect()` on the dataframe to show more."
            )
        return cast(pd.DataFrame, data)

    if is_xarray_dataset(data):
        if ensure_copy:
            data = data.copy(deep=True)
        return data.to_dataframe()

    if is_xarray_data_array(data):
        if ensure_copy:
            data = data.copy(deep=True)
        return data.to_series().to_frame()

    if is_dask_object(data):
        data = data.head(max_unevaluated_rows, compute=True)

        # Dask returns a Pandas object (DataFrame, Series, Index) when
        # executing operations like `head`.
        if isinstance(data, (pd.Series, pd.Index)):
            data = data.to_frame()

        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `compute()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)

    if is_ray_dataset(data):
        data = data.limit(max_unevaluated_rows).to_pandas()

        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `to_pandas()` on the dataset to show more."
            )
        return cast(pd.DataFrame, data)

    if is_modin_data_object(data):
        data = data.head(max_unevaluated_rows)._to_pandas()

        if isinstance(data, (pd.Series, pd.Index)):
            data = data.to_frame()

        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `_to_pandas()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)

    if is_pyspark_data_object(data):
        data = data.limit(max_unevaluated_rows).toPandas()
        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `toPandas()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)

    if is_snowpandas_data_object(data):
        data = data[:max_unevaluated_rows].to_pandas()

        if isinstance(data, (pd.Series, pd.Index)):
            data = data.to_frame()

        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `to_pandas()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)

    if is_snowpark_data_object(data):
        data = data.limit(max_unevaluated_rows).to_pandas()
        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `to_pandas()` on the data object to show more."
            )
        return cast(pd.DataFrame, data)

    if is_duckdb_relation(data):
        data = data.limit(max_unevaluated_rows).df()
        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `df()` on the relation to show more."
            )
        return data

    if is_dbapi_cursor(data):
        # Based on the specification, the first item in the description is the
        # column name (if available)
        columns = (
            [d[0] if d else "" for d in data.description] if data.description else None
        )
        data = pd.DataFrame(data.fetchmany(max_unevaluated_rows), columns=columns)
        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Call `fetchall()` on the Cursor to show more."
            )
        return data

    if is_snowpark_row_list(data):
        return pd.DataFrame([row.as_dict() for row in data])

    if has_callable_attr(data, "to_pandas"):
        # Generic duck-typed fallback for any to_pandas-capable object.
        return pd.DataFrame(data.to_pandas())

    # Check for dataframe interchange protocol
    # Only available in pandas >= 1.5.0
    # https://pandas.pydata.org/docs/whatsnew/v1.5.0.html#dataframe-interchange-protocol-implementation
    if (
        has_callable_attr(data, "__dataframe__")
        and is_pandas_version_less_than("1.5.0") is False
    ):
        data_df = pd.api.interchange.from_dataframe(data)
        return data_df.copy() if ensure_copy else data_df

    # Support for generator functions
    if inspect.isgeneratorfunction(data):
        data = _fix_column_naming(
            pd.DataFrame(_iterable_to_list(data(), max_iterations=max_unevaluated_rows))
        )

        if data.shape[0] == max_unevaluated_rows:
            _show_data_information(
                f"⚠️ Showing only {string_util.simplify_number(max_unevaluated_rows)} "
                "rows. Convert the data to a list to show more."
            )
        return data

    if isinstance(data, EnumMeta):
        # Support for enum classes
        return _fix_column_naming(pd.DataFrame([c.value for c in data]))  # type: ignore

    # Support for some list like objects
    if isinstance(data, (deque, map, array.ArrayType, UserList)):
        return _fix_column_naming(pd.DataFrame(list(data)))

    # Support for Streamlit's custom dict-like objects
    if is_custom_dict(data):
        return _dict_to_pandas_df(data.to_dict())

    # Support for named tuples
    if is_namedtuple(data):
        return _dict_to_pandas_df(data._asdict())

    # Support for dataclass instances
    if is_dataclass_instance(data):
        return _dict_to_pandas_df(dataclasses.asdict(data))

    # Support for dict-like objects
    if isinstance(data, (ChainMap, MappingProxyType, UserDict)) or is_pydantic_model(
        data
    ):
        return _dict_to_pandas_df(dict(data))

    # Try to convert to pandas.DataFrame. This will raise an error if data is
    # not compatible with the pandas.DataFrame constructor.
    try:
        return _fix_column_naming(pd.DataFrame(data))
    except ValueError as ex:
        if isinstance(data, dict):
            with contextlib.suppress(ValueError):
                # Try to use index orient as back-up to support key-value dicts
                return _dict_to_pandas_df(data)
        raise errors.StreamlitAPIException(
            f"""
Unable to convert object of type `{type(data)}` to `pandas.DataFrame`.
Offending object:
```py
{data}
```"""
        ) from ex
|
768
|
+
|
769
|
+
|
770
|
+
def convert_arrow_table_to_arrow_bytes(table: pa.Table) -> bytes:
    """Serialize pyarrow.Table to Arrow IPC bytes.

    Parameters
    ----------
    table : pyarrow.Table
        A table to convert.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    """
    try:
        table = _maybe_truncate_table(table)
    except RecursionError as err:
        # This is a very unlikely edge case, but we want to make sure that
        # it doesn't lead to unexpected behavior.
        # If there is a recursion error, we just return the table as-is
        # which will lead to the normal message limit exceed error.
        _LOGGER.warning(
            "Recursion error while truncating Arrow table. This is not "
            "supposed to happen.",
            exc_info=err,
        )

    import pyarrow as pa

    # Convert table to bytes via an in-memory IPC stream. Using the writer
    # as a context manager guarantees it is closed (and the stream footer
    # written) even if write_table raises.
    sink = pa.BufferOutputStream()
    with pa.RecordBatchStreamWriter(sink, table.schema) as writer:
        writer.write_table(table)
    return cast(bytes, sink.getvalue().to_pybytes())
|
804
|
+
|
805
|
+
|
806
|
+
def convert_pandas_df_to_arrow_bytes(df: DataFrame) -> bytes:
    """Serialize pandas.DataFrame to Arrow IPC bytes.

    If the initial Arrow conversion fails because of incompatible column
    types, the columns are automatically coerced and the conversion retried.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to convert.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    """
    import pyarrow as pa

    try:
        arrow_table = pa.Table.from_pandas(df)
    except (pa.ArrowTypeError, pa.ArrowInvalid, pa.ArrowNotImplementedError) as ex:
        _LOGGER.info(
            "Serialization of dataframe to Arrow table was unsuccessful. "
            "Applying automatic fixes for column types to make the dataframe "
            "Arrow-compatible.",
            exc_info=ex,
        )
        # Retry once with Arrow-compatible column types applied.
        compatible_df = fix_arrow_incompatible_column_types(df)
        arrow_table = pa.Table.from_pandas(compatible_df)
    return convert_arrow_table_to_arrow_bytes(arrow_table)
|
833
|
+
|
834
|
+
|
835
|
+
def convert_arrow_bytes_to_pandas_df(source: bytes) -> DataFrame:
    """Convert Arrow bytes (IPC format) to pandas.DataFrame.

    Using this function in production needs to make sure that
    the pyarrow version >= 14.0.1, because of a critical
    security vulnerability in pyarrow < 14.0.1.

    Parameters
    ----------
    source : bytes
        A bytes object to convert.

    Returns
    -------
    pandas.DataFrame
        The converted dataframe.
    """
    import pyarrow as pa

    # Deserialize the IPC stream and materialize it as a pandas dataframe.
    return pa.RecordBatchStreamReader(source).read_pandas()
|
856
|
+
|
857
|
+
|
858
|
+
def _show_data_information(msg: str) -> None:
    """Show a message to the user with important information
    about the processed dataset.

    Rendered as a caption on the main delta generator.
    """
    # Imported locally, presumably to avoid a circular import at module
    # load time — TODO confirm.
    from streamlit.delta_generator_singletons import get_dg_singleton_instance

    get_dg_singleton_instance().main_dg.caption(msg)
|
864
|
+
|
865
|
+
|
866
|
+
def convert_anything_to_arrow_bytes(
    data: Any,
    max_unevaluated_rows: int = _MAX_UNEVALUATED_DF_ROWS,
) -> bytes:
    """Try to convert different formats to Arrow IPC format (bytes).

    A pyarrow.Table is serialized directly; every other input is first
    converted to a Pandas DataFrame and then serialized.

    Parameters
    ----------
    data : dataframe-, array-, or collections-like object
        The data to convert to Arrow bytes.

    max_unevaluated_rows: int
        If unevaluated data is detected this func will evaluate it,
        taking max_unevaluated_rows, defaults to 10k.

    Returns
    -------
    bytes
        The serialized Arrow IPC bytes.
    """
    import pyarrow as pa

    # Fast path: already an Arrow table.
    if isinstance(data, pa.Table):
        return convert_arrow_table_to_arrow_bytes(data)

    # TODO(lukasmasuch): Add direct conversion to Arrow for supported formats here

    # Fallback: go through the Pandas conversion pipeline first.
    pandas_df = convert_anything_to_pandas_df(data, max_unevaluated_rows)
    return convert_pandas_df_to_arrow_bytes(pandas_df)
|
902
|
+
|
903
|
+
|
904
|
+
def convert_anything_to_list(obj: OptionSequence[V_co]) -> list[V_co]:
    """Coerce a wide range of inputs into a plain Python list.

    Dataframe-like inputs contribute their first column, mappings contribute
    their keys, and scalars or otherwise unconvertible objects are wrapped
    into a single-element list.

    Parameters
    ----------

    obj : dataframe-, array-, or collections-like object
        The object to convert to a list.

    Returns
    -------
    list
        The converted list.
    """
    if obj is None:
        return []  # type: ignore

    # Scalars are simply wrapped:
    if isinstance(obj, (str, int, float, bool)):
        return [obj]

    # Enum classes: string enums contribute the string value of each member,
    # other enums contribute the members themselves.
    if isinstance(obj, EnumMeta):
        return [  # type: ignore
            member.value if isinstance(member, str) else member for member in obj
        ]

    # Mappings contribute their keys:
    if isinstance(obj, Mapping):
        return list(obj.keys())

    if is_list_like(obj) and not is_snowpark_row_list(obj):
        # list() also copies the sequence, which protects the original
        # object from later mutations.
        return list(obj)

    # Everything else goes through our generic DataFrame conversion:
    try:
        # ensure_copy=True because the return value of this function may be
        # stored in a widget serde instance and reused in later script runs;
        # mutations of the caller's options object must not leak into the
        # widget. (See https://github.com/streamlit/streamlit/issues/7534)
        data_df = convert_anything_to_pandas_df(obj, ensure_copy=True)
        if data_df.empty:
            return []
        # Use the first column as the list source:
        return cast(list[V_co], list(data_df.iloc[:, 0].to_list()))
    except errors.StreamlitAPIException:
        # Not convertible at all -> wrap the object into a list.
        return [obj]  # type: ignore
|
959
|
+
|
960
|
+
|
961
|
+
def _maybe_truncate_table(
    table: pa.Table, truncated_rows: int | None = None
) -> pa.Table:
    """Experimental feature to automatically truncate tables that
    are larger than the maximum allowed message size. It needs to be enabled
    via the server.enableArrowTruncation config option.

    Parameters
    ----------
    table : pyarrow.Table
        A table to truncate.

    truncated_rows : int or None
        The number of rows that have been truncated so far. This is used by
        the recursion logic to keep track of the total number of truncated
        rows.

    Returns
    -------
    pyarrow.Table
        The (possibly truncated) table. If truncation was disabled via config
        or the table already fits, the input table is returned unchanged.
    """

    # When the config option is off, this function is a no-op.
    if config.get_option("server.enableArrowTruncation"):
        # This is an optimization problem: We don't know at what row
        # the perfect cut-off is to comply with the max size. But we want to figure
        # it out in as few iterations as possible. We almost always will cut out
        # more than required to keep the iterations low.

        # The maximum size allowed for protobuf messages in bytes
        # (server.maxMessageSize is configured in MB):
        max_message_size = int(config.get_option("server.maxMessageSize") * 1e6)
        # We add 1 MB for other overhead related to the protobuf message.
        # This is a very conservative estimate, but it should be good enough.
        table_size = int(table.nbytes + 1 * 1e6)
        table_rows = table.num_rows

        if table_rows > 1 and table_size > max_message_size:
            # targeted rows == the number of rows the table should be truncated to.
            # Calculate an approximation of how many rows we need to truncate to.
            targeted_rows = math.ceil(table_rows * (max_message_size / table_size))
            # Make sure to cut out at least a couple of rows to avoid running
            # this logic too often since it is quite inefficient and could lead
            # to infinity recursions without these precautions.
            targeted_rows = math.floor(
                max(
                    min(
                        # Cut out:
                        # an additional 5% of the estimated num rows to cut out:
                        targeted_rows - math.floor((table_rows - targeted_rows) * 0.05),
                        # at least 1% of table size:
                        table_rows - (table_rows * 0.01),
                        # at least 5 rows:
                        table_rows - 5,
                    ),
                    1,  # but it should always have at least 1 row
                )
            )
            # Recurse on the sliced table: nbytes of the slice may still be
            # above the limit, so the check is repeated until it fits.
            sliced_table = table.slice(0, targeted_rows)
            return _maybe_truncate_table(
                sliced_table, (truncated_rows or 0) + (table_rows - targeted_rows)
            )

        # If any rows were cut off in earlier recursion steps, inform the
        # user how much of the data is actually displayed.
        if truncated_rows:
            displayed_rows = string_util.simplify_number(table.num_rows)
            total_rows = string_util.simplify_number(table.num_rows + truncated_rows)

            if displayed_rows == total_rows:
                # If the simplified numbers are the same,
                # we just display the exact numbers.
                displayed_rows = str(table.num_rows)
                total_rows = str(table.num_rows + truncated_rows)
            _show_data_information(
                f"⚠️ Showing {displayed_rows} out of {total_rows} "
                "rows due to data size limitations."
            )

    return table
|
1034
|
+
|
1035
|
+
|
1036
|
+
def is_colum_type_arrow_incompatible(column: Series[Any] | Index) -> bool:
    """Check whether the column's dtype is known to break Arrow conversion.

    This covers complex dtypes, a handful of period dtypes that the frontend
    cannot render, the geopandas "geometry" dtype, and object columns whose
    inferred content is incompatible (mixed integers, complex numbers, dicts,
    frozensets, and other non-list-like mixed values).
    """
    from pandas.api.types import infer_dtype, is_dict_like, is_list_like

    # complex64 / complex128 / complex256:
    if column.dtype.kind == "c":
        return True

    # Period units not yet supported by our frontend impl (see comments in
    # Quiver.ts for more details) plus the geopandas geometry dtype:
    if str(column.dtype) in (
        "period[B]",
        "period[N]",
        "period[ns]",
        "period[U]",
        "period[us]",
        "geometry",
    ):
        return True

    if column.dtype != "object":
        # Any other concrete dtype is assumed to be Arrow-compatible.
        return False

    # The dtype of mixed type columns is always object; the actual type of
    # the values can be determined via the infer_dtype function:
    # https://pandas.pydata.org/docs/reference/api/pandas.api.types.infer_dtype.html
    inferred_type = infer_dtype(column, skipna=True)

    if inferred_type in ("mixed-integer", "complex"):
        return True

    if inferred_type != "mixed":
        # We did not detect an incompatible type, so we assume it is compatible:
        return False

    # "mixed" covers most of the more complex/custom types
    # (objects, dicts, lists, ...).
    if len(column) == 0 or not hasattr(column, "iloc"):
        # The column seems to be invalid, so we assume it is incompatible.
        # But this would most likely never happen since empty columns
        # cannot be mixed.
        return True

    # Inspect the first value to decide whether it is a supported
    # list-like type:
    first_value = column.iloc[0]
    return bool(
        not is_list_like(first_value)
        # dicts are list-like, but have issues in Arrow JS (see comments in
        # Quiver.ts)
        or is_dict_like(first_value)
        # Frozensets are list-like, but are not compatible with pyarrow.
        or isinstance(first_value, frozenset)
    )
|
1094
|
+
|
1095
|
+
|
1096
|
+
def fix_arrow_incompatible_column_types(
    df: DataFrame, selected_columns: list[str] | None = None
) -> DataFrame:
    """Fix column types that are not supported by Arrow table.

    This includes mixed types (e.g. mix of integers and strings)
    as well as complex numbers (complex128 type). These types will cause
    errors during conversion of the dataframe to an Arrow table.
    It is fixed by converting all values of the column to strings
    This is sufficient for displaying the data on the frontend.

    Parameters
    ----------
    df : pandas.DataFrame
        A dataframe to fix.

    selected_columns: List[str] or None
        A list of columns to fix. If None, all columns are evaluated.

    Returns
    -------
    The fixed dataframe.
    """
    import pandas as pd

    # Copy lazily: a new frame is only allocated once a fix is actually
    # needed, to preserve memory in the common (compatible) case.
    fixed_df: DataFrame | None = None

    for column_name in selected_columns or df.columns:
        if not is_colum_type_arrow_incompatible(df[column_name]):
            continue
        if fixed_df is None:
            fixed_df = df.copy()
        fixed_df[column_name] = df[column_name].astype("string")

    # The index can also contain mixed types causing Arrow issues during
    # conversion. Multi-indices are skipped since they won't return the
    # correct value from infer_dtype.
    index_needs_fix = (
        not selected_columns
        and not isinstance(df.index, pd.MultiIndex)
        and is_colum_type_arrow_incompatible(df.index)
    )
    if index_needs_fix:
        if fixed_df is None:
            fixed_df = df.copy()
        fixed_df.index = df.index.astype("string")

    return df if fixed_df is None else fixed_df
|
1144
|
+
|
1145
|
+
|
1146
|
+
def determine_data_format(input_data: Any) -> DataFormat:
    """Determine the data format of the input data.

    NOTE: the order of the checks below matters. More specific checks must
    come before more generic ones (e.g. pd.DataFrame before the generic
    is_list_like fallback). A branch that is entered but does not return
    falls through to DataFormat.UNKNOWN at the end.

    Parameters
    ----------
    input_data : Any
        The input data to determine the data format of.

    Returns
    -------
    DataFormat
        The data format of the input data.
    """

    import numpy as np
    import pandas as pd
    import pyarrow as pa

    if input_data is None:
        return DataFormat.EMPTY
    elif isinstance(input_data, pd.DataFrame):
        return DataFormat.PANDAS_DATAFRAME
    elif isinstance(input_data, np.ndarray):
        if len(cast(NumpyShape, input_data.shape)) == 1:
            # For technical reasons, we need to distinguish one
            # one-dimensional numpy array from multidimensional ones.
            return DataFormat.NUMPY_LIST
        return DataFormat.NUMPY_MATRIX
    elif isinstance(input_data, pa.Table):
        return DataFormat.PYARROW_TABLE
    elif isinstance(input_data, pa.Array):
        return DataFormat.PYARROW_ARRAY
    elif isinstance(input_data, pd.Series):
        return DataFormat.PANDAS_SERIES
    elif isinstance(input_data, pd.Index):
        return DataFormat.PANDAS_INDEX
    elif is_pandas_styler(input_data):
        return DataFormat.PANDAS_STYLER
    elif isinstance(input_data, pd.api.extensions.ExtensionArray):
        return DataFormat.PANDAS_ARRAY
    elif is_polars_series(input_data):
        return DataFormat.POLARS_SERIES
    elif is_polars_dataframe(input_data):
        return DataFormat.POLARS_DATAFRAME
    elif is_polars_lazyframe(input_data):
        return DataFormat.POLARS_LAZYFRAME
    elif is_modin_data_object(input_data):
        return DataFormat.MODIN_OBJECT
    elif is_snowpandas_data_object(input_data):
        return DataFormat.SNOWPANDAS_OBJECT
    elif is_pyspark_data_object(input_data):
        return DataFormat.PYSPARK_OBJECT
    elif is_xarray_dataset(input_data):
        return DataFormat.XARRAY_DATASET
    elif is_xarray_data_array(input_data):
        return DataFormat.XARRAY_DATA_ARRAY
    elif is_ray_dataset(input_data):
        return DataFormat.RAY_DATASET
    elif is_dask_object(input_data):
        return DataFormat.DASK_OBJECT
    elif is_snowpark_data_object(input_data) or is_snowpark_row_list(input_data):
        return DataFormat.SNOWPARK_OBJECT
    elif is_duckdb_relation(input_data):
        return DataFormat.DUCKDB_RELATION
    elif is_dbapi_cursor(input_data):
        return DataFormat.DBAPI_CURSOR
    elif (
        # Dict-like and record-like objects are treated as key-value dicts:
        isinstance(
            input_data,
            (ChainMap, UserDict, MappingProxyType),
        )
        or is_dataclass_instance(input_data)
        or is_namedtuple(input_data)
        or is_custom_dict(input_data)
        or is_pydantic_model(input_data)
    ):
        return DataFormat.KEY_VALUE_DICT
    elif isinstance(input_data, (ItemsView, enumerate)):
        # Iterators of (key, value) / (index, value) pairs map to rows:
        return DataFormat.LIST_OF_ROWS
    elif isinstance(input_data, (list, tuple, set, frozenset)):
        if _is_list_of_scalars(input_data):
            # -> one-dimensional data structure
            if isinstance(input_data, tuple):
                return DataFormat.TUPLE_OF_VALUES
            if isinstance(input_data, (set, frozenset)):
                return DataFormat.SET_OF_VALUES
            return DataFormat.LIST_OF_VALUES
        else:
            # -> Multi-dimensional data structure
            # This should always contain at least one element,
            # otherwise the values type from infer_dtype would have been empty
            first_element = next(iter(input_data))
            if isinstance(first_element, dict):
                return DataFormat.LIST_OF_RECORDS
            if isinstance(first_element, (list, tuple, set, frozenset)):
                return DataFormat.LIST_OF_ROWS
            # NOTE: a collection of other element types intentionally falls
            # through to DataFormat.UNKNOWN below.
    elif isinstance(input_data, (dict, Mapping)):
        if not input_data:
            return DataFormat.KEY_VALUE_DICT
        if len(input_data) > 0:
            # Classify by the type of the first value:
            first_value = next(iter(input_data.values()))
            # In the future, we could potentially also support tight & split formats
            if isinstance(first_value, dict):
                return DataFormat.COLUMN_INDEX_MAPPING
            if isinstance(first_value, (list, tuple)):
                return DataFormat.COLUMN_VALUE_MAPPING
            if isinstance(first_value, pd.Series):
                return DataFormat.COLUMN_SERIES_MAPPING
        # Use key-value dict as fallback. However, if the values of the dict
        # contains mixed types, it will become non-editable in the frontend.
        return DataFormat.KEY_VALUE_DICT
    elif is_list_like(input_data):
        return DataFormat.LIST_OF_VALUES

    return DataFormat.UNKNOWN
|
1261
|
+
|
1262
|
+
|
1263
|
+
def _unify_missing_values(df: DataFrame) -> DataFrame:
|
1264
|
+
"""Unify all missing values in a DataFrame to None.
|
1265
|
+
|
1266
|
+
Pandas uses a variety of values to represent missing values, including np.nan,
|
1267
|
+
NaT, None, and pd.NA. This function replaces all of these values with None,
|
1268
|
+
which is the only missing value type that is supported by all data
|
1269
|
+
"""
|
1270
|
+
import numpy as np
|
1271
|
+
import pandas as pd
|
1272
|
+
|
1273
|
+
# Replace all recognized nulls (np.nan, pd.NA, NaT) with None
|
1274
|
+
# then infer objects without creating a separate copy:
|
1275
|
+
# For performance reasons, we could use copy=False here.
|
1276
|
+
# However, this is only available in pandas >=2.
|
1277
|
+
return df.replace([pd.NA, pd.NaT, np.nan], None).infer_objects()
|
1278
|
+
|
1279
|
+
|
1280
|
+
def _pandas_df_to_series(df: DataFrame) -> Series[Any]:
|
1281
|
+
"""Convert a Pandas DataFrame to a Pandas Series by selecting the first column.
|
1282
|
+
|
1283
|
+
Raises
|
1284
|
+
------
|
1285
|
+
ValueError
|
1286
|
+
If the DataFrame has more than one column.
|
1287
|
+
"""
|
1288
|
+
# Select first column in dataframe and create a new series based on the values
|
1289
|
+
if len(df.columns) != 1:
|
1290
|
+
raise ValueError(
|
1291
|
+
f"DataFrame is expected to have a single column but has {len(df.columns)}."
|
1292
|
+
)
|
1293
|
+
return df[df.columns[0]]
|
1294
|
+
|
1295
|
+
|
1296
|
+
def convert_pandas_df_to_data_format(
    df: DataFrame, data_format: DataFormat
) -> (
    DataFrame
    | Series[Any]
    | pa.Table
    | pa.Array
    | np.ndarray[Any, np.dtype[Any]]
    | tuple[Any]
    | list[Any]
    | set[Any]
    | dict[str, Any]
):
    """Convert a Pandas DataFrame to the specified data format.

    Parameters
    ----------
    df : pd.DataFrame
        The dataframe to convert.

    data_format : DataFormat
        The data format to convert to.

    Returns
    -------
    pd.DataFrame, pd.Series, pyarrow.Table, np.ndarray, xarray.Dataset, xarray.DataArray, polars.Dataframe, polars.Series, list, set, tuple, or dict.
        The converted dataframe.
    """

    # Formats that are (or are rendered as) plain dataframes need no
    # further conversion:
    if data_format in {
        DataFormat.EMPTY,
        DataFormat.DASK_OBJECT,
        DataFormat.DBAPI_CURSOR,
        DataFormat.DUCKDB_RELATION,
        DataFormat.MODIN_OBJECT,
        DataFormat.PANDAS_ARRAY,
        DataFormat.PANDAS_DATAFRAME,
        DataFormat.PANDAS_INDEX,
        DataFormat.PANDAS_STYLER,
        DataFormat.PYSPARK_OBJECT,
        DataFormat.RAY_DATASET,
        DataFormat.SNOWPANDAS_OBJECT,
        DataFormat.SNOWPARK_OBJECT,
    }:
        return df

    if data_format == DataFormat.NUMPY_LIST:
        import numpy as np

        # It's a 1-dimensional array, so we only return the first column as
        # a numpy array. Calling to_numpy() on the full DataFrame would
        # result in: [[1], [2]] instead of [1, 2].
        return np.ndarray(0) if df.empty else df.iloc[:, 0].to_numpy()

    if data_format == DataFormat.NUMPY_MATRIX:
        import numpy as np

        return np.ndarray(0) if df.empty else df.to_numpy()

    if data_format == DataFormat.PYARROW_TABLE:
        import pyarrow as pa

        return pa.Table.from_pandas(df)

    if data_format == DataFormat.PYARROW_ARRAY:
        import pyarrow as pa

        return pa.Array.from_pandas(_pandas_df_to_series(df))

    if data_format == DataFormat.PANDAS_SERIES:
        return _pandas_df_to_series(df)

    if data_format in {DataFormat.POLARS_DATAFRAME, DataFormat.POLARS_LAZYFRAME}:
        import polars as pl  # type: ignore[import-not-found]

        return pl.from_pandas(df)

    if data_format == DataFormat.POLARS_SERIES:
        import polars as pl

        return pl.from_pandas(_pandas_df_to_series(df))

    if data_format == DataFormat.XARRAY_DATASET:
        import xarray as xr  # type: ignore[import-not-found]

        return xr.Dataset.from_dataframe(df)

    if data_format == DataFormat.XARRAY_DATA_ARRAY:
        import xarray as xr

        return xr.DataArray.from_series(_pandas_df_to_series(df))

    if data_format == DataFormat.LIST_OF_RECORDS:
        return _unify_missing_values(df).to_dict(orient="records")

    if data_format == DataFormat.LIST_OF_ROWS:
        # to_numpy converts the dataframe to a list of rows.
        return _unify_missing_values(df).to_numpy().tolist()

    if data_format == DataFormat.COLUMN_INDEX_MAPPING:
        return _unify_missing_values(df).to_dict(orient="dict")

    if data_format == DataFormat.COLUMN_VALUE_MAPPING:
        return _unify_missing_values(df).to_dict(orient="list")

    if data_format == DataFormat.COLUMN_SERIES_MAPPING:
        return df.to_dict(orient="series")

    if data_format in {
        DataFormat.LIST_OF_VALUES,
        DataFormat.TUPLE_OF_VALUES,
        DataFormat.SET_OF_VALUES,
    }:
        df = _unify_missing_values(df)
        values: list[Any] = []
        if len(df.columns) == 1:
            # Get the first column and convert to list:
            values = df[df.columns[0]].tolist()
        elif len(df.columns) >= 1:
            # More than one column cannot be represented as a flat
            # collection (a zero-column frame yields an empty collection).
            raise ValueError(
                "DataFrame is expected to have a single column but "
                f"has {len(df.columns)}."
            )
        if data_format == DataFormat.TUPLE_OF_VALUES:
            return tuple(values)
        if data_format == DataFormat.SET_OF_VALUES:
            return set(values)
        return values

    if data_format == DataFormat.KEY_VALUE_DICT:
        df = _unify_missing_values(df)
        # The key is expected to be the index -> this will return the first
        # column as a dict with index as key.
        return {} if df.empty else df.iloc[:, 0].to_dict()

    raise ValueError(f"Unsupported input data format: {data_format}")
|