snowpark-checkpoints-collectors 0.1.0rc2__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/LICENSE +0 -25
- snowpark_checkpoints_collectors-0.1.1/PKG-INFO +143 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/README.md +1 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/pyproject.toml +17 -9
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/__init__.py +22 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/collection_common.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py +24 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/column_collection/__init__.py +22 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/array_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/binary_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/boolean_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/column_collector_base.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/date_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/day_time_interval_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/decimal_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/empty_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/map_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/null_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/numeric_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/string_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/struct_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_column_collector.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py +20 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/singleton.py +23 -0
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py +20 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/summary_stats_collector.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py +53 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/utils/extra_config.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/src/snowflake/snowpark_checkpoints_collector/utils/file_utils.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/utils/telemetry.py +889 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/.coveragerc +1 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/telemetry_compare_utils.py +20 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1.py +14 -3
- snowpark_checkpoints_collectors-0.1.1/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values_telemetry.json +18 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type_telemetry.json +6 -5
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values_telemetry.json +6 -5
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema_telemetry.json +6 -5
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_full_df_telemetry.json +6 -5
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_parquet_directory _telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_collect_empty_dataframe_with_schema_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_collect_invalid_mode_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_generate_parquet_for_spark_df_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_2_expected/test_spark_df_mode_dataframe_telemetry.json +5 -4
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collection_result_file.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_snow_connection_int.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_column_collection.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_extra_config.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_file_utils.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_snow_connection.py +14 -3
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_summary_stats_collector.py +14 -3
- snowpark_checkpoints_collectors-0.1.0rc2/PKG-INFO +0 -347
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/__init__.py +0 -11
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py +0 -13
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/column_collection/__init__.py +0 -11
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py +0 -9
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/singleton.py +0 -12
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py +0 -9
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py +0 -49
- snowpark_checkpoints_collectors-0.1.0rc2/src/snowflake/snowpark_checkpoints_collector/utils/telemetry.py +0 -1
- snowpark_checkpoints_collectors-0.1.0rc2/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values_telemetry.json +0 -17
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/.gitignore +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/CHANGELOG.md +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/snowpark-testdf-schema.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_checkpoint_name.py +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_full_df.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type.json +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_checkpoint_name_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_collection_point_result.py +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_collection_point_result_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/test/unit/test_pandera_column_check_manager.py +0 -0
@@ -175,28 +175,3 @@
|
|
175
175
|
of your accepting any such warranty or additional liability.
|
176
176
|
|
177
177
|
END OF TERMS AND CONDITIONS
|
178
|
-
|
179
|
-
APPENDIX: How to apply the Apache License to your work.
|
180
|
-
|
181
|
-
To apply the Apache License to your work, attach the following
|
182
|
-
boilerplate notice, with the fields enclosed by brackets "[]"
|
183
|
-
replaced with your own identifying information. (Don't include
|
184
|
-
the brackets!) The text should be enclosed in the appropriate
|
185
|
-
comment syntax for the file format. We also recommend that a
|
186
|
-
file or class name and description of purpose be included on the
|
187
|
-
same "printed page" as the copyright notice for easier
|
188
|
-
identification within third-party archives.
|
189
|
-
|
190
|
-
Copyright 2025 Snowflake
|
191
|
-
|
192
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
193
|
-
you may not use this file except in compliance with the License.
|
194
|
-
You may obtain a copy of the License at
|
195
|
-
|
196
|
-
http://www.apache.org/licenses/LICENSE-2.0
|
197
|
-
|
198
|
-
Unless required by applicable law or agreed to in writing, software
|
199
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
200
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201
|
-
See the License for the specific language governing permissions and
|
202
|
-
limitations under the License.
|
@@ -0,0 +1,143 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: snowpark-checkpoints-collectors
|
3
|
+
Version: 0.1.1
|
4
|
+
Summary: Snowpark column and table statistics collection
|
5
|
+
Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
|
6
|
+
Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
|
7
|
+
Author-email: "Snowflake, Inc." <snowflake-python-libraries-dl@snowflake.com>
|
8
|
+
License: Apache License, Version 2.0
|
9
|
+
License-File: LICENSE
|
10
|
+
Keywords: Snowflake,Snowpark,analytics,cloud,database,db
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
12
|
+
Classifier: Environment :: Console
|
13
|
+
Classifier: Environment :: Other Environment
|
14
|
+
Classifier: Intended Audience :: Developers
|
15
|
+
Classifier: Intended Audience :: Education
|
16
|
+
Classifier: Intended Audience :: Information Technology
|
17
|
+
Classifier: Intended Audience :: System Administrators
|
18
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
19
|
+
Classifier: Operating System :: OS Independent
|
20
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
21
|
+
Classifier: Programming Language :: SQL
|
22
|
+
Classifier: Topic :: Database
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
24
|
+
Classifier: Topic :: Software Development
|
25
|
+
Classifier: Topic :: Software Development :: Libraries
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
27
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
28
|
+
Requires-Python: <3.12,>=3.9
|
29
|
+
Requires-Dist: pandera[io]==0.20.4
|
30
|
+
Requires-Dist: pyspark
|
31
|
+
Requires-Dist: snowflake-connector-python==3.13.0
|
32
|
+
Requires-Dist: snowflake-snowpark-python==1.26.0
|
33
|
+
Provides-Extra: development
|
34
|
+
Requires-Dist: coverage>=7.6.7; extra == 'development'
|
35
|
+
Requires-Dist: deepdiff>=8.0.0; extra == 'development'
|
36
|
+
Requires-Dist: hatchling==1.25.0; extra == 'development'
|
37
|
+
Requires-Dist: pre-commit>=4.0.1; extra == 'development'
|
38
|
+
Requires-Dist: pyarrow>=18.0.0; extra == 'development'
|
39
|
+
Requires-Dist: pytest-cov>=6.0.0; extra == 'development'
|
40
|
+
Requires-Dist: pytest>=8.3.3; extra == 'development'
|
41
|
+
Requires-Dist: setuptools>=70.0.0; extra == 'development'
|
42
|
+
Requires-Dist: twine==5.1.1; extra == 'development'
|
43
|
+
Description-Content-Type: text/markdown
|
44
|
+
|
45
|
+
# snowpark-checkpoints-collectors
|
46
|
+
|
47
|
+
---
|
48
|
+
**NOTE**
|
49
|
+
This package is in Public Preview.
|
50
|
+
---
|
51
|
+
**snowpark-checkpoints-collector** package offers a function for extracting information from PySpark dataframes. We can then use that data to validate against the converted Snowpark dataframes to ensure that behavioral equivalence has been achieved.
|
52
|
+
## Features
|
53
|
+
|
54
|
+
- Schema inference collected data mode (Schema): This is the default mode, which leverages Pandera schema inference to obtain the metadata and checks that will be evaluated for the specified dataframe. This mode also collects custom data from columns of the DataFrame based on the PySpark type.
|
55
|
+
- DataFrame collected data mode (DataFrame): This mode collects the data of the PySpark dataframe. In this case, the mechanism saves all data of the given dataframe in parquet format. Using the default user Snowflake connection, it tries to upload the parquet files into the Snowflake temporary stage and create a table based on the information in the stage. The file and the table both have the same name as the checkpoint.
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
## Functionalities
|
60
|
+
|
61
|
+
### Collect DataFrame Checkpoint
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
```python
|
66
|
+
from pyspark.sql import DataFrame as SparkDataFrame
|
67
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
|
68
|
+
from typing import Optional
|
69
|
+
|
70
|
+
# Signature of the function
|
71
|
+
def collect_dataframe_checkpoint(
|
72
|
+
df: SparkDataFrame,
|
73
|
+
checkpoint_name: str,
|
74
|
+
sample: Optional[float] = None,
|
75
|
+
mode: Optional[CheckpointMode] = None,
|
76
|
+
output_path: Optional[str] = None,
|
77
|
+
) -> None:
|
78
|
+
...
|
79
|
+
```
|
80
|
+
|
81
|
+
- `df`: The input Spark dataframe to collect.
|
82
|
+
- `checkpoint_name`: Name of the checkpoint schema file or dataframe.
|
83
|
+
- `sample`: Fraction of DataFrame to sample for schema inference, defaults to 1.0.
|
84
|
+
- `mode`: The mode in which to execute the collection (Schema or Dataframe), defaults to CheckpointMode.SCHEMA.
|
85
|
+
- `output_path`: The output path to save the checkpoint, defaults to current working directory.
|
86
|
+
|
87
|
+
|
88
|
+
## Usage Example
|
89
|
+
|
90
|
+
### Schema mode
|
91
|
+
|
92
|
+
```python
|
93
|
+
from pyspark.sql import SparkSession
|
94
|
+
from snowflake.snowpark_checkpoints_collector import collect_dataframe_checkpoint
|
95
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
|
96
|
+
|
97
|
+
spark_session = SparkSession.builder.getOrCreate()
|
98
|
+
sample_size = 1.0
|
99
|
+
|
100
|
+
pyspark_df = spark_session.createDataFrame(
|
101
|
+
[("apple", 21), ("lemon", 34), ("banana", 50)], schema="fruit string, age integer"
|
102
|
+
)
|
103
|
+
|
104
|
+
collect_dataframe_checkpoint(
|
105
|
+
pyspark_df,
|
106
|
+
checkpoint_name="collect_checkpoint_mode_1",
|
107
|
+
sample=sample_size,
|
108
|
+
mode=CheckpointMode.SCHEMA,
|
109
|
+
)
|
110
|
+
```
|
111
|
+
|
112
|
+
|
113
|
+
### Dataframe mode
|
114
|
+
|
115
|
+
```python
|
116
|
+
from pyspark.sql import SparkSession
|
117
|
+
from snowflake.snowpark_checkpoints_collector import collect_dataframe_checkpoint
|
118
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
|
119
|
+
from pyspark.sql.types import StructType, StructField, ByteType, StringType, IntegerType
|
120
|
+
|
121
|
+
spark_schema = StructType(
|
122
|
+
[
|
123
|
+
StructField("BYTE", ByteType(), True),
|
124
|
+
StructField("STRING", StringType(), True),
|
125
|
+
StructField("INTEGER", IntegerType(), True)
|
126
|
+
]
|
127
|
+
)
|
128
|
+
|
129
|
+
data = [(1, "apple", 21), (2, "lemon", 34), (3, "banana", 50)]
|
130
|
+
|
131
|
+
spark_session = SparkSession.builder.getOrCreate()
|
132
|
+
pyspark_df = spark_session.createDataFrame(data, schema=spark_schema).orderBy(
|
133
|
+
"INTEGER"
|
134
|
+
)
|
135
|
+
|
136
|
+
collect_dataframe_checkpoint(
|
137
|
+
pyspark_df,
|
138
|
+
checkpoint_name="collect_checkpoint_mode_2",
|
139
|
+
mode=CheckpointMode.DATAFRAME,
|
140
|
+
)
|
141
|
+
```
|
142
|
+
|
143
|
+
------
|
{snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/README.md
RENAMED
@@ -2,11 +2,8 @@
|
|
2
2
|
|
3
3
|
---
|
4
4
|
**NOTE**
|
5
|
-
|
6
|
-
This package is on Private Preview.
|
7
|
-
|
5
|
+
This package is in Public Preview.
|
8
6
|
---
|
9
|
-
|
10
7
|
**snowpark-checkpoints-collector** package offers a function for extracting information from PySpark dataframes. We can then use that data to validate against the converted Snowpark dataframes to ensure that behavioral equivalence has been achieved.
|
11
8
|
## Features
|
12
9
|
|
{snowpark_checkpoints_collectors-0.1.0rc2 → snowpark_checkpoints_collectors-0.1.1}/pyproject.toml
RENAMED
@@ -3,7 +3,9 @@ build-backend = "hatchling.build"
|
|
3
3
|
requires = ["hatchling"]
|
4
4
|
|
5
5
|
[project]
|
6
|
-
authors = [
|
6
|
+
authors = [
|
7
|
+
{name = "Snowflake, Inc.", email = "snowflake-python-libraries-dl@snowflake.com"},
|
8
|
+
]
|
7
9
|
classifiers = [
|
8
10
|
"Development Status :: 4 - Beta",
|
9
11
|
"Environment :: Console",
|
@@ -24,12 +26,13 @@ classifiers = [
|
|
24
26
|
"Topic :: Scientific/Engineering :: Information Analysis",
|
25
27
|
]
|
26
28
|
dependencies = [
|
27
|
-
"snowflake-snowpark-python",
|
28
|
-
"snowflake-connector-python",
|
29
|
+
"snowflake-snowpark-python==1.26.0",
|
30
|
+
"snowflake-connector-python==3.13.0",
|
29
31
|
"pyspark",
|
30
32
|
"pandera[io]==0.20.4",
|
31
33
|
]
|
32
34
|
description = "Snowpark column and table statistics collection"
|
35
|
+
dynamic = ['version']
|
33
36
|
keywords = [
|
34
37
|
'Snowflake',
|
35
38
|
'analytics',
|
@@ -38,11 +41,10 @@ keywords = [
|
|
38
41
|
'db',
|
39
42
|
'Snowpark',
|
40
43
|
]
|
41
|
-
license = {
|
44
|
+
license = {text = "Apache License, Version 2.0"}
|
42
45
|
name = "snowpark-checkpoints-collectors"
|
43
46
|
readme = "README.md"
|
44
47
|
requires-python = '>=3.9,<3.12'
|
45
|
-
dynamic = ['version']
|
46
48
|
|
47
49
|
[project.optional-dependencies]
|
48
50
|
development = [
|
@@ -74,15 +76,22 @@ where = ["src/"]
|
|
74
76
|
dev-mode-dirs = ['src']
|
75
77
|
directory = 'snowpark-checkpoints-collectors'
|
76
78
|
|
79
|
+
[[tool.hatch.sources]]
|
80
|
+
dir = "src/snowflake/snowpark_checkpoints_collector"
|
81
|
+
name = "snowpark-checkpoints-collectors"
|
82
|
+
type = "package"
|
83
|
+
|
77
84
|
[tool.hatch.build.targets.wheel]
|
78
85
|
directory = "dist"
|
79
|
-
packages = [
|
86
|
+
packages = [
|
87
|
+
"src/snowflake",
|
88
|
+
]
|
80
89
|
|
81
90
|
[tool.hatch.build.targets.sdist]
|
82
91
|
directory = "dist"
|
83
92
|
exclude = ["/.github", "/.idea"]
|
84
93
|
include = [
|
85
|
-
'src
|
94
|
+
'src/**',
|
86
95
|
'README.md',
|
87
96
|
'LICENSE',
|
88
97
|
'test/',
|
@@ -113,7 +122,6 @@ exclude_lines = [
|
|
113
122
|
"if __name__ == .__main__.:",
|
114
123
|
]
|
115
124
|
|
116
|
-
|
117
125
|
[tool.hatch.envs.linter.scripts]
|
118
126
|
check = [
|
119
127
|
'ruff check --fix .',
|
@@ -121,7 +129,7 @@ check = [
|
|
121
129
|
|
122
130
|
[tool.hatch.envs.test.scripts]
|
123
131
|
check = [
|
124
|
-
"pip install -e ../snowpark-checkpoints-configuration"
|
132
|
+
"pip install -e ../snowpark-checkpoints-configuration",
|
125
133
|
'pytest -v --junitxml=test/outcome/test-results.xml --cov=. --cov-config=test/.coveragerc --cov-report=xml:test/outcome/coverage-{matrix:python:{env:PYTHON_VERSION:unset}}.xml {args:test} --cov-report=term --cov-report=json:test/outcome/coverage-{matrix:python:{env:PYTHON_VERSION:unset}}.json',
|
126
134
|
]
|
127
135
|
|
snowpark_checkpoints_collectors-0.1.1/src/snowflake/snowpark_checkpoints_collector/__init__.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
__all__ = ["collect_dataframe_checkpoint", "CheckpointMode"]
|
17
|
+
|
18
|
+
from snowflake.snowpark_checkpoints_collector.summary_stats_collector import (
|
19
|
+
collect_dataframe_checkpoint,
|
20
|
+
)
|
21
|
+
|
22
|
+
from snowflake.snowpark_checkpoints_collector.collection_common import CheckpointMode
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
import locale
|
6
17
|
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
__all__ = ["CollectionPointResult", "CollectionResult", "CollectionPointResultManager"]
|
17
|
+
|
18
|
+
from snowflake.snowpark_checkpoints_collector.collection_result.model.collection_point_result import (
|
19
|
+
CollectionPointResult,
|
20
|
+
CollectionResult,
|
21
|
+
)
|
22
|
+
from snowflake.snowpark_checkpoints_collector.collection_result.model.collection_point_result_manager import (
|
23
|
+
CollectionPointResultManager,
|
24
|
+
)
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from datetime import datetime
|
5
16
|
from enum import Enum
|
6
17
|
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
import json
|
5
16
|
|
6
17
|
from typing import Optional
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
|
16
|
+
__all__ = [
|
17
|
+
"ColumnCollectorManager",
|
18
|
+
]
|
19
|
+
|
20
|
+
from snowflake.snowpark_checkpoints_collector.column_collection.column_collector_manager import (
|
21
|
+
ColumnCollectorManager,
|
22
|
+
)
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from pyspark.sql import DataFrame as SparkDataFrame
|
5
16
|
from pyspark.sql.types import StructField
|
6
17
|
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
__all__ = [
|
6
17
|
"ArrayColumnCollector",
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from statistics import mean
|
5
16
|
|
6
17
|
from pyspark.sql import DataFrame as SparkDataFrame
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from statistics import mean
|
5
16
|
|
6
17
|
from pyspark.sql import DataFrame as SparkDataFrame
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
from pyspark.sql import DataFrame as SparkDataFrame
|
6
17
|
from pyspark.sql.types import StructField
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
from abc import ABC, abstractmethod
|
6
17
|
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from pyspark.sql import DataFrame as SparkDataFrame
|
5
16
|
from pyspark.sql.functions import col as spark_col
|
6
17
|
from pyspark.sql.functions import max as spark_max
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
from pyspark.sql import DataFrame as SparkDataFrame
|
6
17
|
from pyspark.sql.functions import col as spark_col
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
from pyspark.sql import DataFrame as SparkDataFrame
|
6
17
|
from pyspark.sql.functions import col as spark_col
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
|
5
16
|
from pyspark.sql import DataFrame as SparkDataFrame
|
6
17
|
from pyspark.sql.types import StructField
|
@@ -1,6 +1,17 @@
|
|
1
|
-
#
|
2
|
-
#
|
3
|
-
|
1
|
+
# Copyright 2025 Snowflake Inc.
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
3
|
+
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
4
15
|
from statistics import mean
|
5
16
|
|
6
17
|
from pyspark.sql import DataFrame as SparkDataFrame
|