snowpark-checkpoints-collectors 0.3.3__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/PKG-INFO +1 -1
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/pyproject.toml +1 -1
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/__version__.py +1 -1
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_common.py +5 -2
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py +21 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/summary_stats_collector.py +88 -9
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/.coveragerc +5 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1.py +2 -2
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_full_df.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type.json +1 -0
- snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_io_strategy.json +1 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_snow_connection.py +11 -7
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_full_df.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type.json +0 -1
- snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_io_strategy.json +0 -1
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/.gitignore +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/CHANGELOG.md +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/LICENSE +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/README.md +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/snowpark-testdf-schema.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/array_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/binary_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/boolean_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/column_collector_base.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/date_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/day_time_interval_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/decimal_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/empty_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/map_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/null_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/numeric_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/string_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/struct_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_default_strategy.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_env_strategy.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_file_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/singleton.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/extra_config.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/file_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/logging_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/telemetry.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/telemetry_compare_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_checkpoint_name.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_full_df_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_parquet_directory _telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_empty_dataframe_with_schema_telemetry.json +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collection_result_file.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_snow_connection_int.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/io_utils/test_default_strategy.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_checkpoint_name_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_collection_point_result.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_collection_point_result_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_column_collection.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_extra_config.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_file_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_logger.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_logging_utils.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_pandera_column_check_manager.py +0 -0
- {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_summary_stats_collector.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: snowpark-checkpoints-collectors
|
3
|
-
Version: 0.3.3
|
3
|
+
Version: 0.4.0
|
4
4
|
Summary: Snowpark column and table statistics collection
|
5
5
|
Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
|
6
6
|
Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
|
{snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/pyproject.toml
RENAMED
@@ -129,7 +129,7 @@ exclude_lines = [
|
|
129
129
|
|
130
130
|
[tool.hatch.envs.linter.scripts]
|
131
131
|
check = [
|
132
|
-
'ruff check --fix .',
|
132
|
+
"echo 'Running linting checks...' && ruff check --config=../ruff.toml --statistics --verbose . || (echo '❌ LINTING FAILED: Please fix the above linting issues before proceeding. Use \"ruff check --config=../ruff.toml --fix .\" to auto-fix some issues, or fix them manually.' && exit 1)",
|
133
133
|
]
|
134
134
|
|
135
135
|
[tool.hatch.envs.test.scripts]
|
@@ -48,11 +48,12 @@ STRUCT_COLUMN_TYPE = "struct"
|
|
48
48
|
TIMESTAMP_COLUMN_TYPE = "timestamp"
|
49
49
|
TIMESTAMP_NTZ_COLUMN_TYPE = "timestamp_ntz"
|
50
50
|
|
51
|
-
PANDAS_BOOLEAN_DTYPE = "
|
51
|
+
PANDAS_BOOLEAN_DTYPE = "boolean"
|
52
52
|
PANDAS_DATETIME_DTYPE = "datetime64[ns]"
|
53
53
|
PANDAS_FLOAT_DTYPE = "float64"
|
54
|
-
PANDAS_INTEGER_DTYPE = "
|
54
|
+
PANDAS_INTEGER_DTYPE = "Int64"
|
55
55
|
PANDAS_OBJECT_DTYPE = "object"
|
56
|
+
PANDAS_STRING_DTYPE = "string[python]"
|
56
57
|
PANDAS_TIMEDELTA_DTYPE = "timedelta64[ns]"
|
57
58
|
|
58
59
|
NUMERIC_TYPE_COLLECTION = [
|
@@ -142,6 +143,8 @@ BACKSLASH_TOKEN = "\\"
|
|
142
143
|
SLASH_TOKEN = "/"
|
143
144
|
PYSPARK_NONE_SIZE_VALUE = -1
|
144
145
|
PANDAS_LONG_TYPE = "Int64"
|
146
|
+
PANDAS_STRING_TYPE = "string"
|
147
|
+
PANDAS_FLOAT_TYPE = "float64"
|
145
148
|
|
146
149
|
# ENVIRONMENT VARIABLES
|
147
150
|
SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR = (
|
@@ -22,6 +22,8 @@ from pathlib import Path
|
|
22
22
|
from typing import Callable, Optional
|
23
23
|
|
24
24
|
from snowflake.snowpark import Session
|
25
|
+
from snowflake.snowpark.functions import col, expr
|
26
|
+
from snowflake.snowpark.types import TimestampType
|
25
27
|
from snowflake.snowpark_checkpoints_collector.collection_common import (
|
26
28
|
DOT_PARQUET_EXTENSION,
|
27
29
|
)
|
@@ -195,9 +197,28 @@ class SnowConnection:
|
|
195
197
|
stage_directory_path,
|
196
198
|
)
|
197
199
|
dataframe = self.session.read.parquet(path=stage_directory_path)
|
200
|
+
dataframe = convert_timestamps_to_utc_date(dataframe)
|
198
201
|
LOGGER.info("Creating table '%s' from parquet files", table_name)
|
199
202
|
dataframe.write.save_as_table(table_name=table_name, mode="overwrite")
|
200
203
|
|
201
204
|
def _create_snowpark_session(self) -> Session:
|
202
205
|
LOGGER.info("Creating a Snowpark session using the default connection")
|
203
206
|
return Session.builder.getOrCreate()
|
207
|
+
|
208
|
+
|
209
|
+
def convert_timestamps_to_utc_date(df):
|
210
|
+
"""Convert all timestamp columns to UTC normalized timestamps.
|
211
|
+
|
212
|
+
Reading a parquet written by spark from a snowpark session modifies the original timestamps,
|
213
|
+
so this function normalizes timestamps for comparison.
|
214
|
+
"""
|
215
|
+
new_cols = []
|
216
|
+
for field in df.schema.fields:
|
217
|
+
if isinstance(field.datatype, TimestampType):
|
218
|
+
utc_normalized_ts = expr(
|
219
|
+
f"convert_timezone('UTC', cast(to_date({field.name}) as timestamp_tz))"
|
220
|
+
).alias(field.name)
|
221
|
+
new_cols.append(utc_normalized_ts)
|
222
|
+
else:
|
223
|
+
new_cols.append(col(field.name))
|
224
|
+
return df.select(new_cols)
|
@@ -23,9 +23,15 @@ import pandera as pa
|
|
23
23
|
|
24
24
|
from pyspark.sql import DataFrame as SparkDataFrame
|
25
25
|
from pyspark.sql.functions import col
|
26
|
-
from pyspark.sql.types import
|
26
|
+
from pyspark.sql.types import BinaryType as SparkBinaryType
|
27
|
+
from pyspark.sql.types import BooleanType as SparkBooleanType
|
28
|
+
from pyspark.sql.types import DateType as SparkDateType
|
27
29
|
from pyspark.sql.types import DoubleType as SparkDoubleType
|
30
|
+
from pyspark.sql.types import FloatType as SparkFloatType
|
31
|
+
from pyspark.sql.types import IntegerType as SparkIntegerType
|
28
32
|
from pyspark.sql.types import StringType as SparkStringType
|
33
|
+
from pyspark.sql.types import StructField as SparkStructField
|
34
|
+
from pyspark.sql.types import TimestampType as SparkTimestampType
|
29
35
|
|
30
36
|
from snowflake.snowpark_checkpoints_collector.collection_common import (
|
31
37
|
CHECKPOINT_JSON_OUTPUT_FILE_NAME_FORMAT,
|
@@ -36,8 +42,10 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
|
|
36
42
|
DOT_PARQUET_EXTENSION,
|
37
43
|
INTEGER_TYPE_COLLECTION,
|
38
44
|
NULL_COLUMN_TYPE,
|
45
|
+
PANDAS_FLOAT_TYPE,
|
39
46
|
PANDAS_LONG_TYPE,
|
40
47
|
PANDAS_OBJECT_TYPE_COLLECTION,
|
48
|
+
PANDAS_STRING_TYPE,
|
41
49
|
CheckpointMode,
|
42
50
|
)
|
43
51
|
from snowflake.snowpark_checkpoints_collector.collection_result.model import (
|
@@ -73,11 +81,13 @@ from snowflake.snowpark_checkpoints_collector.utils.telemetry import report_tele
|
|
73
81
|
LOGGER = logging.getLogger(__name__)
|
74
82
|
|
75
83
|
default_null_types = {
|
76
|
-
|
77
|
-
|
84
|
+
SparkIntegerType(): 0,
|
85
|
+
SparkFloatType(): 0.0,
|
78
86
|
SparkDoubleType(): 0.0,
|
79
87
|
SparkStringType(): "",
|
80
|
-
|
88
|
+
SparkBooleanType(): False,
|
89
|
+
SparkTimestampType(): None,
|
90
|
+
SparkDateType(): None,
|
81
91
|
}
|
82
92
|
|
83
93
|
|
@@ -345,7 +355,7 @@ def normalize_missing_values(df: SparkDataFrame) -> SparkDataFrame:
|
|
345
355
|
return df
|
346
356
|
|
347
357
|
|
348
|
-
def _get_spark_column_types(df: SparkDataFrame) -> dict[str,
|
358
|
+
def _get_spark_column_types(df: SparkDataFrame) -> dict[str, SparkStructField]:
|
349
359
|
schema = df.schema
|
350
360
|
column_type_collection = {}
|
351
361
|
for field in schema.fields:
|
@@ -475,14 +485,83 @@ def _to_pandas(sampled_df: SparkDataFrame) -> pandas.DataFrame:
|
|
475
485
|
LOGGER.debug("Converting Spark DataFrame to Pandas DataFrame")
|
476
486
|
pandas_df = sampled_df.toPandas()
|
477
487
|
for field in sampled_df.schema.fields:
|
478
|
-
has_nan = pandas_df[field.name].isna().any()
|
479
488
|
is_integer = field.dataType.typeName() in INTEGER_TYPE_COLLECTION
|
480
|
-
|
489
|
+
is_spark_string = isinstance(field.dataType, SparkStringType)
|
490
|
+
is_spark_binary = isinstance(field.dataType, SparkBinaryType)
|
491
|
+
is_spark_timestamp = isinstance(field.dataType, SparkTimestampType)
|
492
|
+
is_spark_float = isinstance(field.dataType, SparkFloatType)
|
493
|
+
is_spark_boolean = isinstance(field.dataType, SparkBooleanType)
|
494
|
+
is_spark_date = isinstance(field.dataType, SparkDateType)
|
495
|
+
if is_integer:
|
481
496
|
LOGGER.debug(
|
482
|
-
"Converting column '%s' to '%s' type",
|
497
|
+
"Converting Spark integer column '%s' to Pandas nullable '%s' type",
|
483
498
|
field.name,
|
484
499
|
PANDAS_LONG_TYPE,
|
485
500
|
)
|
486
|
-
pandas_df[field.name] =
|
501
|
+
pandas_df[field.name] = (
|
502
|
+
pandas_df[field.name].astype(PANDAS_LONG_TYPE).fillna(0)
|
503
|
+
)
|
504
|
+
elif is_spark_string or is_spark_binary:
|
505
|
+
LOGGER.debug(
|
506
|
+
"Converting Spark string column '%s' to Pandas nullable '%s' type",
|
507
|
+
field.name,
|
508
|
+
PANDAS_STRING_TYPE,
|
509
|
+
)
|
510
|
+
pandas_df[field.name] = (
|
511
|
+
pandas_df[field.name].astype(PANDAS_STRING_TYPE).fillna("")
|
512
|
+
)
|
513
|
+
elif is_spark_timestamp:
|
514
|
+
LOGGER.debug(
|
515
|
+
"Converting Spark timestamp column '%s' to UTC naive Pandas datetime",
|
516
|
+
field.name,
|
517
|
+
)
|
518
|
+
pandas_df[field.name] = convert_all_to_utc_naive(
|
519
|
+
pandas_df[field.name]
|
520
|
+
).fillna(pandas.NaT)
|
521
|
+
elif is_spark_float:
|
522
|
+
LOGGER.debug(
|
523
|
+
"Converting Spark float column '%s' to Pandas nullable float",
|
524
|
+
field.name,
|
525
|
+
)
|
526
|
+
pandas_df[field.name] = (
|
527
|
+
pandas_df[field.name].astype(PANDAS_FLOAT_TYPE).fillna(0.0)
|
528
|
+
)
|
529
|
+
elif is_spark_boolean:
|
530
|
+
LOGGER.debug(
|
531
|
+
"Converting Spark boolean column '%s' to Pandas nullable boolean",
|
532
|
+
field.name,
|
533
|
+
)
|
534
|
+
pandas_df[field.name] = (
|
535
|
+
pandas_df[field.name].astype("boolean").fillna(False)
|
536
|
+
)
|
537
|
+
elif is_spark_date:
|
538
|
+
LOGGER.debug(
|
539
|
+
"Converting Spark date column '%s' to Pandas nullable datetime",
|
540
|
+
field.name,
|
541
|
+
)
|
542
|
+
pandas_df[field.name] = pandas_df[field.name].fillna(pandas.NaT)
|
487
543
|
|
488
544
|
return pandas_df
|
545
|
+
|
546
|
+
|
547
|
+
def convert_all_to_utc_naive(series: pandas.Series) -> pandas.Series:
|
548
|
+
"""Convert all timezone-aware or naive timestamps in a series to UTC naive.
|
549
|
+
|
550
|
+
Naive timestamps are assumed to be in UTC and localized accordingly.
|
551
|
+
Timezone-aware timestamps are converted to UTC and then made naive.
|
552
|
+
|
553
|
+
Args:
|
554
|
+
series (pandas.Series): A Pandas Series of `pd.Timestamp` objects,
|
555
|
+
either naive or timezone-aware.
|
556
|
+
|
557
|
+
Returns:
|
558
|
+
pandas.Series: A Series of UTC-normalized naive timestamps (`tzinfo=None`).
|
559
|
+
|
560
|
+
"""
|
561
|
+
|
562
|
+
def convert(ts):
|
563
|
+
if ts.tz is None:
|
564
|
+
ts = ts.tz_localize("UTC")
|
565
|
+
return ts.tz_convert("UTC").tz_localize(None)
|
566
|
+
|
567
|
+
return series.apply(convert)
|
@@ -66,7 +66,7 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
|
|
66
66
|
PANDAS_DATETIME_DTYPE,
|
67
67
|
PANDAS_FLOAT_DTYPE,
|
68
68
|
PANDAS_INTEGER_DTYPE,
|
69
|
-
|
69
|
+
PANDAS_STRING_DTYPE,
|
70
70
|
PANDAS_TIMEDELTA_DTYPE,
|
71
71
|
PANDERA_COLUMN_TYPE_KEY,
|
72
72
|
SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
|
@@ -428,7 +428,7 @@ def test_collect_sampled_dataframe(spark_session, output_path):
|
|
428
428
|
schema_contract_output_json = json.loads(schema_contract_output)
|
429
429
|
|
430
430
|
pandera_column_type_collection_expected = [
|
431
|
-
|
431
|
+
PANDAS_STRING_DTYPE,
|
432
432
|
PANDAS_INTEGER_DTYPE,
|
433
433
|
PANDAS_FLOAT_DTYPE,
|
434
434
|
PANDAS_DATETIME_DTYPE,
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": true, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": 
null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": 
{"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 1, "false_count": 2}, {"name": "b", "type": "byte", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 2.1, "mean": 1.4000000000000001, "decimal_precision": 1, "margin_error": 1.2124355652982142}, {"name": "f", "type": "float", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 3.109999895095825, "mean": 2.073333263397217, "decimal_precision": 15, "margin_error": 1.7955592766132826}, {"name": "g", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0, "max": 4, "mean": 2.6666666666666665, "decimal_precision": 0, "margin_error": 2.309401076758503}, {"name": "h", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": true, 
"rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 0, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "value_type": "string", "allow_null": true, "null_value_proportion": 10.0, "max_size": 5, "min_size": 0, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "q", "type": "binary", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "max_size": 6, "min_size": 0, "mean_size": 2.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 0.0, "max_size": 5, "min_size": 0, "mean_size": 3, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "metadata": [{"name": "inner1", "type": "string", "nullable": 
false, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 1, "rows_null_count": 2}]}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 2, "max_value": 2}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 2, "max_length": 2}, {"name": "Value", "type": "decimal", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": "1.123456789", "max": "5.123450000", "mean": "3.1234552538000", "decimal_precision": 9}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 6}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 51, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 0, "max_length": 6}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": 0, "max": 51, "mean": 21.8, "decimal_precision": 0, "margin_error": 22.241852440837743}, {"name": "active", "type": "boolean", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "true_count": 2, "false_count": 3}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Description": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 0}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Price": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 0.0, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Description", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 0, "max_length": 0}, {"name": "Price", "type": "double", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 0.0, "max": 0.0, "mean": 0.0, "decimal_precision": 1, "margin_error": null}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "true_count": 0, "false_count": 1}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Code": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Code", "type": "long", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}]}}
|
snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_full_df.json
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 4, "max_value": 4}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 50, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 4, "max_length": 4}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 21, "max": 50, "mean": 35.0, "decimal_precision": 0, "margin_error": 14.52583904633395}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": false, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 2.1, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 3.109999895095825, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 4, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, 
"i": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, 
"checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 2, "false_count": 1}, {"name": "b", "type": "byte", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 2.1, "max": 2.1, "mean": 2.1, "decimal_precision": 1, "margin_error": 0.0}, {"name": "f", "type": "float", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 3.109999895095825, "max": 3.109999895095825, "mean": 3.109999895095825, "decimal_precision": 15, "margin_error": 0.0}, {"name": "g", "type": "integer", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 4, "max": 4, "mean": 4.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "h", "type": "long", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": false, "rows_count": 3, "rows_not_null_count": 
3, "rows_null_count": 0, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "value_type": "string", "allow_null": true, "null_value_proportion": 13.333333333333334, "max_size": 5, "min_size": 5, "mean_size": 5, "is_unique_size": true}, {"name": "q", "type": "binary", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "max_size": 6, "min_size": 2, "mean_size": 3.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 20.0, "max_size": 5, "min_size": 1, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 
3, "rows_not_null_count": 3, "rows_null_count": 0}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}]}]}}
|
@@ -0,0 +1 @@
|
|
1
|
+
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 21, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 21, "max": 21, "mean": 21.0, "decimal_precision": 0, "margin_error": null}]}}
|
@@ -56,13 +56,17 @@ def test_create_snowflake_table_from_parquet(input_path):
|
|
56
56
|
with mock.patch(
|
57
57
|
"snowflake.snowpark_checkpoints_collector.io_utils.io_default_strategy.IODefaultStrategy.read_bytes"
|
58
58
|
) as read_bytes_mock:
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
59
|
+
with mock.patch(
|
60
|
+
"snowflake.snowpark_checkpoints_collector.snow_connection_model.snow_connection.convert_timestamps_to_utc_date"
|
61
|
+
) as mock_convert:
|
62
|
+
mock_convert.return_value = mock_df
|
63
|
+
read_bytes_mock.return_value = b"test"
|
64
|
+
isfile_mock.return_value = True
|
65
|
+
glob_mock.return_value = [parquet_file_path]
|
66
|
+
snow_connection = SnowConnection(mocked_session)
|
67
|
+
snow_connection.create_snowflake_table_from_local_parquet(
|
68
|
+
checkpoint_name, input_path, stage_path=checkpoint_name
|
69
|
+
)
|
66
70
|
|
67
71
|
stage_name = stage_name.format(snow_connection.stage_id)
|
68
72
|
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": true, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float32", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": null, 
"description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, 
"less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 1, "false_count": 2}, {"name": "b", "type": "byte", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 2.1, "mean": 1.4000000000000001, "decimal_precision": 1, "margin_error": 1.2124355652982142}, {"name": "f", "type": "float", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 3.109999895095825, "mean": 2.073333263397217, "decimal_precision": 15, "margin_error": 1.7955592766132826}, {"name": "g", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0, "max": 4, "mean": 2.6666666666666665, "decimal_precision": 0, "margin_error": 2.309401076758503}, {"name": "h", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": true, "rows_count": 3, 
"rows_not_null_count": 2, "rows_null_count": 1, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 0, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "value_type": "string", "allow_null": true, "null_value_proportion": 10.0, "max_size": 5, "min_size": 0, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "q", "type": "binary", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "max_size": 6, "min_size": 0, "mean_size": 2.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 0.0, "max_size": 5, "min_size": 0, "mean_size": 3, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 
3, "rows_not_null_count": 2, "rows_null_count": 1}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 1, "rows_null_count": 2}]}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 2, "max_value": 2}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 2, "max_length": 2}, {"name": "Value", "type": "decimal", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": "1.123456789", "max": "5.123450000", "mean": "3.1234552538000", "decimal_precision": 9}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 6}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 51, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 0, "max_length": 6}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": 0, "max": 51, "mean": 21.8, "decimal_precision": 0, "margin_error": 22.241852440837743}, {"name": "active", "type": "boolean", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "true_count": 2, "false_count": 3}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Description": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 0}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Price": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 0.0, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Description", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 0, "max_length": 0}, {"name": "Price", "type": "double", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 0.0, "max": 0.0, "mean": 0.0, "decimal_precision": 1, "margin_error": null}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "true_count": 0, "false_count": 1}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Code": {"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Code", "type": "long", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}]}}
|
snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_full_df.json
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 4, "max_value": 4}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 50, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 4, "max_length": 4}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 21, "max": 50, "mean": 35, "decimal_precision": 0, "margin_error": 14.52583904633395}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "int8", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": false, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 2.1, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float32", "nullable": false, "checks": {"in_range": {"min_value": 3.109999895095825, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 4, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": 
{"title": null, "description": null, "dtype": "int16", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": 
{"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 2, "false_count": 1}, {"name": "b", "type": "byte", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 1, "max": 1, "mean": 1, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 2.1, "max": 2.1, "mean": 2.1, "decimal_precision": 1, "margin_error": 0.0}, {"name": "f", "type": "float", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 3.109999895095825, "max": 3.109999895095825, "mean": 3.109999895095825, "decimal_precision": 15, "margin_error": 0.0}, {"name": "g", "type": "integer", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 4, "max": 4, "mean": 4, "decimal_precision": 0, "margin_error": 0.0}, {"name": "h", "type": "long", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 5, "max": 5, "mean": 5, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, 
"rows_null_count": 0, "min": 6, "max": 6, "mean": 6, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "value_type": "string", "allow_null": true, "null_value_proportion": 13.333333333333334, "max_size": 5, "min_size": 5, "mean_size": 5, "is_unique_size": true}, {"name": "q", "type": "binary", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "max_size": 6, "min_size": 2, "mean_size": 3.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 20.0, "max_size": 5, "min_size": 1, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 3, 
"rows_not_null_count": 3, "rows_null_count": 0}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}]}]}}
|
@@ -1 +0,0 @@
|
|
1
|
-
{"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 21, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 21, "max": 21, "mean": 21.0, "decimal_precision": 0, "margin_error": null}]}}
|
File without changes
|
{snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/CHANGELOG.md
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|