snowpark-checkpoints-collectors 0.3.3__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/PKG-INFO +1 -1
  2. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/pyproject.toml +1 -1
  3. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/__version__.py +1 -1
  4. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_common.py +5 -2
  5. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/snow_connection.py +21 -0
  6. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/summary_stats_collector.py +88 -9
  7. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/.coveragerc +5 -0
  8. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1.py +2 -2
  9. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values.json +1 -0
  10. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type.json +1 -0
  11. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values.json +1 -0
  12. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values.json +1 -0
  13. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema.json +1 -0
  14. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_full_df.json +1 -0
  15. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type.json +1 -0
  16. snowpark_checkpoints_collectors-0.4.0/test/integ/test_collect_df_mode_1_expected/test_io_strategy.json +1 -0
  17. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_snow_connection.py +11 -7
  18. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values.json +0 -1
  19. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type.json +0 -1
  20. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values.json +0 -1
  21. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values.json +0 -1
  22. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema.json +0 -1
  23. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_full_df.json +0 -1
  24. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type.json +0 -1
  25. snowpark_checkpoints_collectors-0.3.3/test/integ/test_collect_df_mode_1_expected/test_io_strategy.json +0 -1
  26. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/.gitignore +0 -0
  27. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/CHANGELOG.md +0 -0
  28. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/LICENSE +0 -0
  29. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/README.md +0 -0
  30. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/snowpark-testdf-schema.json +0 -0
  31. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/__init__.py +0 -0
  32. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/__init__.py +0 -0
  33. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result.py +0 -0
  34. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/collection_result/model/collection_point_result_manager.py +0 -0
  35. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/__init__.py +0 -0
  36. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/column_collector_manager.py +0 -0
  37. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/__init__.py +0 -0
  38. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/array_column_collector.py +0 -0
  39. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/binary_column_collector.py +0 -0
  40. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/boolean_column_collector.py +0 -0
  41. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/column_collector_base.py +0 -0
  42. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/date_column_collector.py +0 -0
  43. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/day_time_interval_column_collector.py +0 -0
  44. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/decimal_column_collector.py +0 -0
  45. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/empty_column_collector.py +0 -0
  46. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/map_column_collector.py +0 -0
  47. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/null_column_collector.py +0 -0
  48. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/numeric_column_collector.py +0 -0
  49. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/string_column_collector.py +0 -0
  50. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/struct_column_collector.py +0 -0
  51. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_column_collector.py +0 -0
  52. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_collection/model/timestamp_ntz_column_collector.py +0 -0
  53. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/__init__.py +0 -0
  54. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/column_pandera_checks/pandera_column_checks_manager.py +0 -0
  55. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/__init__.py +0 -0
  56. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_default_strategy.py +0 -0
  57. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_env_strategy.py +0 -0
  58. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/io_utils/io_file_manager.py +0 -0
  59. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/singleton.py +0 -0
  60. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/snow_connection_model/__init__.py +0 -0
  61. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/checkpoint_name_utils.py +0 -0
  62. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/extra_config.py +0 -0
  63. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/file_utils.py +0 -0
  64. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/logging_utils.py +0 -0
  65. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/src/snowflake/snowpark_checkpoints_collector/utils/telemetry.py +0 -0
  66. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/telemetry_compare_utils.py +0 -0
  67. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_checkpoint_name.py +0 -0
  68. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_dataframe_all_column_types_with_null_values_telemetry.json +0 -0
  69. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_dataframe_with_unsupported_pandera_column_type_telemetry.json +0 -0
  70. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_df_with_null_values_telemetry.json +0 -0
  71. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_df_with_only_null_values_telemetry.json +0 -0
  72. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column.json +0 -0
  73. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_object_column_telemetry.json +0 -0
  74. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_empty_df_with_schema_telemetry.json +0 -0
  75. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_full_df_all_column_type_telemetry.json +0 -0
  76. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_1_expected/test_full_df_telemetry.json +0 -0
  77. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2.py +0 -0
  78. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_parquet_directory _telemetry.json +0 -0
  79. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_checkpoint_mode_2_telemetry.json +0 -0
  80. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collect_df_mode_2_expected/test_collect_empty_dataframe_with_schema_telemetry.json +0 -0
  81. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_collection_result_file.py +0 -0
  82. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/integ/test_snow_connection_int.py +0 -0
  83. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/io_utils/test_default_strategy.py +0 -0
  84. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_checkpoint_name_utils.py +0 -0
  85. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_collection_point_result.py +0 -0
  86. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_collection_point_result_manager.py +0 -0
  87. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_column_collection.py +0 -0
  88. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_extra_config.py +0 -0
  89. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_file_utils.py +0 -0
  90. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_logger.py +0 -0
  91. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_logging_utils.py +0 -0
  92. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_pandera_column_check_manager.py +0 -0
  93. {snowpark_checkpoints_collectors-0.3.3 → snowpark_checkpoints_collectors-0.4.0}/test/unit/test_summary_stats_collector.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: snowpark-checkpoints-collectors
3
- Version: 0.3.3
3
+ Version: 0.4.0
4
4
  Summary: Snowpark column and table statistics collection
5
5
  Project-URL: Bug Tracker, https://github.com/snowflakedb/snowpark-checkpoints/issues
6
6
  Project-URL: Source code, https://github.com/snowflakedb/snowpark-checkpoints/
@@ -129,7 +129,7 @@ exclude_lines = [
129
129
 
130
130
  [tool.hatch.envs.linter.scripts]
131
131
  check = [
132
- 'ruff check --fix .',
132
+ "echo 'Running linting checks...' && ruff check --config=../ruff.toml --statistics --verbose . || (echo '❌ LINTING FAILED: Please fix the above linting issues before proceeding. Use \"ruff check --config=../ruff.toml --fix .\" to auto-fix some issues, or fix them manually.' && exit 1)",
133
133
  ]
134
134
 
135
135
  [tool.hatch.envs.test.scripts]
@@ -13,4 +13,4 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
- __version__ = "0.3.3"
16
+ __version__ = "0.4.0"
@@ -48,11 +48,12 @@ STRUCT_COLUMN_TYPE = "struct"
48
48
  TIMESTAMP_COLUMN_TYPE = "timestamp"
49
49
  TIMESTAMP_NTZ_COLUMN_TYPE = "timestamp_ntz"
50
50
 
51
- PANDAS_BOOLEAN_DTYPE = "bool"
51
+ PANDAS_BOOLEAN_DTYPE = "boolean"
52
52
  PANDAS_DATETIME_DTYPE = "datetime64[ns]"
53
53
  PANDAS_FLOAT_DTYPE = "float64"
54
- PANDAS_INTEGER_DTYPE = "int64"
54
+ PANDAS_INTEGER_DTYPE = "Int64"
55
55
  PANDAS_OBJECT_DTYPE = "object"
56
+ PANDAS_STRING_DTYPE = "string[python]"
56
57
  PANDAS_TIMEDELTA_DTYPE = "timedelta64[ns]"
57
58
 
58
59
  NUMERIC_TYPE_COLLECTION = [
@@ -142,6 +143,8 @@ BACKSLASH_TOKEN = "\\"
142
143
  SLASH_TOKEN = "/"
143
144
  PYSPARK_NONE_SIZE_VALUE = -1
144
145
  PANDAS_LONG_TYPE = "Int64"
146
+ PANDAS_STRING_TYPE = "string"
147
+ PANDAS_FLOAT_TYPE = "float64"
145
148
 
146
149
  # ENVIRONMENT VARIABLES
147
150
  SNOWFLAKE_CHECKPOINT_CONTRACT_FILE_PATH_ENV_VAR = (
@@ -22,6 +22,8 @@ from pathlib import Path
22
22
  from typing import Callable, Optional
23
23
 
24
24
  from snowflake.snowpark import Session
25
+ from snowflake.snowpark.functions import col, expr
26
+ from snowflake.snowpark.types import TimestampType
25
27
  from snowflake.snowpark_checkpoints_collector.collection_common import (
26
28
  DOT_PARQUET_EXTENSION,
27
29
  )
@@ -195,9 +197,28 @@ class SnowConnection:
195
197
  stage_directory_path,
196
198
  )
197
199
  dataframe = self.session.read.parquet(path=stage_directory_path)
200
+ dataframe = convert_timestamps_to_utc_date(dataframe)
198
201
  LOGGER.info("Creating table '%s' from parquet files", table_name)
199
202
  dataframe.write.save_as_table(table_name=table_name, mode="overwrite")
200
203
 
201
204
  def _create_snowpark_session(self) -> Session:
202
205
  LOGGER.info("Creating a Snowpark session using the default connection")
203
206
  return Session.builder.getOrCreate()
207
+
208
+
209
+ def convert_timestamps_to_utc_date(df):
210
+ """Convert all timestamp columns to UTC normalized timestamps.
211
+
212
+ Reading a parquet written by spark from a snowpark session modifies the original timestamps,
213
+ so this function normalizes timestamps for comparison.
214
+ """
215
+ new_cols = []
216
+ for field in df.schema.fields:
217
+ if isinstance(field.datatype, TimestampType):
218
+ utc_normalized_ts = expr(
219
+ f"convert_timezone('UTC', cast(to_date({field.name}) as timestamp_tz))"
220
+ ).alias(field.name)
221
+ new_cols.append(utc_normalized_ts)
222
+ else:
223
+ new_cols.append(col(field.name))
224
+ return df.select(new_cols)
@@ -23,9 +23,15 @@ import pandera as pa
23
23
 
24
24
  from pyspark.sql import DataFrame as SparkDataFrame
25
25
  from pyspark.sql.functions import col
26
- from pyspark.sql.types import BooleanType, FloatType, IntegerType, StructField
26
+ from pyspark.sql.types import BinaryType as SparkBinaryType
27
+ from pyspark.sql.types import BooleanType as SparkBooleanType
28
+ from pyspark.sql.types import DateType as SparkDateType
27
29
  from pyspark.sql.types import DoubleType as SparkDoubleType
30
+ from pyspark.sql.types import FloatType as SparkFloatType
31
+ from pyspark.sql.types import IntegerType as SparkIntegerType
28
32
  from pyspark.sql.types import StringType as SparkStringType
33
+ from pyspark.sql.types import StructField as SparkStructField
34
+ from pyspark.sql.types import TimestampType as SparkTimestampType
29
35
 
30
36
  from snowflake.snowpark_checkpoints_collector.collection_common import (
31
37
  CHECKPOINT_JSON_OUTPUT_FILE_NAME_FORMAT,
@@ -36,8 +42,10 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
36
42
  DOT_PARQUET_EXTENSION,
37
43
  INTEGER_TYPE_COLLECTION,
38
44
  NULL_COLUMN_TYPE,
45
+ PANDAS_FLOAT_TYPE,
39
46
  PANDAS_LONG_TYPE,
40
47
  PANDAS_OBJECT_TYPE_COLLECTION,
48
+ PANDAS_STRING_TYPE,
41
49
  CheckpointMode,
42
50
  )
43
51
  from snowflake.snowpark_checkpoints_collector.collection_result.model import (
@@ -73,11 +81,13 @@ from snowflake.snowpark_checkpoints_collector.utils.telemetry import report_tele
73
81
  LOGGER = logging.getLogger(__name__)
74
82
 
75
83
  default_null_types = {
76
- IntegerType(): 0,
77
- FloatType(): 0.0,
84
+ SparkIntegerType(): 0,
85
+ SparkFloatType(): 0.0,
78
86
  SparkDoubleType(): 0.0,
79
87
  SparkStringType(): "",
80
- BooleanType(): False,
88
+ SparkBooleanType(): False,
89
+ SparkTimestampType(): None,
90
+ SparkDateType(): None,
81
91
  }
82
92
 
83
93
 
@@ -345,7 +355,7 @@ def normalize_missing_values(df: SparkDataFrame) -> SparkDataFrame:
345
355
  return df
346
356
 
347
357
 
348
- def _get_spark_column_types(df: SparkDataFrame) -> dict[str, StructField]:
358
+ def _get_spark_column_types(df: SparkDataFrame) -> dict[str, SparkStructField]:
349
359
  schema = df.schema
350
360
  column_type_collection = {}
351
361
  for field in schema.fields:
@@ -475,14 +485,83 @@ def _to_pandas(sampled_df: SparkDataFrame) -> pandas.DataFrame:
475
485
  LOGGER.debug("Converting Spark DataFrame to Pandas DataFrame")
476
486
  pandas_df = sampled_df.toPandas()
477
487
  for field in sampled_df.schema.fields:
478
- has_nan = pandas_df[field.name].isna().any()
479
488
  is_integer = field.dataType.typeName() in INTEGER_TYPE_COLLECTION
480
- if has_nan and is_integer:
489
+ is_spark_string = isinstance(field.dataType, SparkStringType)
490
+ is_spark_binary = isinstance(field.dataType, SparkBinaryType)
491
+ is_spark_timestamp = isinstance(field.dataType, SparkTimestampType)
492
+ is_spark_float = isinstance(field.dataType, SparkFloatType)
493
+ is_spark_boolean = isinstance(field.dataType, SparkBooleanType)
494
+ is_spark_date = isinstance(field.dataType, SparkDateType)
495
+ if is_integer:
481
496
  LOGGER.debug(
482
- "Converting column '%s' to '%s' type",
497
+ "Converting Spark integer column '%s' to Pandas nullable '%s' type",
483
498
  field.name,
484
499
  PANDAS_LONG_TYPE,
485
500
  )
486
- pandas_df[field.name] = pandas_df[field.name].astype(PANDAS_LONG_TYPE)
501
+ pandas_df[field.name] = (
502
+ pandas_df[field.name].astype(PANDAS_LONG_TYPE).fillna(0)
503
+ )
504
+ elif is_spark_string or is_spark_binary:
505
+ LOGGER.debug(
506
+ "Converting Spark string column '%s' to Pandas nullable '%s' type",
507
+ field.name,
508
+ PANDAS_STRING_TYPE,
509
+ )
510
+ pandas_df[field.name] = (
511
+ pandas_df[field.name].astype(PANDAS_STRING_TYPE).fillna("")
512
+ )
513
+ elif is_spark_timestamp:
514
+ LOGGER.debug(
515
+ "Converting Spark timestamp column '%s' to UTC naive Pandas datetime",
516
+ field.name,
517
+ )
518
+ pandas_df[field.name] = convert_all_to_utc_naive(
519
+ pandas_df[field.name]
520
+ ).fillna(pandas.NaT)
521
+ elif is_spark_float:
522
+ LOGGER.debug(
523
+ "Converting Spark float column '%s' to Pandas nullable float",
524
+ field.name,
525
+ )
526
+ pandas_df[field.name] = (
527
+ pandas_df[field.name].astype(PANDAS_FLOAT_TYPE).fillna(0.0)
528
+ )
529
+ elif is_spark_boolean:
530
+ LOGGER.debug(
531
+ "Converting Spark boolean column '%s' to Pandas nullable boolean",
532
+ field.name,
533
+ )
534
+ pandas_df[field.name] = (
535
+ pandas_df[field.name].astype("boolean").fillna(False)
536
+ )
537
+ elif is_spark_date:
538
+ LOGGER.debug(
539
+ "Converting Spark date column '%s' to Pandas nullable datetime",
540
+ field.name,
541
+ )
542
+ pandas_df[field.name] = pandas_df[field.name].fillna(pandas.NaT)
487
543
 
488
544
  return pandas_df
545
+
546
+
547
+ def convert_all_to_utc_naive(series: pandas.Series) -> pandas.Series:
548
+ """Convert all timezone-aware or naive timestamps in a series to UTC naive.
549
+
550
+ Naive timestamps are assumed to be in UTC and localized accordingly.
551
+ Timezone-aware timestamps are converted to UTC and then made naive.
552
+
553
+ Args:
554
+ series (pandas.Series): A Pandas Series of `pd.Timestamp` objects,
555
+ either naive or timezone-aware.
556
+
557
+ Returns:
558
+ pandas.Series: A Series of UTC-normalized naive timestamps (`tzinfo=None`).
559
+
560
+ """
561
+
562
+ def convert(ts):
563
+ if ts.tz is None:
564
+ ts = ts.tz_localize("UTC")
565
+ return ts.tz_convert("UTC").tz_localize(None)
566
+
567
+ return series.apply(convert)
@@ -1,4 +1,5 @@
1
1
  [run]
2
+ source = src/
2
3
  omit =
3
4
  */__init__.py
4
5
  **/__init__.py
@@ -8,6 +9,10 @@ omit =
8
9
  */test/*
9
10
  */.venv/*
10
11
  */.env/*
12
+ */.hatch/*
13
+ **/.hatch/*
14
+ */site-packages/*
15
+ **/site-packages/*
11
16
  **/telemetry.py
12
17
 
13
18
  [report]
@@ -66,7 +66,7 @@ from snowflake.snowpark_checkpoints_collector.collection_common import (
66
66
  PANDAS_DATETIME_DTYPE,
67
67
  PANDAS_FLOAT_DTYPE,
68
68
  PANDAS_INTEGER_DTYPE,
69
- PANDAS_OBJECT_DTYPE,
69
+ PANDAS_STRING_DTYPE,
70
70
  PANDAS_TIMEDELTA_DTYPE,
71
71
  PANDERA_COLUMN_TYPE_KEY,
72
72
  SNOWPARK_CHECKPOINTS_OUTPUT_DIRECTORY_NAME,
@@ -428,7 +428,7 @@ def test_collect_sampled_dataframe(spark_session, output_path):
428
428
  schema_contract_output_json = json.loads(schema_contract_output)
429
429
 
430
430
  pandera_column_type_collection_expected = [
431
- PANDAS_OBJECT_DTYPE,
431
+ PANDAS_STRING_DTYPE,
432
432
  PANDAS_INTEGER_DTYPE,
433
433
  PANDAS_FLOAT_DTYPE,
434
434
  PANDAS_DATETIME_DTYPE,
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": true, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 1, "false_count": 2}, {"name": "b", "type": "byte", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 2.1, "mean": 1.4000000000000001, "decimal_precision": 1, "margin_error": 1.2124355652982142}, {"name": "f", "type": "float", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 3.109999895095825, "mean": 2.073333263397217, "decimal_precision": 15, "margin_error": 1.7955592766132826}, {"name": "g", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0, "max": 4, "mean": 2.6666666666666665, "decimal_precision": 0, "margin_error": 2.309401076758503}, {"name": "h", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 0, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "value_type": "string", "allow_null": true, "null_value_proportion": 10.0, "max_size": 5, "min_size": 0, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "q", "type": "binary", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "max_size": 6, "min_size": 0, "mean_size": 2.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 0.0, "max_size": 5, "min_size": 0, "mean_size": 3, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 1, "rows_null_count": 2}]}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 2, "max_value": 2}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 2, "max_length": 2}, {"name": "Value", "type": "decimal", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": "1.123456789", "max": "5.123450000", "mean": "3.1234552538000", "decimal_precision": 9}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 6}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 51, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 0, "max_length": 6}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": 0, "max": 51, "mean": 21.8, "decimal_precision": 0, "margin_error": 22.241852440837743}, {"name": "active", "type": "boolean", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "true_count": 2, "false_count": 3}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Description": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 0}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Price": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 0.0, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Description", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 0, "max_length": 0}, {"name": "Price", "type": "double", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 0.0, "max": 0.0, "mean": 0.0, "decimal_precision": 1, "margin_error": null}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "true_count": 0, "false_count": 1}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Code": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Code", "type": "long", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 4, "max_value": 4}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 50, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 4, "max_length": 4}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 21, "max": 50, "mean": 35.0, "decimal_precision": 0, "margin_error": 14.52583904633395}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "boolean", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": false, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 2.1, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 3.109999895095825, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 4, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 2, "false_count": 1}, {"name": "b", "type": "byte", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 2.1, "max": 2.1, "mean": 2.1, "decimal_precision": 1, "margin_error": 0.0}, {"name": "f", "type": "float", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 3.109999895095825, "max": 3.109999895095825, "mean": 3.109999895095825, "decimal_precision": 15, "margin_error": 0.0}, {"name": "g", "type": "integer", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 4, "max": 4, "mean": 4.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "h", "type": "long", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "value_type": "string", "allow_null": true, "null_value_proportion": 13.333333333333334, "max_size": 5, "min_size": 5, "mean_size": 5, "is_unique_size": true}, {"name": "q", "type": "binary", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "max_size": 6, "min_size": 2, "mean_size": 3.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 20.0, "max_size": 5, "min_size": 1, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}]}]}}
@@ -0,0 +1 @@
1
+ {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "string[python]", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "Int64", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 21, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 21, "max": 21, "mean": 21.0, "decimal_precision": 0, "margin_error": null}]}}
@@ -56,13 +56,17 @@ def test_create_snowflake_table_from_parquet(input_path):
56
56
  with mock.patch(
57
57
  "snowflake.snowpark_checkpoints_collector.io_utils.io_default_strategy.IODefaultStrategy.read_bytes"
58
58
  ) as read_bytes_mock:
59
- read_bytes_mock.return_value = b"test"
60
- isfile_mock.return_value = True
61
- glob_mock.return_value = [parquet_file_path]
62
- snow_connection = SnowConnection(mocked_session)
63
- snow_connection.create_snowflake_table_from_local_parquet(
64
- checkpoint_name, input_path, stage_path=checkpoint_name
65
- )
59
+ with mock.patch(
60
+ "snowflake.snowpark_checkpoints_collector.snow_connection_model.snow_connection.convert_timestamps_to_utc_date"
61
+ ) as mock_convert:
62
+ mock_convert.return_value = mock_df
63
+ read_bytes_mock.return_value = b"test"
64
+ isfile_mock.return_value = True
65
+ glob_mock.return_value = [parquet_file_path]
66
+ snow_connection = SnowConnection(mocked_session)
67
+ snow_connection.create_snowflake_table_from_local_parquet(
68
+ checkpoint_name, input_path, stage_path=checkpoint_name
69
+ )
66
70
 
67
71
  stage_name = stage_name.format(snow_connection.stage_id)
68
72
 
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": true, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float32", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": null, "description": null, "dtype": "Int64", "nullable": true, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": true, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 1, "false_count": 2}, {"name": "b", "type": "byte", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 1, "max": 1, "mean": 1.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 2.1, "mean": 1.4000000000000001, "decimal_precision": 1, "margin_error": 1.2124355652982142}, {"name": "f", "type": "float", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0.0, "max": 3.109999895095825, "mean": 2.073333263397217, "decimal_precision": 15, "margin_error": 1.7955592766132826}, {"name": "g", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 0, "max": 4, "mean": 2.6666666666666665, "decimal_precision": 0, "margin_error": 2.309401076758503}, {"name": "h", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 5, "max": 5, "mean": 5.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": 6, "max": 6, "mean": 6.0, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 0, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "value_type": "string", "allow_null": true, "null_value_proportion": 10.0, "max_size": 5, "min_size": 0, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "q", "type": "binary", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "max_size": 6, "min_size": 0, "mean_size": 2.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 0.0, "max_size": 5, "min_size": 0, "mean_size": 3, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": true, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 2, "rows_null_count": 1}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 1, "rows_null_count": 2}]}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 2, "max_value": 2}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 2, "max_length": 2}, {"name": "Value", "type": "decimal", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": "1.123456789", "max": "5.123450000", "mean": "3.1234552538000", "decimal_precision": 9}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 6}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 0, "max_value": 51, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 4.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min_length": 0, "max_length": 6}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "min": 0, "max": 51, "mean": 21.8, "decimal_precision": 0, "margin_error": 22.241852440837743}, {"name": "active", "type": "boolean", "nullable": true, "rows_count": 5, "rows_not_null_count": 5, "rows_null_count": 0, "true_count": 2, "false_count": 3}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Description": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 0, "max_value": 0}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Price": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 0.0, "max_value": 0.0, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Description", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 0, "max_length": 0}, {"name": "Price", "type": "double", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 0.0, "max": 0.0, "mean": 0.0, "decimal_precision": 1, "margin_error": null}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "true_count": 0, "false_count": 1}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"Code": {"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "Active": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": null, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "Code", "type": "long", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}, {"name": "Active", "type": "boolean", "nullable": true, "rows_count": 0, "rows_not_null_count": 0, "rows_null_count": 0}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 4, "max_value": 4}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 50, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 4, "max_length": 4}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 21, "max": 50, "mean": 35, "decimal_precision": 0, "margin_error": 14.52583904633395}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"a": {"title": null, "description": null, "dtype": "bool", "nullable": false, "checks": {"isin": [true, false]}, "unique": false, "coerce": false, "required": true, "regex": false}, "b": {"title": null, "description": null, "dtype": "int8", "nullable": false, "checks": {"in_range": {"min_value": 1, "max_value": 1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "c": {"title": null, "description": null, "dtype": "date", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "d": {"title": null, "description": null, "dtype": "timedelta64[ns]", "nullable": false, "checks": {"in_range": {"min_value": 1123200000000000, "max_value": 1123200000000000, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "e": {"title": null, "description": null, "dtype": "float64", "nullable": false, "checks": {"in_range": {"min_value": 2.1, "max_value": 2.1, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "f": {"title": null, "description": null, "dtype": "float32", "nullable": false, "checks": {"in_range": {"min_value": 3.109999895095825, "max_value": 3.109999895095825, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "g": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 4, "max_value": 4, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "h": {"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"in_range": {"min_value": 5, "max_value": 5, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "i": {"title": null, "description": null, "dtype": "int16", "nullable": false, "checks": {"in_range": {"min_value": 6, "max_value": 6, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "j": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "m": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:53:00", "max_value": "2000-01-01 12:53:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "n": {"title": null, "description": null, "dtype": "datetime64[ns]", "nullable": false, "checks": {"in_range": {"min_value": "2000-01-01 12:00:00", "max_value": "2000-01-01 12:00:00", "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}, "p": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "q": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "r": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "s": {"title": null, "description": null, "dtype": "object", "nullable": true, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}, "t": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": null, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 2.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "a", "type": "boolean", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "true_count": 2, "false_count": 1}, {"name": "b", "type": "byte", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 1, "max": 1, "mean": 1, "decimal_precision": 0, "margin_error": 0.0}, {"name": "c", "type": "date", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01", "max": "2000-01-01", "format": "%Y-%m-%d"}, {"name": "d", "type": "daytimeinterval", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "13 days, 0:00:00", "max": "13 days, 0:00:00"}, {"name": "e", "type": "double", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 2.1, "max": 2.1, "mean": 2.1, "decimal_precision": 1, "margin_error": 0.0}, {"name": "f", "type": "float", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 3.109999895095825, "max": 3.109999895095825, "mean": 3.109999895095825, "decimal_precision": 15, "margin_error": 0.0}, {"name": "g", "type": "integer", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 4, "max": 4, "mean": 4, "decimal_precision": 0, "margin_error": 0.0}, {"name": "h", "type": "long", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 5, "max": 5, "mean": 5, "decimal_precision": 0, "margin_error": 0.0}, {"name": "i", "type": "short", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": 6, "max": 6, "mean": 6, "decimal_precision": 0, "margin_error": 0.0}, {"name": "j", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "m", "type": "timestamp", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:53:00", "max": "2000-01-01 12:53:00", "format": "%Y-%m-%dT%H:%M:%S%z"}, {"name": "n", "type": "timestamp_ntz", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "2000-01-01 12:00:00", "max": "2000-01-01 12:00:00", "format": "%Y-%m-%dH:%M:%S"}, {"name": "o", "type": "decimal", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "min": "3.1415161718190", "max": "3.1415161718190", "mean": "3.14151617181900000", "decimal_precision": 13}, {"name": "p", "type": "array", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "value_type": "string", "allow_null": true, "null_value_proportion": 13.333333333333334, "max_size": 5, "min_size": 5, "mean_size": 5, "is_unique_size": true}, {"name": "q", "type": "binary", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "max_size": 6, "min_size": 2, "mean_size": 3.6666666666666665, "is_unique_size": false}, {"name": "r", "type": "map", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "key_type": "string", "value_type": "string", "allow_null": true, "null_value_proportion": 20.0, "max_size": 5, "min_size": 1, "mean_size": 3.3333333333333335, "is_unique_size": false}, {"name": "s", "type": "void", "nullable": true, "rows_count": 3, "rows_not_null_count": 0, "rows_null_count": 3}, {"name": "t", "type": "struct", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0, "metadata": [{"name": "inner1", "type": "string", "nullable": false, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}, {"name": "inner2", "type": "long", "nullable": true, "rows_count": 3, "rows_not_null_count": 3, "rows_null_count": 0}]}]}}
@@ -1 +0,0 @@
1
- {"pandera_schema": {"schema_type": "dataframe", "version": "0.20.4", "columns": {"name": {"title": null, "description": null, "dtype": "object", "nullable": false, "checks": {"str_length": {"min_value": 7, "max_value": 7}}, "unique": false, "coerce": false, "required": true, "regex": false}, "age": {"title": null, "description": null, "dtype": "int32", "nullable": false, "checks": {"in_range": {"min_value": 21, "max_value": 21, "include_min": true, "include_max": true}}, "unique": false, "coerce": false, "required": true, "regex": false}}, "checks": null, "index": [{"title": null, "description": null, "dtype": "int64", "nullable": false, "checks": {"greater_than_or_equal_to": 0.0, "less_than_or_equal_to": 0.0}, "name": null, "unique": false, "coerce": false}], "dtype": null, "coerce": true, "strict": false, "name": null, "ordered": false, "unique": null, "report_duplicates": "all", "unique_column_names": false, "add_missing_columns": false, "title": null, "description": null}, "custom_data": {"columns": [{"name": "name", "type": "string", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min_length": 7, "max_length": 7}, {"name": "age", "type": "integer", "nullable": true, "rows_count": 1, "rows_not_null_count": 1, "rows_null_count": 0, "min": 21, "max": 21, "mean": 21.0, "decimal_precision": 0, "margin_error": null}]}}