structurize 3.0.1__tar.gz → 3.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {structurize-3.0.1/structurize.egg-info → structurize-3.1.0}/PKG-INFO +1 -1
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/_version.py +3 -3
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotize.py +4 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotoiceberg.py +111 -13
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotots.py +62 -7
- structurize-3.1.0/avrotize/avrovalidator.py +518 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/commands.json +485 -2
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/dependencies/typescript/node22/package.json +1 -1
- structurize-3.1.0/avrotize/jsontoschema.py +151 -0
- structurize-3.1.0/avrotize/schema_inference.py +825 -0
- structurize-3.1.0/avrotize/sqltoavro.py +1159 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretodb.py +1 -1
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretoiceberg.py +113 -13
- structurize-3.1.0/avrotize/validate.py +242 -0
- structurize-3.1.0/avrotize/xmltoschema.py +122 -0
- {structurize-3.0.1 → structurize-3.1.0/structurize.egg-info}/PKG-INFO +1 -1
- {structurize-3.0.1 → structurize-3.1.0}/structurize.egg-info/SOURCES.txt +12 -0
- {structurize-3.0.1 → structurize-3.1.0}/.gitignore +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/LICENSE +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/MANIFEST.in +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/README.md +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/__init__.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/__main__.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/asn1toavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotocpp.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotocsharp.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotocsv.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotodatapackage.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotodb.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotogo.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotographql.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotojava.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotojs.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotojsons.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotojstruct.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotokusto.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotomd.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotools.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotoparquet.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotoproto.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotopython.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotorust.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/avrotoxsd.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/cddltostructure.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/common.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/constants.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/csvtoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/datapackagetoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/dependency_resolver.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/dependency_version.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/jsonstoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/jsonstostructure.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/jstructtoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/kstructtoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/kustotoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/openapitostructure.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/parquettoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/proto2parser.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/proto3parser.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/prototoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretocddl.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretocpp.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretocsharp.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretocsv.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretodatapackage.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretogo.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretographql.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretojava.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretojs.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretojsons.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretokusto.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretomd.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretoproto.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretopython.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretorust.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretots.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/structuretoxsd.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/avrotize/xsdtoavro.py +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/build.ps1 +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/build.sh +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/pyproject.toml +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/setup.cfg +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/structurize.egg-info/dependency_links.txt +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/structurize.egg-info/entry_points.txt +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/structurize.egg-info/requires.txt +0 -0
- {structurize-3.0.1 → structurize-3.1.0}/structurize.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: structurize
|
|
3
|
-
Version: 3.0.1
|
|
3
|
+
Version: 3.1.0
|
|
4
4
|
Summary: Tools to convert from and to JSON Structure from various other schema languages.
|
|
5
5
|
Author-email: Clemens Vasters <clemensv@microsoft.com>
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '3.0.1'
|
|
32
|
-
__version_tuple__ = version_tuple = (3, 0, 1)
|
|
31
|
+
__version__ = version = '3.1.0'
|
|
32
|
+
__version_tuple__ = version_tuple = (3, 1, 0)
|
|
33
33
|
|
|
34
|
-
__commit_id__ = commit_id = '
|
|
34
|
+
__commit_id__ = commit_id = 'g67cffc312'
|
|
@@ -38,6 +38,10 @@ def create_subparsers(subparsers, commands):
|
|
|
38
38
|
kwargs['choices'] = arg['choices']
|
|
39
39
|
if 'default' in arg:
|
|
40
40
|
kwargs['default'] = arg['default']
|
|
41
|
+
# Handle dest for optional arguments only (positional args can't have dest)
|
|
42
|
+
arg_is_positional = not arg['name'].startswith('-')
|
|
43
|
+
if 'dest' in arg and not arg_is_positional:
|
|
44
|
+
kwargs['dest'] = arg['dest']
|
|
41
45
|
if arg['type'] == 'bool':
|
|
42
46
|
kwargs['action'] = 'store_true'
|
|
43
47
|
del kwargs['type']
|
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
from typing import Dict, List
|
|
6
6
|
import pyarrow as pa
|
|
7
7
|
from pyiceberg.schema import Schema, NestedField
|
|
8
|
+
from pyiceberg.io.pyarrow import PyArrowFileIO, schema_to_pyarrow
|
|
8
9
|
from pyiceberg.types import (
|
|
9
10
|
BooleanType,
|
|
10
11
|
IntegerType,
|
|
@@ -21,11 +22,74 @@ from pyiceberg.types import (
|
|
|
21
22
|
MapType,
|
|
22
23
|
StructType
|
|
23
24
|
)
|
|
24
|
-
from pyiceberg.io.pyarrow import PyArrowFileIO, schema_to_pyarrow
|
|
25
25
|
|
|
26
26
|
JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
|
|
27
27
|
|
|
28
28
|
|
|
29
|
+
def iceberg_type_to_json(iceberg_type) -> str | Dict:
|
|
30
|
+
"""
|
|
31
|
+
Serialize an Iceberg type to JSON per Iceberg Table Spec Appendix C.
|
|
32
|
+
|
|
33
|
+
Primitive types are serialized as strings. Complex types (struct, list, map)
|
|
34
|
+
are serialized as JSON objects with their nested structure.
|
|
35
|
+
"""
|
|
36
|
+
# Primitive types map to simple strings
|
|
37
|
+
if isinstance(iceberg_type, BooleanType):
|
|
38
|
+
return "boolean"
|
|
39
|
+
elif isinstance(iceberg_type, IntegerType):
|
|
40
|
+
return "int"
|
|
41
|
+
elif isinstance(iceberg_type, LongType):
|
|
42
|
+
return "long"
|
|
43
|
+
elif isinstance(iceberg_type, FloatType):
|
|
44
|
+
return "float"
|
|
45
|
+
elif isinstance(iceberg_type, DoubleType):
|
|
46
|
+
return "double"
|
|
47
|
+
elif isinstance(iceberg_type, StringType):
|
|
48
|
+
return "string"
|
|
49
|
+
elif isinstance(iceberg_type, BinaryType):
|
|
50
|
+
return "binary"
|
|
51
|
+
elif isinstance(iceberg_type, DateType):
|
|
52
|
+
return "date"
|
|
53
|
+
elif isinstance(iceberg_type, TimestampType):
|
|
54
|
+
return "timestamp"
|
|
55
|
+
elif isinstance(iceberg_type, DecimalType):
|
|
56
|
+
return f"decimal({iceberg_type.precision},{iceberg_type.scale})"
|
|
57
|
+
elif isinstance(iceberg_type, FixedType):
|
|
58
|
+
return f"fixed[{iceberg_type.length}]"
|
|
59
|
+
elif isinstance(iceberg_type, ListType):
|
|
60
|
+
return {
|
|
61
|
+
"type": "list",
|
|
62
|
+
"element-id": iceberg_type.element_id,
|
|
63
|
+
"element-required": iceberg_type.element_required,
|
|
64
|
+
"element": iceberg_type_to_json(iceberg_type.element_type)
|
|
65
|
+
}
|
|
66
|
+
elif isinstance(iceberg_type, MapType):
|
|
67
|
+
return {
|
|
68
|
+
"type": "map",
|
|
69
|
+
"key-id": iceberg_type.key_id,
|
|
70
|
+
"key": iceberg_type_to_json(iceberg_type.key_type),
|
|
71
|
+
"value-id": iceberg_type.value_id,
|
|
72
|
+
"value-required": iceberg_type.value_required,
|
|
73
|
+
"value": iceberg_type_to_json(iceberg_type.value_type)
|
|
74
|
+
}
|
|
75
|
+
elif isinstance(iceberg_type, StructType):
|
|
76
|
+
return {
|
|
77
|
+
"type": "struct",
|
|
78
|
+
"fields": [
|
|
79
|
+
{
|
|
80
|
+
"id": field.field_id,
|
|
81
|
+
"name": field.name,
|
|
82
|
+
"required": field.required,
|
|
83
|
+
"type": iceberg_type_to_json(field.field_type)
|
|
84
|
+
}
|
|
85
|
+
for field in iceberg_type.fields
|
|
86
|
+
]
|
|
87
|
+
}
|
|
88
|
+
else:
|
|
89
|
+
# Fallback for unknown types
|
|
90
|
+
return str(iceberg_type)
|
|
91
|
+
|
|
92
|
+
|
|
29
93
|
class AvroToIcebergConverter:
|
|
30
94
|
"""Class to convert Avro schema to Iceberg schema."""
|
|
31
95
|
|
|
@@ -42,8 +106,16 @@ class AvroToIcebergConverter:
|
|
|
42
106
|
"""Get the full name of a record type."""
|
|
43
107
|
return f"{namespace}.{name}" if namespace else name
|
|
44
108
|
|
|
45
|
-
def convert_avro_to_iceberg(self, avro_schema_path: str, avro_record_type: str, output_path: str, emit_cloudevents_columns: bool=False):
|
|
46
|
-
"""Convert an Avro schema to an Iceberg schema.
|
|
109
|
+
def convert_avro_to_iceberg(self, avro_schema_path: str, avro_record_type: str, output_path: str, emit_cloudevents_columns: bool=False, output_format: str="arrow"):
|
|
110
|
+
"""Convert an Avro schema to an Iceberg schema.
|
|
111
|
+
|
|
112
|
+
Args:
|
|
113
|
+
avro_schema_path: Path to the Avro schema file
|
|
114
|
+
avro_record_type: Record type to convert (or None for the root)
|
|
115
|
+
output_path: Path to write the Iceberg schema
|
|
116
|
+
emit_cloudevents_columns: Whether to add CloudEvents columns
|
|
117
|
+
output_format: Output format - 'arrow' for binary Arrow IPC (default), 'schema' for JSON
|
|
118
|
+
"""
|
|
47
119
|
schema_file = avro_schema_path
|
|
48
120
|
if not schema_file:
|
|
49
121
|
print("Please specify the avro schema file")
|
|
@@ -96,14 +168,32 @@ class AvroToIcebergConverter:
|
|
|
96
168
|
])
|
|
97
169
|
|
|
98
170
|
iceberg_schema = Schema(*iceberg_fields)
|
|
99
|
-
|
|
100
|
-
print(f"Iceberg schema created: {arrow_schema}")
|
|
171
|
+
print(f"Iceberg schema created: {iceberg_schema}")
|
|
101
172
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
173
|
+
if output_format == "arrow":
|
|
174
|
+
# Write as binary PyArrow schema
|
|
175
|
+
arrow_schema = schema_to_pyarrow(iceberg_schema)
|
|
176
|
+
file_io = PyArrowFileIO()
|
|
177
|
+
output_file = file_io.new_output("file://" + output_path)
|
|
178
|
+
with output_file.create(overwrite=True) as f:
|
|
179
|
+
pa.output_stream(f).write(arrow_schema.serialize().to_pybytes())
|
|
180
|
+
else:
|
|
181
|
+
# Write Iceberg schema as spec-compliant JSON (per Iceberg Table Spec Appendix C)
|
|
182
|
+
schema_json = {
|
|
183
|
+
"type": "struct",
|
|
184
|
+
"schema-id": 0,
|
|
185
|
+
"fields": [
|
|
186
|
+
{
|
|
187
|
+
"id": field.field_id,
|
|
188
|
+
"name": field.name,
|
|
189
|
+
"required": field.required,
|
|
190
|
+
"type": iceberg_type_to_json(field.field_type)
|
|
191
|
+
}
|
|
192
|
+
for field in iceberg_schema.fields
|
|
193
|
+
]
|
|
194
|
+
}
|
|
195
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
|
196
|
+
json.dump(schema_json, f, indent=2)
|
|
107
197
|
|
|
108
198
|
def convert_avro_type_to_iceberg_type(self, avro_type):
|
|
109
199
|
"""Convert an Avro type to an Iceberg type."""
|
|
@@ -203,8 +293,16 @@ class AvroToIcebergConverter:
|
|
|
203
293
|
return StringType()
|
|
204
294
|
|
|
205
295
|
|
|
206
|
-
def convert_avro_to_iceberg(avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns=False):
|
|
207
|
-
"""Convert an Avro schema to an Iceberg schema.
|
|
296
|
+
def convert_avro_to_iceberg(avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns=False, output_format="arrow"):
|
|
297
|
+
"""Convert an Avro schema to an Iceberg schema.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
avro_schema_path: Path to the Avro schema file
|
|
301
|
+
avro_record_type: Record type to convert (or None for the root)
|
|
302
|
+
output_path: Path to write the Iceberg schema
|
|
303
|
+
emit_cloudevents_columns: Whether to add CloudEvents columns
|
|
304
|
+
output_format: Output format - 'arrow' for binary Arrow IPC (default), 'schema' for JSON
|
|
305
|
+
"""
|
|
208
306
|
converter = AvroToIcebergConverter()
|
|
209
307
|
converter.convert_avro_to_iceberg(
|
|
210
|
-
avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns)
|
|
308
|
+
avro_schema_path, avro_record_type, output_path, emit_cloudevents_columns, output_format)
|
|
@@ -500,10 +500,11 @@ class AvroToTypeScript:
|
|
|
500
500
|
"""Generate TypeScript type declaration file for avro-js module."""
|
|
501
501
|
avro_js_types = '''declare module 'avro-js' {
|
|
502
502
|
/**
|
|
503
|
-
* Avro Type
|
|
503
|
+
* Avro Type interface.
|
|
504
|
+
* Represents the structure of Type instances returned by avro.parse().
|
|
504
505
|
* Provides methods for encoding, decoding, and validating Avro data.
|
|
505
506
|
*/
|
|
506
|
-
export
|
|
507
|
+
export interface Type {
|
|
507
508
|
/**
|
|
508
509
|
* Encode a value to a Buffer.
|
|
509
510
|
* @param obj - Value to encode
|
|
@@ -575,12 +576,66 @@ class AvroToTypeScript:
|
|
|
575
576
|
}
|
|
576
577
|
|
|
577
578
|
/**
|
|
578
|
-
*
|
|
579
|
-
*
|
|
580
|
-
* @param options - Parse options
|
|
581
|
-
* @returns Type instance
|
|
579
|
+
* avro-js default export interface.
|
|
580
|
+
* This module is CommonJS, so in ESM context it only has a default export.
|
|
582
581
|
*/
|
|
583
|
-
export
|
|
582
|
+
export interface Avro {
|
|
583
|
+
/**
|
|
584
|
+
* Type class constructor.
|
|
585
|
+
*/
|
|
586
|
+
Type: any;
|
|
587
|
+
|
|
588
|
+
/**
|
|
589
|
+
* Parse an Avro schema and return a Type instance.
|
|
590
|
+
* @param schema - Schema as string or object
|
|
591
|
+
* @param options - Parse options
|
|
592
|
+
* @returns Type instance
|
|
593
|
+
*/
|
|
594
|
+
parse(schema: string | any, options?: any): Type;
|
|
595
|
+
|
|
596
|
+
/**
|
|
597
|
+
* Protocol class constructor.
|
|
598
|
+
*/
|
|
599
|
+
Protocol: any;
|
|
600
|
+
|
|
601
|
+
/**
|
|
602
|
+
* Create a file decoder.
|
|
603
|
+
*/
|
|
604
|
+
createFileDecoder(path: string, options?: any): any;
|
|
605
|
+
|
|
606
|
+
/**
|
|
607
|
+
* Create a file encoder.
|
|
608
|
+
*/
|
|
609
|
+
createFileEncoder(path: string, schema: any, options?: any): any;
|
|
610
|
+
|
|
611
|
+
/**
|
|
612
|
+
* Extract file header.
|
|
613
|
+
*/
|
|
614
|
+
extractFileHeader(buffer: Buffer): any;
|
|
615
|
+
|
|
616
|
+
/**
|
|
617
|
+
* Streams utilities.
|
|
618
|
+
*/
|
|
619
|
+
streams: any;
|
|
620
|
+
|
|
621
|
+
/**
|
|
622
|
+
* Built-in types.
|
|
623
|
+
*/
|
|
624
|
+
types: any;
|
|
625
|
+
|
|
626
|
+
/**
|
|
627
|
+
* Validator (deprecated).
|
|
628
|
+
*/
|
|
629
|
+
Validator: any;
|
|
630
|
+
|
|
631
|
+
/**
|
|
632
|
+
* ProtocolValidator (deprecated).
|
|
633
|
+
*/
|
|
634
|
+
ProtocolValidator: any;
|
|
635
|
+
}
|
|
636
|
+
|
|
637
|
+
const avro: Avro;
|
|
638
|
+
export default avro;
|
|
584
639
|
}
|
|
585
640
|
'''
|
|
586
641
|
|