structurize 2.16.2__py3-none-any.whl → 2.16.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. structurize-2.16.6.dist-info/METADATA +107 -0
  48. structurize-2.16.6.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/METADATA +0 -805
  51. structurize-2.16.2.dist-info/RECORD +0 -51
  52. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/WHEEL +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/entry_points.txt +0 -0
  54. {structurize-2.16.2.dist-info → structurize-2.16.6.dist-info}/top_level.txt +0 -0
avrotize/parquettoavro.py CHANGED
@@ -1,157 +1,157 @@
1
- # coding: utf-8
2
- """
3
- Module to convert Parquet schema to Avro schema.
4
- """
5
-
6
- import json
7
- import os
8
- import pyarrow as pa
9
- import pyarrow.parquet as pq
10
-
11
- from avrotize.common import avro_name
12
-
13
class ParquetToAvroConverter:
    """
    Class to convert Parquet schema to Avro schema.

    Only the schema is translated; no row data is converted.
    """

    def __init__(self, parquet_file_path, avro_schema_path, namespace=""):
        """
        Initialize the converter with file paths and namespace.

        :param parquet_file_path: Path to the Parquet file.
        :param avro_schema_path: Path to save the Avro schema file.
        :param namespace: Namespace for Avro records.
        """
        self.parquet_file_path = parquet_file_path
        self.avro_schema_path = avro_schema_path
        self.namespace = namespace

    def convert(self):
        """
        Convert Parquet schema to Avro schema and save to file.

        The top-level record is named after the Parquet file name (the part
        before the first dot), passed through ``avro_name``. The result is
        written as indented JSON to ``self.avro_schema_path``.
        """
        schema = self._read_arrow_schema()

        # Infer the name of the schema from the parquet file name
        schema_name = avro_name(os.path.basename(self.parquet_file_path).split(".")[0])

        avro_schema = {
            "type": "record",
            "name": schema_name,
            "namespace": self.namespace,
            "fields": [
                self.convert_parquet_field_to_avro_field(field) for field in schema
            ],
        }

        with open(self.avro_schema_path, "w", encoding="utf-8") as file:
            json.dump(avro_schema, file, indent=2)

    def _read_arrow_schema(self):
        """
        Return the Arrow schema of the Parquet input.

        For a single file only the footer metadata is read, so no row data is
        loaded. Any other kind of path (for example a dataset directory) keeps
        the previous behaviour of reading the table and taking its schema.

        :return: The Arrow schema describing the Parquet data.
        """
        if os.path.isfile(self.parquet_file_path):
            return pq.read_schema(self.parquet_file_path)
        return pq.read_table(self.parquet_file_path).schema

    def convert_parquet_field_to_avro_field(self, field):
        """
        Convert a Parquet field to an Avro field.

        :param field: Parquet field to convert.
        :return: Avro field as a dictionary with ``name`` and ``type``, plus
            ``doc`` when the field carries ``description`` metadata.
        """
        # NOTE(review): field nullability is not reflected in the result; no
        # ["null", T] unions are emitted.
        avro_field = {
            "name": field.name,
            "type": self.convert_parquet_type_to_avro_type(field.type, field.name),
        }
        if field.metadata and b'description' in field.metadata:
            avro_field["doc"] = field.metadata[b'description'].decode("utf-8")
        return avro_field

    @staticmethod
    def _avro_timestamp(unit):
        """
        Build the Avro timestamp type for an Arrow timestamp unit.

        :param unit: Arrow timestamp unit: "s", "ms", "us" or "ns".
        :return: Avro ``long`` with a timestamp logical type. Microsecond and
            nanosecond sources map to ``timestamp-micros`` so sub-millisecond
            precision is not declared away; everything else stays
            ``timestamp-millis``.
        """
        logical_type = "timestamp-micros" if unit in ("us", "ns") else "timestamp-millis"
        return {"type": "long", "logicalType": logical_type}

    def convert_parquet_type_to_avro_type(self, parquet_type, field_name):
        """
        Convert a Parquet type to an Avro type.

        :param parquet_type: Parquet type to convert.
        :param field_name: Name of the field being converted; used to name
            records generated for struct types.
        :return: Avro type as a string or dictionary. Types without a specific
            mapping fall back to "string".
        """
        types = pa.types

        # Built per call (not at class level) so importing this module does
        # not evaluate pyarrow attributes eagerly.
        primitive_mappings = (
            (types.is_int8, "int"),
            (types.is_int16, "int"),
            (types.is_int32, "int"),
            (types.is_int64, "long"),
            (types.is_uint8, "int"),
            (types.is_uint16, "int"),
            (types.is_uint32, "long"),
            # Avro has no unsigned types; uint64 values above 2**63 - 1 do not
            # fit into an Avro long.
            (types.is_uint64, "long"),
            (types.is_float16, "float"),
            (types.is_float32, "float"),
            (types.is_float64, "double"),
            (types.is_boolean, "boolean"),
            (types.is_binary, "bytes"),
            (types.is_large_binary, "bytes"),
            (types.is_string, "string"),
            (types.is_large_string, "string"),
        )
        for matches, avro_type in primitive_mappings:
            if matches(parquet_type):
                return avro_type

        if types.is_timestamp(parquet_type):
            return self._avro_timestamp(parquet_type.unit)
        if types.is_date32(parquet_type):
            return {"type": "int", "logicalType": "date"}
        if types.is_date64(parquet_type):
            return {"type": "long", "logicalType": "timestamp-millis"}

        # All list flavours expose the element type as ``value_type``.
        if (
            types.is_list(parquet_type)
            or types.is_large_list(parquet_type)
            or types.is_fixed_size_list(parquet_type)
        ):
            return {
                "type": "array",
                "items": self.convert_parquet_type_to_avro_type(parquet_type.value_type, field_name),
            }
        if types.is_map(parquet_type):
            # NOTE(review): the map key type is not inspected here.
            return {
                "type": "map",
                "values": self.convert_parquet_type_to_avro_type(parquet_type.item_type, field_name),
            }
        if types.is_struct(parquet_type):
            # Reuse the field converter so nested fields keep their "doc".
            return {
                "type": "record",
                "name": f"{field_name}Type",
                "namespace": self.namespace,
                "fields": [
                    self.convert_parquet_field_to_avro_field(nested_field)
                    for nested_field in parquet_type
                ],
            }
        # Decimal is tested before fixed-size binary so a decimal type can
        # never be reported as plain bytes.
        if types.is_decimal(parquet_type):
            return {
                "type": "bytes",
                "logicalType": "decimal",
                "precision": parquet_type.precision,
                "scale": parquet_type.scale,
            }
        if types.is_fixed_size_binary(parquet_type):
            return "bytes"
        return "string"
142
-
143
def convert_parquet_to_avro(parquet_file_path, avro_file_path, namespace=""):
    """
    Write the Avro schema that corresponds to a Parquet file.

    :param parquet_file_path: Path to the Parquet file.
    :param avro_file_path: Path to save the Avro schema file.
    :param namespace: Namespace for Avro records.
    :raises FileNotFoundError: If ``parquet_file_path`` does not exist.
    """
    source_is_present = os.path.exists(parquet_file_path)
    if source_is_present:
        ParquetToAvroConverter(parquet_file_path, avro_file_path, namespace).convert()
        return

    raise FileNotFoundError(f"Parquet file not found: {parquet_file_path}")
157
-
1
+ # coding: utf-8
2
+ """
3
+ Module to convert Parquet schema to Avro schema.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ import pyarrow as pa
9
+ import pyarrow.parquet as pq
10
+
11
+ from avrotize.common import avro_name
12
+
13
class ParquetToAvroConverter:
    """
    Class to convert Parquet schema to Avro schema.

    Only the schema is translated; no row data is converted.
    """

    def __init__(self, parquet_file_path, avro_schema_path, namespace=""):
        """
        Initialize the converter with file paths and namespace.

        :param parquet_file_path: Path to the Parquet file.
        :param avro_schema_path: Path to save the Avro schema file.
        :param namespace: Namespace for Avro records.
        """
        self.parquet_file_path = parquet_file_path
        self.avro_schema_path = avro_schema_path
        self.namespace = namespace

    def convert(self):
        """
        Convert Parquet schema to Avro schema and save to file.

        The top-level record is named after the Parquet file name (the part
        before the first dot), passed through ``avro_name``. The result is
        written as indented JSON to ``self.avro_schema_path``.
        """
        schema = self._read_arrow_schema()

        # Infer the name of the schema from the parquet file name
        schema_name = avro_name(os.path.basename(self.parquet_file_path).split(".")[0])

        avro_schema = {
            "type": "record",
            "name": schema_name,
            "namespace": self.namespace,
            "fields": [
                self.convert_parquet_field_to_avro_field(field) for field in schema
            ],
        }

        with open(self.avro_schema_path, "w", encoding="utf-8") as file:
            json.dump(avro_schema, file, indent=2)

    def _read_arrow_schema(self):
        """
        Return the Arrow schema of the Parquet input.

        For a single file only the footer metadata is read, so no row data is
        loaded. Any other kind of path (for example a dataset directory) keeps
        the previous behaviour of reading the table and taking its schema.

        :return: The Arrow schema describing the Parquet data.
        """
        if os.path.isfile(self.parquet_file_path):
            return pq.read_schema(self.parquet_file_path)
        return pq.read_table(self.parquet_file_path).schema

    def convert_parquet_field_to_avro_field(self, field):
        """
        Convert a Parquet field to an Avro field.

        :param field: Parquet field to convert.
        :return: Avro field as a dictionary with ``name`` and ``type``, plus
            ``doc`` when the field carries ``description`` metadata.
        """
        # NOTE(review): field nullability is not reflected in the result; no
        # ["null", T] unions are emitted.
        avro_field = {
            "name": field.name,
            "type": self.convert_parquet_type_to_avro_type(field.type, field.name),
        }
        if field.metadata and b'description' in field.metadata:
            avro_field["doc"] = field.metadata[b'description'].decode("utf-8")
        return avro_field

    @staticmethod
    def _avro_timestamp(unit):
        """
        Build the Avro timestamp type for an Arrow timestamp unit.

        :param unit: Arrow timestamp unit: "s", "ms", "us" or "ns".
        :return: Avro ``long`` with a timestamp logical type. Microsecond and
            nanosecond sources map to ``timestamp-micros`` so sub-millisecond
            precision is not declared away; everything else stays
            ``timestamp-millis``.
        """
        logical_type = "timestamp-micros" if unit in ("us", "ns") else "timestamp-millis"
        return {"type": "long", "logicalType": logical_type}

    def convert_parquet_type_to_avro_type(self, parquet_type, field_name):
        """
        Convert a Parquet type to an Avro type.

        :param parquet_type: Parquet type to convert.
        :param field_name: Name of the field being converted; used to name
            records generated for struct types.
        :return: Avro type as a string or dictionary. Types without a specific
            mapping fall back to "string".
        """
        types = pa.types

        # Built per call (not at class level) so importing this module does
        # not evaluate pyarrow attributes eagerly.
        primitive_mappings = (
            (types.is_int8, "int"),
            (types.is_int16, "int"),
            (types.is_int32, "int"),
            (types.is_int64, "long"),
            (types.is_uint8, "int"),
            (types.is_uint16, "int"),
            (types.is_uint32, "long"),
            # Avro has no unsigned types; uint64 values above 2**63 - 1 do not
            # fit into an Avro long.
            (types.is_uint64, "long"),
            (types.is_float16, "float"),
            (types.is_float32, "float"),
            (types.is_float64, "double"),
            (types.is_boolean, "boolean"),
            (types.is_binary, "bytes"),
            (types.is_large_binary, "bytes"),
            (types.is_string, "string"),
            (types.is_large_string, "string"),
        )
        for matches, avro_type in primitive_mappings:
            if matches(parquet_type):
                return avro_type

        if types.is_timestamp(parquet_type):
            return self._avro_timestamp(parquet_type.unit)
        if types.is_date32(parquet_type):
            return {"type": "int", "logicalType": "date"}
        if types.is_date64(parquet_type):
            return {"type": "long", "logicalType": "timestamp-millis"}

        # All list flavours expose the element type as ``value_type``.
        if (
            types.is_list(parquet_type)
            or types.is_large_list(parquet_type)
            or types.is_fixed_size_list(parquet_type)
        ):
            return {
                "type": "array",
                "items": self.convert_parquet_type_to_avro_type(parquet_type.value_type, field_name),
            }
        if types.is_map(parquet_type):
            # NOTE(review): the map key type is not inspected here.
            return {
                "type": "map",
                "values": self.convert_parquet_type_to_avro_type(parquet_type.item_type, field_name),
            }
        if types.is_struct(parquet_type):
            # Reuse the field converter so nested fields keep their "doc".
            return {
                "type": "record",
                "name": f"{field_name}Type",
                "namespace": self.namespace,
                "fields": [
                    self.convert_parquet_field_to_avro_field(nested_field)
                    for nested_field in parquet_type
                ],
            }
        # Decimal is tested before fixed-size binary so a decimal type can
        # never be reported as plain bytes.
        if types.is_decimal(parquet_type):
            return {
                "type": "bytes",
                "logicalType": "decimal",
                "precision": parquet_type.precision,
                "scale": parquet_type.scale,
            }
        if types.is_fixed_size_binary(parquet_type):
            return "bytes"
        return "string"
142
+
143
def convert_parquet_to_avro(parquet_file_path, avro_file_path, namespace=""):
    """
    Write the Avro schema that corresponds to a Parquet file.

    :param parquet_file_path: Path to the Parquet file.
    :param avro_file_path: Path to save the Avro schema file.
    :param namespace: Namespace for Avro records.
    :raises FileNotFoundError: If ``parquet_file_path`` does not exist.
    """
    source_is_present = os.path.exists(parquet_file_path)
    if source_is_present:
        ParquetToAvroConverter(parquet_file_path, avro_file_path, namespace).convert()
        return

    raise FileNotFoundError(f"Parquet file not found: {parquet_file_path}")
157
+