structurize 2.16.2__py3-none-any.whl → 2.16.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. avrotize/__init__.py +63 -63
  2. avrotize/__main__.py +5 -5
  3. avrotize/_version.py +34 -34
  4. avrotize/asn1toavro.py +160 -160
  5. avrotize/avrotize.py +152 -152
  6. avrotize/avrotocpp.py +483 -483
  7. avrotize/avrotocsharp.py +992 -992
  8. avrotize/avrotocsv.py +121 -121
  9. avrotize/avrotodatapackage.py +173 -173
  10. avrotize/avrotodb.py +1383 -1383
  11. avrotize/avrotogo.py +476 -476
  12. avrotize/avrotographql.py +197 -197
  13. avrotize/avrotoiceberg.py +210 -210
  14. avrotize/avrotojava.py +1023 -1023
  15. avrotize/avrotojs.py +250 -250
  16. avrotize/avrotojsons.py +481 -481
  17. avrotize/avrotojstruct.py +345 -345
  18. avrotize/avrotokusto.py +363 -363
  19. avrotize/avrotomd.py +137 -137
  20. avrotize/avrotools.py +168 -168
  21. avrotize/avrotoparquet.py +208 -208
  22. avrotize/avrotoproto.py +358 -358
  23. avrotize/avrotopython.py +622 -622
  24. avrotize/avrotorust.py +435 -435
  25. avrotize/avrotots.py +598 -598
  26. avrotize/avrotoxsd.py +344 -344
  27. avrotize/commands.json +2493 -2433
  28. avrotize/common.py +828 -828
  29. avrotize/constants.py +4 -4
  30. avrotize/csvtoavro.py +131 -131
  31. avrotize/datapackagetoavro.py +76 -76
  32. avrotize/dependency_resolver.py +348 -348
  33. avrotize/jsonstoavro.py +1698 -1698
  34. avrotize/jsonstostructure.py +2642 -2642
  35. avrotize/jstructtoavro.py +878 -878
  36. avrotize/kstructtoavro.py +93 -93
  37. avrotize/kustotoavro.py +455 -455
  38. avrotize/parquettoavro.py +157 -157
  39. avrotize/proto2parser.py +497 -497
  40. avrotize/proto3parser.py +402 -402
  41. avrotize/prototoavro.py +382 -382
  42. avrotize/structuretocsharp.py +2005 -2005
  43. avrotize/structuretojsons.py +498 -498
  44. avrotize/structuretopython.py +772 -772
  45. avrotize/structuretots.py +653 -0
  46. avrotize/xsdtoavro.py +413 -413
  47. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/METADATA +848 -805
  48. structurize-2.16.5.dist-info/RECORD +52 -0
  49. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/licenses/LICENSE +200 -200
  50. structurize-2.16.2.dist-info/RECORD +0 -51
  51. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/WHEEL +0 -0
  52. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/entry_points.txt +0 -0
  53. {structurize-2.16.2.dist-info → structurize-2.16.5.dist-info}/top_level.txt +0 -0
avrotize/constants.py CHANGED
@@ -1,5 +1,5 @@
1
- """Constants for the avrotize package."""
2
-
3
- AVRO_VERSION = '1.12.0'
4
- JACKSON_VERSION = '2.18.2'
1
+ """Constants for the avrotize package."""
2
+
3
+ AVRO_VERSION = '1.12.0'
4
+ JACKSON_VERSION = '2.18.2'
5
5
  JDK_VERSION = '21'
avrotize/csvtoavro.py CHANGED
@@ -1,132 +1,132 @@
1
- # coding: utf-8
2
- """
3
- Module to convert CSV schema to Avro schema.
4
- """
5
-
6
- import json
7
- import os
8
- import pandas as pd
9
-
10
- from avrotize.common import pascal
11
-
12
- class CSVToAvroConverter:
13
- """
14
- Class to convert CSV schema to Avro schema.
15
- """
16
-
17
- def __init__(self, csv_file_path, avro_schema_path, namespace=""):
18
- """
19
- Initialize the converter with file paths and namespace.
20
-
21
- :param csv_file_path: Path to the CSV file.
22
- :param avro_schema_path: Path to save the Avro schema file.
23
- :param csv_schema_path: Optional path to CSV schema file.
24
- :param namespace: Namespace for Avro records.
25
- """
26
- self.csv_file_path = csv_file_path
27
- self.avro_schema_path = avro_schema_path
28
- self.namespace = namespace
29
-
30
- def convert(self):
31
- """
32
- Convert CSV schema to Avro schema and save to file.
33
- """
34
- schema = self.infer_schema()
35
-
36
- # Infer the name of the schema from the CSV file name
37
- schema_name = os.path.splitext(os.path.basename(self.csv_file_path))[0].replace(" ", "_")
38
-
39
- avro_schema = {}
40
- avro_schema["type"] = "record"
41
- avro_schema["name"] = schema_name
42
- if self.namespace:
43
- avro_schema["namespace"] = self.namespace
44
- avro_schema["fields"] = schema
45
-
46
- with open(self.avro_schema_path, "w", encoding="utf-8") as file:
47
- json.dump(avro_schema, file, indent=2)
48
-
49
- def infer_schema(self):
50
- """
51
- Infer the schema from CSV headers or data.
52
- :return: List of fields in Avro schema format.
53
- """
54
- df = pd.read_csv(self.csv_file_path)
55
- schema = []
56
- for column in df.columns:
57
- avro_field = {
58
- "name": pascal(column),
59
- "type": self.infer_avro_type(df[column])
60
- }
61
- if avro_field["name"] != column:
62
- avro_field["altnames"] = { "csv": column}
63
- schema.append(avro_field)
64
- return schema
65
-
66
- def infer_avro_type(self, series):
67
- """
68
- Infer Avro type from pandas series.
69
- :param series: Pandas series to infer type from.
70
- :return: Avro type as string.
71
- """
72
- if pd.api.types.is_integer_dtype(series):
73
- return "int"
74
- if pd.api.types.is_float_dtype(series):
75
- return "double"
76
- if pd.api.types.is_bool_dtype(series):
77
- return "boolean"
78
- if pd.api.types.is_datetime64_any_dtype(series):
79
- return {"type": "long", "logicalType": "timestamp-millis"}
80
- if pd.api.types.is_object_dtype(series):
81
- return "string"
82
- return "string"
83
-
84
- def map_csv_type_to_avro(self, csv_type):
85
- """
86
- Map CSV type to Avro type.
87
- :param csv_type: CSV type as string.
88
- :return: Avro type as string.
89
- """
90
- type_mapping = {
91
- "string": "string",
92
- "number": "double",
93
- "integer": "int",
94
- "boolean": "boolean",
95
- "date": {"type": "int", "logicalType": "date"},
96
- "timestamp": {"type": "long", "logicalType": "timestamp-millis"}
97
- }
98
- return type_mapping.get(csv_type.lower(), "string")
99
-
100
- def handle_string_format(self, format_type):
101
- """
102
- Handle string format types.
103
- :param format_type: Format type as string.
104
- :return: Avro type as string or dict.
105
- """
106
- format_mapping = {
107
- "email": "string",
108
- "uri": "string",
109
- "uuid": "string",
110
- "ipv4": "string",
111
- "ipv6": "string",
112
- "hostname": "string",
113
- "datetime": {"type": "long", "logicalType": "timestamp-millis"}
114
- }
115
- return format_mapping.get(format_type.lower(), "string")
116
-
117
-
118
- def convert_csv_to_avro(csv_file_path, avro_file_path, namespace=""):
119
- """
120
- Convert a CSV file to an Avro schema file.
121
-
122
- :param csv_file_path: Path to the CSV file.
123
- :param avro_file_path: Path to save the Avro schema file.
124
- :param namespace: Namespace for Avro records.
125
- """
126
-
127
- if not os.path.exists(csv_file_path):
128
- raise FileNotFoundError(f"CSV file not found at: {csv_file_path}")
129
-
130
- converter = CSVToAvroConverter(
131
- csv_file_path, avro_file_path, namespace)
1
+ # coding: utf-8
2
+ """
3
+ Module to convert CSV schema to Avro schema.
4
+ """
5
+
6
+ import json
7
+ import os
8
+ import pandas as pd
9
+
10
+ from avrotize.common import pascal
11
+
12
+ class CSVToAvroConverter:
13
+ """
14
+ Class to convert CSV schema to Avro schema.
15
+ """
16
+
17
+ def __init__(self, csv_file_path, avro_schema_path, namespace=""):
18
+ """
19
+ Initialize the converter with file paths and namespace.
20
+
21
+ :param csv_file_path: Path to the CSV file.
22
+ :param avro_schema_path: Path to save the Avro schema file.
23
+ :param csv_schema_path: Optional path to CSV schema file.
24
+ :param namespace: Namespace for Avro records.
25
+ """
26
+ self.csv_file_path = csv_file_path
27
+ self.avro_schema_path = avro_schema_path
28
+ self.namespace = namespace
29
+
30
+ def convert(self):
31
+ """
32
+ Convert CSV schema to Avro schema and save to file.
33
+ """
34
+ schema = self.infer_schema()
35
+
36
+ # Infer the name of the schema from the CSV file name
37
+ schema_name = os.path.splitext(os.path.basename(self.csv_file_path))[0].replace(" ", "_")
38
+
39
+ avro_schema = {}
40
+ avro_schema["type"] = "record"
41
+ avro_schema["name"] = schema_name
42
+ if self.namespace:
43
+ avro_schema["namespace"] = self.namespace
44
+ avro_schema["fields"] = schema
45
+
46
+ with open(self.avro_schema_path, "w", encoding="utf-8") as file:
47
+ json.dump(avro_schema, file, indent=2)
48
+
49
+ def infer_schema(self):
50
+ """
51
+ Infer the schema from CSV headers or data.
52
+ :return: List of fields in Avro schema format.
53
+ """
54
+ df = pd.read_csv(self.csv_file_path)
55
+ schema = []
56
+ for column in df.columns:
57
+ avro_field = {
58
+ "name": pascal(column),
59
+ "type": self.infer_avro_type(df[column])
60
+ }
61
+ if avro_field["name"] != column:
62
+ avro_field["altnames"] = { "csv": column}
63
+ schema.append(avro_field)
64
+ return schema
65
+
66
+ def infer_avro_type(self, series):
67
+ """
68
+ Infer Avro type from pandas series.
69
+ :param series: Pandas series to infer type from.
70
+ :return: Avro type as string.
71
+ """
72
+ if pd.api.types.is_integer_dtype(series):
73
+ return "int"
74
+ if pd.api.types.is_float_dtype(series):
75
+ return "double"
76
+ if pd.api.types.is_bool_dtype(series):
77
+ return "boolean"
78
+ if pd.api.types.is_datetime64_any_dtype(series):
79
+ return {"type": "long", "logicalType": "timestamp-millis"}
80
+ if pd.api.types.is_object_dtype(series):
81
+ return "string"
82
+ return "string"
83
+
84
+ def map_csv_type_to_avro(self, csv_type):
85
+ """
86
+ Map CSV type to Avro type.
87
+ :param csv_type: CSV type as string.
88
+ :return: Avro type as string.
89
+ """
90
+ type_mapping = {
91
+ "string": "string",
92
+ "number": "double",
93
+ "integer": "int",
94
+ "boolean": "boolean",
95
+ "date": {"type": "int", "logicalType": "date"},
96
+ "timestamp": {"type": "long", "logicalType": "timestamp-millis"}
97
+ }
98
+ return type_mapping.get(csv_type.lower(), "string")
99
+
100
+ def handle_string_format(self, format_type):
101
+ """
102
+ Handle string format types.
103
+ :param format_type: Format type as string.
104
+ :return: Avro type as string or dict.
105
+ """
106
+ format_mapping = {
107
+ "email": "string",
108
+ "uri": "string",
109
+ "uuid": "string",
110
+ "ipv4": "string",
111
+ "ipv6": "string",
112
+ "hostname": "string",
113
+ "datetime": {"type": "long", "logicalType": "timestamp-millis"}
114
+ }
115
+ return format_mapping.get(format_type.lower(), "string")
116
+
117
+
118
+ def convert_csv_to_avro(csv_file_path, avro_file_path, namespace=""):
119
+ """
120
+ Convert a CSV file to an Avro schema file.
121
+
122
+ :param csv_file_path: Path to the CSV file.
123
+ :param avro_file_path: Path to save the Avro schema file.
124
+ :param namespace: Namespace for Avro records.
125
+ """
126
+
127
+ if not os.path.exists(csv_file_path):
128
+ raise FileNotFoundError(f"CSV file not found at: {csv_file_path}")
129
+
130
+ converter = CSVToAvroConverter(
131
+ csv_file_path, avro_file_path, namespace)
132
132
  converter.convert()
avrotize/datapackagetoavro.py CHANGED
@@ -1,76 +1,76 @@
1
- """Convert a Data Package to an Avro schema."""
2
-
3
- import json
4
- import sys
5
- from typing import Dict, List
6
- from datapackage import Package
7
-
8
- JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
9
-
10
-
11
- class DataPackageToAvroConverter:
12
- """Class to convert Data Package to Avro schema."""
13
-
14
- def convert_datapackage_to_avro(self, datapackage_path, avro_schema_path):
15
- """Convert a Data Package to an Avro schema."""
16
- package = Package(datapackage_path)
17
- resources = package.resources
18
-
19
- avro_schemas = []
20
-
21
- for resource in resources:
22
- table_name = resource.descriptor['name']
23
- fields = resource.descriptor['schema']['fields']
24
-
25
- avro_fields = []
26
- for field in fields:
27
- avro_field = {
28
- "name": field["name"],
29
- "type": self.convert_datapackage_type_to_avro_type(field["type"])
30
- }
31
- avro_fields.append(avro_field)
32
-
33
- avro_schema = {
34
- "type": "record",
35
- "name": table_name,
36
- "fields": avro_fields
37
- }
38
- avro_schemas.append(avro_schema)
39
-
40
- # If there's only one schema, write it directly
41
- if len(avro_schemas) == 1:
42
- avro_schema = avro_schemas[0]
43
- else:
44
- # If there are multiple schemas, create a union
45
- avro_schema = avro_schemas
46
-
47
- with open(avro_schema_path, "w", encoding="utf-8") as f:
48
- json.dump(avro_schema, f, indent=2)
49
-
50
- def convert_datapackage_type_to_avro_type(self, datapackage_type):
51
- """Convert a Data Package type to an Avro type."""
52
- if datapackage_type == "string":
53
- return "string"
54
- elif datapackage_type == "number":
55
- return "double"
56
- elif datapackage_type == "integer":
57
- return "int"
58
- elif datapackage_type == "boolean":
59
- return "boolean"
60
- elif datapackage_type == "array":
61
- return {"type": "array", "items": "string"}
62
- elif datapackage_type == "object":
63
- return {"type": "map", "values": "string"}
64
- else:
65
- print(f"WARNING: Unsupported data package type: {datapackage_type}")
66
- return "string"
67
-
68
-
69
- def convert_datapackage_to_avro(datapackage_path, avro_schema_path):
70
- """Convert a Data Package to an Avro schema."""
71
- converter = DataPackageToAvroConverter()
72
- converter.convert_datapackage_to_avro(datapackage_path, avro_schema_path)
73
-
74
-
75
- # Example usage:
76
- # convert_datapackage_to_avro("datapackage.json", "schema.avsc")
1
+ """Convert a Data Package to an Avro schema."""
2
+
3
+ import json
4
+ import sys
5
+ from typing import Dict, List
6
+ from datapackage import Package
7
+
8
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
9
+
10
+
11
+ class DataPackageToAvroConverter:
12
+ """Class to convert Data Package to Avro schema."""
13
+
14
+ def convert_datapackage_to_avro(self, datapackage_path, avro_schema_path):
15
+ """Convert a Data Package to an Avro schema."""
16
+ package = Package(datapackage_path)
17
+ resources = package.resources
18
+
19
+ avro_schemas = []
20
+
21
+ for resource in resources:
22
+ table_name = resource.descriptor['name']
23
+ fields = resource.descriptor['schema']['fields']
24
+
25
+ avro_fields = []
26
+ for field in fields:
27
+ avro_field = {
28
+ "name": field["name"],
29
+ "type": self.convert_datapackage_type_to_avro_type(field["type"])
30
+ }
31
+ avro_fields.append(avro_field)
32
+
33
+ avro_schema = {
34
+ "type": "record",
35
+ "name": table_name,
36
+ "fields": avro_fields
37
+ }
38
+ avro_schemas.append(avro_schema)
39
+
40
+ # If there's only one schema, write it directly
41
+ if len(avro_schemas) == 1:
42
+ avro_schema = avro_schemas[0]
43
+ else:
44
+ # If there are multiple schemas, create a union
45
+ avro_schema = avro_schemas
46
+
47
+ with open(avro_schema_path, "w", encoding="utf-8") as f:
48
+ json.dump(avro_schema, f, indent=2)
49
+
50
+ def convert_datapackage_type_to_avro_type(self, datapackage_type):
51
+ """Convert a Data Package type to an Avro type."""
52
+ if datapackage_type == "string":
53
+ return "string"
54
+ elif datapackage_type == "number":
55
+ return "double"
56
+ elif datapackage_type == "integer":
57
+ return "int"
58
+ elif datapackage_type == "boolean":
59
+ return "boolean"
60
+ elif datapackage_type == "array":
61
+ return {"type": "array", "items": "string"}
62
+ elif datapackage_type == "object":
63
+ return {"type": "map", "values": "string"}
64
+ else:
65
+ print(f"WARNING: Unsupported data package type: {datapackage_type}")
66
+ return "string"
67
+
68
+
69
+ def convert_datapackage_to_avro(datapackage_path, avro_schema_path):
70
+ """Convert a Data Package to an Avro schema."""
71
+ converter = DataPackageToAvroConverter()
72
+ converter.convert_datapackage_to_avro(datapackage_path, avro_schema_path)
73
+
74
+
75
+ # Example usage:
76
+ # convert_datapackage_to_avro("datapackage.json", "schema.avsc")