structurize-2.16.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. avrotize/__init__.py +63 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +992 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +1023 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +622 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/commands.json +2433 -0
  28. avrotize/common.py +829 -0
  29. avrotize/constants.py +5 -0
  30. avrotize/csvtoavro.py +132 -0
  31. avrotize/datapackagetoavro.py +76 -0
  32. avrotize/dependency_resolver.py +348 -0
  33. avrotize/jsonstoavro.py +1698 -0
  34. avrotize/jsonstostructure.py +2642 -0
  35. avrotize/jstructtoavro.py +878 -0
  36. avrotize/kstructtoavro.py +93 -0
  37. avrotize/kustotoavro.py +455 -0
  38. avrotize/parquettoavro.py +157 -0
  39. avrotize/proto2parser.py +498 -0
  40. avrotize/proto3parser.py +403 -0
  41. avrotize/prototoavro.py +382 -0
  42. avrotize/structuretocsharp.py +2005 -0
  43. avrotize/structuretojsons.py +498 -0
  44. avrotize/structuretopython.py +772 -0
  45. avrotize/xsdtoavro.py +413 -0
  46. structurize-2.16.2.dist-info/METADATA +805 -0
  47. structurize-2.16.2.dist-info/RECORD +51 -0
  48. structurize-2.16.2.dist-info/WHEEL +5 -0
  49. structurize-2.16.2.dist-info/entry_points.txt +2 -0
  50. structurize-2.16.2.dist-info/licenses/LICENSE +201 -0
  51. structurize-2.16.2.dist-info/top_level.txt +1 -0

avrotize/kstructtoavro.py
@@ -0,0 +1,93 @@
+"""
+
+Convert a Kafka schema to an Avro schema.
+
+"""
+
+import json
+
+
+def kafka_type_to_avro_type(kafka_field):
+    """Convert a Kafka field type to an Avro field type."""
+    kafka_to_avro_types = {
+        'int32': 'int',
+        'int64': 'long',
+        'string': 'string',
+        'boolean': 'boolean',
+        'bytes': 'bytes',
+        'array': 'array',
+        'map': 'map',
+        'struct': 'record'
+    }
+
+    if kafka_field['type'] in kafka_to_avro_types:
+        return kafka_to_avro_types[kafka_field['type']]
+    elif isinstance(kafka_field['type'], dict):  # Nested struct
+        return convert_schema(kafka_field['type'])
+    else:
+        raise ValueError(f"Unsupported Kafka type: {kafka_field['type']}")
+
+
+def convert_field(field):
+    """Convert a Kafka field to an Avro field."""
+    avro_field = {
+        'name': field['field'],
+        'type': []
+    }
+
+    if field['optional']:
+        avro_field['type'].append('null')
+
+    kafka_field_type = kafka_type_to_avro_type(field)
+
+    if field['type'] == 'array':
+        item_type = kafka_type_to_avro_type(field['items'])
+        avro_field['type'].append({'type': 'array', 'items': item_type})
+    elif field['type'] == 'map':
+        value_type = kafka_type_to_avro_type(field['values'])
+        avro_field['type'].append({'type': 'map', 'values': value_type})
+    elif field['type'] == 'struct':
+        avro_field['type'].append(convert_schema(field))
+    else:
+        avro_field['type'].append(kafka_field_type)
+
+    if len(avro_field['type']) == 1:
+        avro_field['type'] = avro_field['type'][0]
+
+    return avro_field
+
+
+def convert_schema(kafka_schema):
+    """Convert a Kafka schema to an Avro schema."""
+    avro_schema = {
+        'type': 'record',
+        'name': kafka_schema.get('name', 'MyRecord'),
+        'fields': []
+    }
+
+    for field in kafka_schema['fields']:
+        avro_schema['fields'].append(convert_field(field))
+
+    return avro_schema
+
+
+def convert_kafka_struct_to_avro_schema(kafka_schema_file_path, avro_file_path):
+    """Read a Kafka schema from a file, convert it to an Avro schema, and save it to another file."""
+
+    if not kafka_schema_file_path:
+        raise ValueError("Kafka schema file path is required.")
+
+    # Open and read the Kafka schema file
+    with open(kafka_schema_file_path, 'r', encoding='utf-8') as kafka_schema_file:
+        kafka_schema_data = json.load(kafka_schema_file)
+
+    # Assuming the whole file content is the schema
+    if isinstance(kafka_schema_data, dict) and 'schema' in kafka_schema_data:
+        kafka_schema = kafka_schema_data['schema']
+    else:
+        kafka_schema = kafka_schema_data
+    avro_schema = convert_schema(kafka_schema)
+
+    # Write the converted Avro schema to a file
+    with open(avro_file_path, 'w', encoding='utf-8') as avro_file:
+        json.dump(avro_schema, avro_file, indent=4)
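
For orientation, a minimal usage sketch of the converter added above. The file names are hypothetical; the import path assumes the module ships as avrotize/kstructtoavro.py, matching the files-changed list.

from avrotize.kstructtoavro import convert_kafka_struct_to_avro_schema

# Hypothetical input: a Kafka Connect struct schema saved as JSON, either the
# schema object itself or an envelope with a top-level "schema" key.
convert_kafka_struct_to_avro_schema("orders-connect-schema.json", "orders.avsc")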

avrotize/kustotoavro.py
@@ -0,0 +1,455 @@
+""" Converts Kusto table schemas to Avro schema format. """
+
+import copy
+import os
+import json
+from typing import Any, Dict, List, Tuple
+from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
+from avrotize.common import get_tree_hash
+from avrotize.constants import AVRO_VERSION
+
+JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
+
+
+class KustoToAvro:
+    """ Converts Kusto table schemas to Avro schema format."""
+
+    def __init__(self, kusto_uri, kusto_database, table_name: str | None, avro_namespace: str, avro_schema_path, emit_cloudevents: bool, emit_cloudevents_xregistry: bool, token_provider=None):
+        """ Initializes the KustoToAvro class with the Kusto URI and database name. """
+        kcsb = KustoConnectionStringBuilder.with_az_cli_authentication(kusto_uri) if not token_provider else KustoConnectionStringBuilder.with_token_provider(kusto_uri, token_provider)
+        self.client = KustoClient(kcsb)
+        self.kusto_database = kusto_database
+        self.single_table_name = table_name
+        self.avro_namespace = avro_namespace
+        self.avro_schema_path = avro_schema_path
+        self.emit_xregistry = emit_cloudevents_xregistry
+        self.emit_cloudevents = emit_cloudevents or emit_cloudevents_xregistry
+        if self.emit_xregistry:
+            if not self.avro_namespace:
+                raise ValueError(
+                    "The avro_namespace must be specified when emit_cloudevents_xregistry is True")
+        self.generated_types: List[str] = []
+
+    def fetch_table_schema_and_docs(self, table_name: str):
+        """ Fetches the schema and docstrings for a given table."""
+        query = f".show table {table_name} schema as json"
+        response = self.client.execute(self.kusto_database, query)
+        schema_json = response.primary_results[0][0]['Schema']
+        schema = json.loads(schema_json)
+        return schema
+
+    def fold_record_types(self, base_record: dict, new_record: dict) -> Tuple[bool, dict]:
+        """ Merges two record types."""
+        base_fields = copy.deepcopy(base_record).get("fields", [])
+        new_fields = new_record.get("fields", [])
+
+        for field in new_fields:
+            base_field = next(
+                (f for f in base_fields if f["name"] == field["name"]), None)
+            if not base_field:
+                base_fields.append(field)
+            else:
+                if isinstance(base_field["type"], str) and base_field["type"] != field["type"]:
+                    # If the field already exists, but the types are different, don't fold
+                    return False, new_record
+                elif isinstance(base_field["type"], dict) and isinstance(field["type"], dict) and base_field["type"]["type"] == field["type"]["type"]:
+                    result, record_type = self.fold_record_types(
+                        base_field["type"], field["type"])
+                    if not result:
+                        return False, new_record
+                    base_field["type"] = record_type
+        return True, base_record
+
+    def python_type_to_avro_type(self, type_name: str, python_value: Any):
+        """ Maps Python types to Avro types."""
+        simple_types = {
+            int: "int", float: "double", str: "string",
+            bool: "boolean", bytes: {"type": "bytes", "logicalType": "decimal"}
+        }
+        if isinstance(python_value, dict):
+            type_name_name = type_name.rsplit('.', 1)[-1]
+            type_name_namespace = (type_name.rsplit('.', 1)[0])+"Types" if '.' in type_name else ''
+            type_namespace = self.avro_namespace + ('.' if self.avro_namespace and type_name_namespace else '') + type_name_namespace
+            record: Dict[str, JsonNode] = {
+                "type": "record",
+                "name": f"{type_name_name}",
+            }
+            if type_name_namespace:
+                record["namespace"] = type_namespace
+            fields: List[JsonNode] = []
+            for key, value in python_value.items():
+                altname = key
+                # replace anything not in [a-zA-Z0-9_] with '_'
+                key = ''.join(c if c.isalnum() else '_' for c in key)
+                field = {
+                    "name": key,
+                    "type": self.python_type_to_avro_type(type_name+key, value)
+                }
+                if altname != key:
+                    field["altnames"] = {
+                        "kql": altname
+                    }
+                fields.append(field)
+            record["fields"] = fields
+            return record
+        if isinstance(python_value, list):
+            if len(python_value) > 0:
+                item_types = self.consolidated_type_list(
+                    type_name, python_value)
+            else:
+                item_types = ["string"]
+            if len(item_types) == 1:
+                return {"type": "array", "items": item_types[0]}
+            else:
+                return {"type": "array", "items": item_types}
+        return simple_types.get(type(python_value), "string")
+
+    def consolidated_type_list(self, type_name: str, python_value: list):
+        """ Consolidates a list of types."""
+        list_types = [self.python_type_to_avro_type(
+            type_name, item) for item in python_value]
+
+        tree_hashes = {}
+        # consolidate the list types by eliminating duplicates
+        for item in list_types:
+            tree_hash = get_tree_hash(item)
+            if tree_hash.hash_value not in tree_hashes:
+                tree_hashes[tree_hash.hash_value] = item
+        list_types = list(tree_hashes.values())
+        unique_types = []
+        prior_record = None
+        for item in list_types:
+            if isinstance(item, dict) and "type" in item and item["type"] == "record":
+                if prior_record is None:
+                    prior_record = item
+                else:
+                    folded, record = self.fold_record_types(prior_record, item)
+                    if not folded:
+                        unique_types.append(item)
+                    else:
+                        prior_record = record
+            else:
+                unique_types.append(item)
+        if prior_record is not None:
+            unique_types.append(prior_record)
+
+        array_types = [item["items"] for item in unique_types if isinstance(
+            item, dict) and "type" in item and item["type"] == "array"]
+        map_types = [item["values"] for item in unique_types if isinstance(
+            item, dict) and "type" in item and item["type"] == "map"]
+        list_types = [item for item in unique_types if not isinstance(
+            item, dict) or "type" not in item or item["type"] not in ["array", "map"]]
+
+        item_types = []
+        for item2 in array_types:
+            if isinstance(item2, list):
+                item_types.extend(item2)
+            else:
+                item_types.append(item2)
+        if len(item_types) > 0:
+            list_types.append({"type": "array", "items": item_types})
+
+        value_types = []
+        for item3 in map_types:
+            if isinstance(item3, list):
+                value_types.extend(item3)
+            else:
+                value_types.append(item3)
+        if len(value_types) > 0:
+            list_types.append({"type": "map", "values": value_types})
+
+        return list_types
+
+    def infer_dynamic_schema(self, table_name: str, column_name: str, type_column: dict | None, type_value: str | None) -> JsonNode:
+        """
+        Infers the schema for a dynamic column. If a type column is provided, it will infer the schema based
+        on constraining the result set by the type column.
+
+        Args:
+            table_name: The name of the table.
+            column_name: The name of the column.
+            type_column: The type column (if any)
+            type_value: The value of the type column (if any)
+        """
+        type_column_name = type_column['Name'] if type_column else None
+        query = f"{table_name}"+(f' | where {type_column_name}=="{type_value}"' if type_column_name and type_value else '') + f" | project {column_name} | take 100"
+        rows = self.client.execute(self.kusto_database, query)
+        values = [row[column_name] for row in rows.primary_results[0]]
+        type_name = type_value if type_value else f"{table_name}.{column_name}"
+        unique_types = self.consolidated_type_list(type_name, values)
+        if len(unique_types) > 1:
+            # Using a union of inferred types
+            return unique_types
+        elif len(unique_types) == 1:
+            # Single type, no need for union
+            return unique_types[0]
+        else:
+            # No values, default to string
+            return "string"
+
+    type_map : Dict[str, JsonNode] = {
+        "int": "int",
+        "long": "long",
+        "string": "string",
+        "real": "double",
+        "bool": "boolean",
+        "datetime": {"type": "long", "logicalType": "timestamp-millis"},
+        "timespan": {"type": "fixed", "size": 12, "logicalType": "duration"},
+        "decimal": {"type": "fixed", "size": 16, "precision": 38, "logicalType": "decimal"},
+        "dynamic": "bytes"
+    }
+
+    def map_kusto_type_to_avro_type(self, kusto_type, table_name, column_name, type_column: dict | None, type_value: str | None) -> JsonNode:
+        """ Maps Kusto types to Avro types."""
+        if kusto_type == "dynamic":
+            return self.infer_dynamic_schema(table_name, column_name, type_column, type_value)
+        return self.type_map.get(kusto_type, "string")
+
+    def kusto_to_avro_schema(self, kusto_schema: dict, table_name: str) -> JsonNode:
+        """ Converts a Kusto schema to Avro schema."""
+        column_names = set([column['Name'].lstrip('_')
+                            for column in kusto_schema['OrderedColumns']])
+        type_values: List[str|None] = []
+        type_column: Dict[str, JsonNode] = {}
+        is_cloudevent = False
+        if self.emit_cloudevents:
+            is_cloudevent = 'type' in column_names and 'source' in column_names and 'data' in column_names and 'id' in column_names
+            if is_cloudevent:
+                type_column = next(
+                    (column for column in kusto_schema['OrderedColumns'] if column['Name'].lstrip('_') == 'type'), {})
+                type_sampling_query = f"{table_name} | distinct {type_column['Name']}"
+                type_sampling_rows = self.client.execute(
+                    self.kusto_database, type_sampling_query)
+                type_values.extend([row[type_column['Name']]
+                                    for row in type_sampling_rows.primary_results[0]])
+
+        if len(type_values) == 0:
+            type_values.append(None)
+
+        schemas: List[JsonNode] = []
+        for type_value in type_values:
+            schema: JsonNode = {}
+            fields: List[JsonNode] = []
+            if type_value and isinstance(type_value, str):
+                type_name_name = type_value.rsplit('.', 1)[-1]
+                type_name_namespace = type_value.rsplit('.', 1)[0] if '.' in type_value else ''
+                type_namespace = self.avro_namespace + ('.' if self.avro_namespace and type_name_namespace else '') + type_name_namespace
+            else:
+                type_name_name = table_name
+                type_namespace = self.avro_namespace
+
+            if is_cloudevent:
+                # get just the 'data' column and infer the schema
+                column = next(col for col in kusto_schema['OrderedColumns'] if col['Name'].lstrip('_') == 'data')
+                data_schemas: JsonNode = self.map_kusto_type_to_avro_type(
+                    column['CslType'], table_name, column['Name'], type_column, type_value)
+                if isinstance(data_schemas, dict):
+                    data_schemas = [data_schemas]
+                if isinstance(data_schemas, list):
+                    for schema in data_schemas:
+                        if not isinstance(schema, dict) or "type" not in schema or schema["type"] != "record":
+                            schema = self.wrap_schema_in_root_record(schema, type_name_name, type_namespace)
+                        if self.emit_xregistry:
+                            ce_attribs: Dict[str, JsonNode] ={}
+                            for col in [col for col in kusto_schema['OrderedColumns'] if col['Name'].lstrip('_') != 'data']:
+                                ce_attribs[col['Name'].lstrip('_')] = "string"
+                            if isinstance(schema, dict):
+                                schema["ce_attribs"] = ce_attribs
+                        self.apply_schema_attributes(schema, kusto_schema, table_name, type_value, type_namespace)
+                        schemas.append(schema)
+            else:
+                for column in kusto_schema['OrderedColumns']:
+                    avro_type = self.map_kusto_type_to_avro_type(
+                        column['CslType'], table_name, column['Name'], type_column, type_value)
+                    field: Dict[str, JsonNode] = {"name": column['Name'], "type": avro_type}
+                    doc: JsonNode = column.get('DocString', '')
+                    if doc:
+                        field["doc"] = doc
+                    fields.append(field)
+                schema = {
+                    "type": "record",
+                    "name": f"{type_name_name}",
+                    "fields": fields
+                }
+                self.apply_schema_attributes(schema, kusto_schema, table_name, type_value, type_namespace)
+                schemas.append(schema)
+
+        return schemas if len(schemas) > 1 else schemas[0]
+
+
+    def wrap_schema_in_root_record(self, schema: JsonNode, type_name: str, type_namespace: str):
+        """ Wraps a schema in a root record."""
+        record: Dict[str, JsonNode] = {
+            "type": "record",
+            "name": type_name,
+            "fields": [
+                {
+                    "name": "data",
+                    "type": schema,
+                    "root": True
+                }
+            ]
+        }
+        if type_namespace:
+            record["namespace"] = type_namespace
+        return record
+
+    def apply_schema_attributes(self, schema, kusto_schema, table_name, type_value, type_namespace):
+        """ Applies schema attributes to the schema."""
+        if isinstance(schema, dict):
+            schema["altnames"] = {
+                "kql": table_name
+            }
+            if self.emit_cloudevents and type_value:
+                schema["ce_type"] = type_value
+            if type_namespace:
+                schema["namespace"] = type_namespace
+            doc = kusto_schema.get('DocString', '')
+            if doc:
+                schema["doc"] = doc
+
+    def make_type_names_unique(self, item_types: list):
+        """ Makes the type names unique."""
+        for item in item_types:
+            if isinstance(item, dict) and "type" in item and item["type"] == "array":
+                if "type" in item["items"] and item["items"]["type"] == "record":
+                    self.make_type_names_unique([item["items"]])
+                elif isinstance(item["items"], list):
+                    self.make_type_names_unique(item["items"])
+            if isinstance(item, dict) and "type" in item and item["type"] == "map":
+                if "type" in item["values"] and item["values"]["type"] == "record":
+                    self.make_type_names_unique([item["values"]])
+                elif isinstance(item["values"], list):
+                    self.make_type_names_unique(item["values"])
+            elif isinstance(item, dict) and "type" in item and item["type"] == "record":
+                namespace = item.get("namespace", '')
+                type_name = base_name = item["name"]
+                record_name = item["namespace"]+"." + \
+                    item["name"] if namespace else item["name"]
+                if record_name in self.generated_types:
+                    i = 0
+                    while record_name in self.generated_types:
+                        i += 1
+                        type_name = f"{base_name}{i}"
+                        record_name = item["namespace"]+"." + \
+                            type_name if namespace else type_name
+                    self.generated_types.append(record_name)
+                else:
+                    self.generated_types.append(record_name)
+                item["name"] = type_name
+                for field in item.get("fields", []):
+                    if "type" in field and isinstance(field["type"], dict):
+                        if field["type"]["type"] in ["record", "array", "map"]:
+                            self.make_type_names_unique([field["type"]])
+                    elif "type" in field and isinstance(field["type"], list):
+                        self.make_type_names_unique(field["type"])
+
+    def process_all_tables(self):
+        """ Processes all tables in the database and returns a union schema."""
+        union_schema = []
+        tables_query = ".show tables | project TableName"
+        if self.single_table_name:
+            tables_query += f" | where TableName == '{self.single_table_name}'"
+        tables = self.client.execute(self.kusto_database, tables_query)
+        for row in tables.primary_results[0]:
+            table_name = row['TableName']
+            print(f"Processing table: {table_name}")
+            kusto_schema = self.fetch_table_schema_and_docs(table_name)
+            if kusto_schema:
+                avro_schema = self.kusto_to_avro_schema(
+                    kusto_schema, table_name)
+                if isinstance(avro_schema, list):
+                    union_schema.extend(avro_schema)
+                else:
+                    union_schema.append(avro_schema)
+
+        output = None
+        if self.emit_xregistry:
+            xregistry_messages = {}
+            xregistry_schemas = {}
+            groupname = self.avro_namespace
+            for schema in union_schema:
+                self.generated_types = []
+                self.make_type_names_unique([schema])
+                ce_attribs: Dict[str, JsonNode] = {}
+                if "ce_attribs" in schema:
+                    ce_attribs = schema.get("ce_attribs", {})
+                    del schema["ce_attribs"]
+                schemaid = schema['ce_type'] if 'ce_type' in schema else f"{self.avro_namespace}.{schema['name']}"
+                schema_name = schemaid.rsplit('.', 1)[-1]
+                xregistry_schemas[schemaid] = {
+                    "id": schemaid,
+                    "name": schema_name,
+                    "format": f"Avro/{AVRO_VERSION}",
+                    "defaultversionid": "1",
+                    "versions": {
+                        "1": {
+                            "id": "1",
+                            "format": f"Avro/{AVRO_VERSION}",
+                            "schema": schema
+                        }
+                    }
+                }
+                xregistry_messages[schemaid] = {
+                    "id": schemaid,
+                    "name": schema_name,
+                    "envelope": "CloudEvents/1.0",
+                    "envelopemetadata": {
+                        "type": {
+                            "value": schemaid
+                        },
+                        "source": {
+                            "value": "{source}"
+                        },
+                    },
+                    "schemaformat": f"Avro/{AVRO_VERSION}",
+                    "schemauri": f"#/schemagroups/{groupname}/schemas/{schemaid}"
+                }
+                for key, value in ce_attribs.items():
+                    # skip the required attributes
+                    if key == "type" or key == "source" or key == "id" or key == "specversion":
+                        continue
+                    xregistry_messages[schemaid]["envelopemetadata"][key] = {
+                        "type": value,
+                        "required": True
+                    }
+            output = {
+                "messagegroups": {
+                    groupname: {
+                        "id": groupname,
+                        "messages": xregistry_messages
+                    }
+                },
+                "schemagroups": {
+                    groupname: {
+                        "id": groupname,
+                        "schemas": xregistry_schemas
+                    }
+                }
+            }
+        else:
+            self.generated_types = []
+            self.make_type_names_unique(union_schema)
+            output = union_schema
+
+        # create the directory if it doesn't exist
+        base_dir = os.path.dirname(self.avro_schema_path)
+        if base_dir and not os.path.exists(base_dir):
+            os.makedirs(base_dir)
+        with open(self.avro_schema_path, 'w', encoding='utf-8') as avro_file:
+            json.dump(output, avro_file, indent=4)
+
+
+def convert_kusto_to_avro(kusto_uri: str, kusto_database: str, table_name: str | None, avro_namespace: str, avro_schema_file: str, emit_cloudevents:bool, emit_cloudevents_xregistry: bool, token_provider=None):
+    """ Converts Kusto table schemas to Avro schema format."""
+
+    if not kusto_uri:
+        raise ValueError("kusto_uri is required")
+    if not kusto_database:
+        raise ValueError("kusto_database is required")
+    if not avro_namespace:
+        avro_namespace = kusto_database
+
+    kusto_to_avro = KustoToAvro(
+        kusto_uri, kusto_database, table_name, avro_namespace, avro_schema_file,emit_cloudevents, emit_cloudevents_xregistry, token_provider=token_provider)
+    return kusto_to_avro.process_all_tables()
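
Likewise, a minimal usage sketch for the module-level entry point above. The cluster URI, database, namespace, and output path are hypothetical; per the constructor above, authentication falls back to the Azure CLI credential when no token_provider is passed.

from avrotize.kustotoavro import convert_kusto_to_avro

# Hypothetical cluster and database names; with table_name=None every table in
# the database is converted and the result is written as a union schema file.
convert_kusto_to_avro(
    kusto_uri="https://mycluster.kusto.windows.net",
    kusto_database="mydatabase",
    table_name=None,
    avro_namespace="com.example.telemetry",
    avro_schema_file="mydatabase.avsc",
    emit_cloudevents=False,
    emit_cloudevents_xregistry=False,
)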