structurize 3.5.5__tar.gz → 3.5.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {structurize-3.5.5/structurize.egg-info → structurize-3.5.7}/PKG-INFO +1 -1
  2. {structurize-3.5.5 → structurize-3.5.7}/avrotize/_version.py +3 -3
  3. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotojstruct.py +4 -0
  4. structurize-3.5.7/avrotize/avrototsml.py +349 -0
  5. {structurize-3.5.5 → structurize-3.5.7}/avrotize/commands.json +180 -0
  6. {structurize-3.5.5 → structurize-3.5.7}/avrotize/jstructtoavro.py +5 -0
  7. {structurize-3.5.5 → structurize-3.5.7}/avrotize/sqltoavro.py +98 -0
  8. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretopython.py +56 -5
  9. structurize-3.5.7/avrotize/structuretotsml.py +34 -0
  10. structurize-3.5.7/avrotize/tmslvalidate.py +317 -0
  11. {structurize-3.5.5 → structurize-3.5.7/structurize.egg-info}/PKG-INFO +1 -1
  12. {structurize-3.5.5 → structurize-3.5.7}/structurize.egg-info/SOURCES.txt +6 -0
  13. {structurize-3.5.5 → structurize-3.5.7}/.gitignore +0 -0
  14. {structurize-3.5.5 → structurize-3.5.7}/LICENSE +0 -0
  15. {structurize-3.5.5 → structurize-3.5.7}/MANIFEST.in +0 -0
  16. {structurize-3.5.5 → structurize-3.5.7}/README.md +0 -0
  17. {structurize-3.5.5 → structurize-3.5.7}/avrotize/__init__.py +0 -0
  18. {structurize-3.5.5 → structurize-3.5.7}/avrotize/__main__.py +0 -0
  19. {structurize-3.5.5 → structurize-3.5.7}/avrotize/asn1toavro.py +0 -0
  20. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotize.py +0 -0
  21. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotocpp.py +0 -0
  22. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotocsharp.py +0 -0
  23. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotocsv.py +0 -0
  24. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotodatapackage.py +0 -0
  25. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotodb.py +0 -0
  26. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotogo.py +0 -0
  27. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotographql.py +0 -0
  28. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotoiceberg.py +0 -0
  29. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotojava.py +0 -0
  30. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotojs.py +0 -0
  31. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotojsons.py +0 -0
  32. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotokusto.py +0 -0
  33. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotomd.py +0 -0
  34. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotools.py +0 -0
  35. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotoparquet.py +0 -0
  36. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotoproto.py +0 -0
  37. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotopython.py +0 -0
  38. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotorust.py +0 -0
  39. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotots.py +0 -0
  40. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrotoxsd.py +0 -0
  41. {structurize-3.5.5 → structurize-3.5.7}/avrotize/avrovalidator.py +0 -0
  42. {structurize-3.5.5 → structurize-3.5.7}/avrotize/cddltostructure.py +0 -0
  43. {structurize-3.5.5 → structurize-3.5.7}/avrotize/choice_inference.py +0 -0
  44. {structurize-3.5.5 → structurize-3.5.7}/avrotize/common.py +0 -0
  45. {structurize-3.5.5 → structurize-3.5.7}/avrotize/constants.py +0 -0
  46. {structurize-3.5.5 → structurize-3.5.7}/avrotize/csvtoavro.py +0 -0
  47. {structurize-3.5.5 → structurize-3.5.7}/avrotize/datapackagetoavro.py +0 -0
  48. {structurize-3.5.5 → structurize-3.5.7}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
  49. {structurize-3.5.5 → structurize-3.5.7}/avrotize/dependencies/typescript/node22/package.json +0 -0
  50. {structurize-3.5.5 → structurize-3.5.7}/avrotize/dependency_resolver.py +0 -0
  51. {structurize-3.5.5 → structurize-3.5.7}/avrotize/dependency_version.py +0 -0
  52. {structurize-3.5.5 → structurize-3.5.7}/avrotize/jsonstoavro.py +0 -0
  53. {structurize-3.5.5 → structurize-3.5.7}/avrotize/jsonstostructure.py +0 -0
  54. {structurize-3.5.5 → structurize-3.5.7}/avrotize/jsontoschema.py +0 -0
  55. {structurize-3.5.5 → structurize-3.5.7}/avrotize/kstructtoavro.py +0 -0
  56. {structurize-3.5.5 → structurize-3.5.7}/avrotize/kustotoavro.py +0 -0
  57. {structurize-3.5.5 → structurize-3.5.7}/avrotize/kustotojstruct.py +0 -0
  58. {structurize-3.5.5 → structurize-3.5.7}/avrotize/mcp_server.py +0 -0
  59. {structurize-3.5.5 → structurize-3.5.7}/avrotize/openapitostructure.py +0 -0
  60. {structurize-3.5.5 → structurize-3.5.7}/avrotize/parquettoavro.py +0 -0
  61. {structurize-3.5.5 → structurize-3.5.7}/avrotize/proto2parser.py +0 -0
  62. {structurize-3.5.5 → structurize-3.5.7}/avrotize/proto3parser.py +0 -0
  63. {structurize-3.5.5 → structurize-3.5.7}/avrotize/prototoavro.py +0 -0
  64. {structurize-3.5.5 → structurize-3.5.7}/avrotize/schema_inference.py +0 -0
  65. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretocddl.py +0 -0
  66. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretocpp.py +0 -0
  67. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretocsharp.py +0 -0
  68. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretocsv.py +0 -0
  69. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretodatapackage.py +0 -0
  70. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretodb.py +0 -0
  71. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretogo.py +0 -0
  72. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretographql.py +0 -0
  73. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretoiceberg.py +0 -0
  74. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretojava.py +0 -0
  75. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretojs.py +0 -0
  76. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretojsons.py +0 -0
  77. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretokusto.py +0 -0
  78. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretomd.py +0 -0
  79. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretoproto.py +0 -0
  80. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretorust.py +0 -0
  81. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretots.py +0 -0
  82. {structurize-3.5.5 → structurize-3.5.7}/avrotize/structuretoxsd.py +0 -0
  83. {structurize-3.5.5 → structurize-3.5.7}/avrotize/validate.py +0 -0
  84. {structurize-3.5.5 → structurize-3.5.7}/avrotize/xmltoschema.py +0 -0
  85. {structurize-3.5.5 → structurize-3.5.7}/avrotize/xsdtoavro.py +0 -0
  86. {structurize-3.5.5 → structurize-3.5.7}/build.ps1 +0 -0
  87. {structurize-3.5.5 → structurize-3.5.7}/build.sh +0 -0
  88. {structurize-3.5.5 → structurize-3.5.7}/pyproject.toml +0 -0
  89. {structurize-3.5.5 → structurize-3.5.7}/setup.cfg +0 -0
  90. {structurize-3.5.5 → structurize-3.5.7}/structurize.egg-info/dependency_links.txt +0 -0
  91. {structurize-3.5.5 → structurize-3.5.7}/structurize.egg-info/entry_points.txt +0 -0
  92. {structurize-3.5.5 → structurize-3.5.7}/structurize.egg-info/requires.txt +0 -0
  93. {structurize-3.5.5 → structurize-3.5.7}/structurize.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structurize
3
- Version: 3.5.5
3
+ Version: 3.5.7
4
4
  Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
5
  Author-email: Clemens Vasters <clemensv@microsoft.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
18
18
  commit_id: str | None
19
19
  __commit_id__: str | None
20
20
 
21
- __version__ = version = '3.5.5'
22
- __version_tuple__ = version_tuple = (3, 5, 5)
21
+ __version__ = version = '3.5.7'
22
+ __version_tuple__ = version_tuple = (3, 5, 7)
23
23
 
24
- __commit_id__ = commit_id = 'g2c742c4cc'
24
+ __commit_id__ = commit_id = 'gaf486f61f'
@@ -132,6 +132,10 @@ class AvroToJsonStructure:
132
132
  props = {"name": name, "type": "object", "properties": {}, "required": []}
133
133
  if "doc" in avro_schema:
134
134
  props["description"] = avro_schema["doc"]
135
+ if isinstance(avro_schema.get("unique"), list):
136
+ props["x-avrotize-unique"] = avro_schema["unique"]
137
+ if isinstance(avro_schema.get("foreignKeys"), list):
138
+ props["x-avrotize-foreignKeys"] = avro_schema["foreignKeys"]
135
139
 
136
140
  # Namespace for resolving field types within this record
137
141
  record_fields_namespace = avro_schema.get("namespace", namespace)
@@ -0,0 +1,349 @@
1
+ """Convert an Avro schema to a Tabular Model Scripting Language (TMSL) schema."""
2
+
3
+ import json
4
+ import sys
5
+ from typing import Any, Dict, List, Optional, Tuple
6
+
7
+ JsonNode = Dict[str, "JsonNode"] | List["JsonNode"] | str | bool | int | float | None
8
+
9
+
10
class AvroToTmslConverter:
    """Convert Avro schema documents into TMSL ``createOrReplace`` scripts.

    Every Avro record becomes one TMSL table, every record field becomes one
    column, and ``foreignKeys`` metadata on a record becomes single-column
    relationships between tables.
    """

    # Avro primitive type name -> (TMSL data type, nullable).
    _PRIMITIVE_TYPES: Dict[str, Tuple[str, bool]] = {
        "boolean": ("boolean", False),
        "int": ("int64", False),
        "long": ("int64", False),
        "float": ("double", False),
        "double": ("double", False),
        "bytes": ("binary", False),
        "string": ("string", False),
        "null": ("string", True),
    }

    # Avro logical types that are rendered as TMSL ``dateTime`` columns.
    _DATETIME_LOGICAL_TYPES = frozenset(
        {
            "timestamp-millis",
            "timestamp-micros",
            "local-timestamp-millis",
            "local-timestamp-micros",
            "date",
            "time-millis",
            "time-micros",
        }
    )

    def __init__(self: "AvroToTmslConverter") -> None:
        # Named Avro types keyed by both their short and fully-qualified name.
        self.named_type_cache: Dict[str, Dict[str, Any]] = {}

    def get_fullname(self, namespace: str, name: str) -> str:
        """Return ``namespace.name``, or just ``name`` when namespace is empty."""
        return f"{namespace}.{name}" if namespace else name

    def cache_named_types(self, avro_type: "JsonNode", namespace: str = "") -> None:
        """Record every named type reachable from ``avro_type`` in the cache.

        Unions (lists), record fields, array items and map values are walked
        recursively. Each dict carrying a string ``name`` is stored under both
        its short name and its fully-qualified name so that later string
        references can be resolved either way.
        """
        if isinstance(avro_type, list):
            for item in avro_type:
                self.cache_named_types(item, namespace)
            return

        if not isinstance(avro_type, dict):
            return

        current_namespace = str(avro_type.get("namespace", namespace))
        type_name = avro_type.get("name")
        if isinstance(type_name, str):
            fullname = self.get_fullname(current_namespace, type_name)
            self.named_type_cache[fullname] = avro_type
            self.named_type_cache[type_name] = avro_type

        avro_kind = avro_type.get("type")
        if avro_kind == "record":
            for field in avro_type.get("fields", []):
                if isinstance(field, dict) and "type" in field:
                    self.cache_named_types(field["type"], current_namespace)
        elif avro_kind == "array":
            self.cache_named_types(avro_type.get("items"), current_namespace)
        elif avro_kind == "map":
            self.cache_named_types(avro_type.get("values"), current_namespace)

    def map_avro_type_to_tmsl(self, avro_type: "JsonNode") -> Tuple[str, bool]:
        """Map an Avro field type to ``(tmsl_data_type, nullable)``.

        Unions with exactly one non-null branch map to that branch's type;
        unions with several non-null branches, records, arrays, maps and
        unresolved named references map to ``"variant"``.
        """
        return self._map_type(avro_type, frozenset())

    def _map_type(self, avro_type: "JsonNode", resolving: frozenset) -> Tuple[str, bool]:
        """Implement :meth:`map_avro_type_to_tmsl`.

        ``resolving`` holds the named references currently being expanded, so a
        type that (directly or indirectly) refers back to itself ends as
        ``"variant"`` instead of recursing without bound.
        """
        if isinstance(avro_type, list):
            branches = [item for item in avro_type if item != "null"]
            has_null = len(branches) != len(avro_type)
            if not branches:
                return "string", True
            if len(branches) == 1:
                mapped_type, inner_nullable = self._map_type(branches[0], resolving)
                # Keep the nullability of the inner type; it used to be dropped.
                return mapped_type, has_null or inner_nullable
            return "variant", True

        if isinstance(avro_type, dict):
            avro_kind = avro_type.get("type")

            if avro_kind == "record":
                return "variant", False
            if avro_kind in ("array", "map"):
                return "variant", False
            if avro_kind == "enum":
                return "string", False

            # Logical types are checked before "fixed" because Avro allows
            # "decimal" to annotate a fixed type as well as bytes.
            logical_type = avro_type.get("logicalType")
            if isinstance(logical_type, str):
                if logical_type in self._DATETIME_LOGICAL_TYPES:
                    return "dateTime", False
                if logical_type == "decimal":
                    return "decimal", False

            if avro_kind == "fixed":
                return "binary", False

            if isinstance(avro_kind, (str, dict, list)):
                return self._map_type(avro_kind, resolving)
            return "string", False

        if isinstance(avro_type, str):
            primitive = self._PRIMITIVE_TYPES.get(avro_type)
            if primitive is not None:
                return primitive

            if avro_type in resolving:
                # Reference cycle: stop expanding.
                return "variant", False

            referenced = self.named_type_cache.get(avro_type)
            if referenced is not None:
                return self._map_type(referenced, resolving | {avro_type})

            return "variant", False

        return "string", False

    def resolve_root_record(self, schema: "JsonNode", avro_record_type: Optional[str]) -> Dict[str, Any]:
        """Pick a single record schema out of a schema document.

        A dict schema must itself be a record. For a list schema the record
        matching ``avro_record_type`` (by short or fully-qualified name) is
        returned, or the first record when no name is given. On any failure a
        message is printed and the process exits with status 1.
        """
        if isinstance(schema, dict):
            if schema.get("type") != "record":
                print("Expected an Avro schema with a root type of 'record'")
                sys.exit(1)
            return schema

        if isinstance(schema, list):
            if avro_record_type:
                for candidate in schema:
                    if not isinstance(candidate, dict):
                        continue
                    if candidate.get("type") != "record":
                        continue
                    record_name = str(candidate.get("name", ""))
                    namespace = str(candidate.get("namespace", ""))
                    fullname = self.get_fullname(namespace, record_name)
                    if avro_record_type in (record_name, fullname):
                        return candidate
                print(f"No top-level record type {avro_record_type} found in the Avro schema")
                sys.exit(1)

            for candidate in schema:
                if isinstance(candidate, dict) and candidate.get("type") == "record":
                    return candidate

            print("Expected at least one Avro 'record' schema in the schema list")
            sys.exit(1)

        print("Expected an Avro schema as a JSON object or a list of schema records")
        sys.exit(1)

    def resolve_records(self, schema: "JsonNode", avro_record_type: Optional[str]) -> List[Dict[str, Any]]:
        """Return the record schemas that should become tables.

        A dict schema, or a list schema combined with ``avro_record_type``,
        yields exactly one record. A list schema without a record type yields
        every top-level record in it. If nothing qualifies, a message is
        printed and the process exits with status 1.
        """
        if isinstance(schema, dict):
            return [self.resolve_root_record(schema, avro_record_type)]

        if isinstance(schema, list):
            if avro_record_type:
                return [self.resolve_root_record(schema, avro_record_type)]

            records = [item for item in schema if isinstance(item, dict) and item.get("type") == "record"]
            if records:
                return records

        print("Expected one or more Avro 'record' schemas")
        sys.exit(1)

    def build_table(self, record: Dict[str, Any], emit_cloudevents_columns: bool) -> Dict[str, Any]:
        """Build a TMSL table object from an Avro record.

        Fields named in the record's ``unique`` list get ``isKey``; nullable
        fields get ``isNullable``. When ``emit_cloudevents_columns`` is true,
        five nullable ``___``-prefixed CloudEvents columns are appended.
        """
        table_name = str(record.get("name", "Table"))

        # Only a list is accepted for "unique": None would raise on iteration
        # and a bare string would be iterated character by character.
        declared_unique = record.get("unique")
        if isinstance(declared_unique, list):
            unique_columns = {entry for entry in declared_unique if isinstance(entry, str)}
        else:
            unique_columns = set()

        fields = record.get("fields")
        if not isinstance(fields, list):
            fields = []

        columns: List[Dict[str, Any]] = []
        for field in fields:
            if not isinstance(field, dict):
                continue
            field_name = str(field.get("name", ""))
            if not field_name:
                continue
            data_type, nullable = self.map_avro_type_to_tmsl(field.get("type"))
            column: Dict[str, Any] = {
                "name": field_name,
                "dataType": data_type,
                "sourceColumn": field_name,
            }
            if field_name in unique_columns:
                column["isKey"] = True
            if nullable:
                column["isNullable"] = True
            columns.append(column)

        if emit_cloudevents_columns:
            columns.extend([
                {"name": "___type", "dataType": "string", "sourceColumn": "___type", "isNullable": True},
                {"name": "___source", "dataType": "string", "sourceColumn": "___source", "isNullable": True},
                {"name": "___id", "dataType": "string", "sourceColumn": "___id", "isNullable": True},
                {"name": "___time", "dataType": "dateTime", "sourceColumn": "___time", "isNullable": True},
                {"name": "___subject", "dataType": "string", "sourceColumn": "___subject", "isNullable": True},
            ])

        return {
            "name": table_name,
            "columns": columns,
        }

    def _record_sql_identifier(self, record: Dict[str, Any]) -> Optional[str]:
        """Return the non-empty ``altnames["sql"]`` string of a record, else None."""
        altnames = record.get("altnames")
        if isinstance(altnames, dict):
            sql_name = altnames.get("sql")
            if isinstance(sql_name, str) and sql_name:
                return sql_name
        return None

    def build_relationships(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Build TMSL relationships from the records' ``foreignKeys`` metadata.

        The target table is looked up first by ``referencedTableSql`` (matched
        against each record's SQL alt-name) and then by ``referencedTable``
        (matched against record names). Foreign keys whose target is not among
        ``records`` are skipped. One relationship is emitted per column pair,
        and duplicate relationship names are emitted only once.
        """
        table_by_name: Dict[str, str] = {}
        table_by_sql: Dict[str, str] = {}
        for record in records:
            record_name = record.get("name")
            if not isinstance(record_name, str):
                continue
            table_by_name[record_name] = record_name
            if not record_name:
                continue
            sql_identifier = self._record_sql_identifier(record)
            if sql_identifier:
                table_by_sql[sql_identifier] = record_name

        relationships: List[Dict[str, Any]] = []
        relationship_names: set = set()

        for record in records:
            from_table = record.get("name")
            if not isinstance(from_table, str) or not from_table:
                continue

            foreign_keys = record.get("foreignKeys")
            if not isinstance(foreign_keys, list):
                continue

            for fk in foreign_keys:
                if not isinstance(fk, dict):
                    continue

                columns = fk.get("columns")
                referenced_columns = fk.get("referencedColumns")
                if not isinstance(columns, list) or not isinstance(referenced_columns, list):
                    continue
                if not columns or len(columns) != len(referenced_columns):
                    continue

                target_table = None
                referenced_table_sql = fk.get("referencedTableSql")
                if isinstance(referenced_table_sql, str):
                    target_table = table_by_sql.get(referenced_table_sql)

                if not target_table:
                    referenced_table = fk.get("referencedTable")
                    if isinstance(referenced_table, str):
                        target_table = table_by_name.get(referenced_table)

                if not target_table:
                    continue

                for from_column, to_column in zip(columns, referenced_columns):
                    if not isinstance(from_column, str) or not isinstance(to_column, str):
                        continue

                    relationship_name = f"{from_table}_{from_column}_to_{target_table}_{to_column}"
                    if relationship_name in relationship_names:
                        continue

                    relationships.append(
                        {
                            "name": relationship_name,
                            "fromTable": from_table,
                            "fromColumn": from_column,
                            "toTable": target_table,
                            "toColumn": to_column,
                        }
                    )
                    relationship_names.add(relationship_name)

        return relationships

    def build_tmsl_schema(
        self,
        avro_schema: "JsonNode",
        avro_record_type: Optional[str] = None,
        database_name: str = "",
        compatibility_level: int = 1605,
        emit_cloudevents_columns: bool = False,
    ) -> Dict[str, Any]:
        """Build a TMSL ``createOrReplace`` document from an Avro schema document.

        The database name falls back to the first table's name when
        ``database_name`` is empty. The ``relationships`` key is only present
        when at least one relationship was derived.
        """
        self.cache_named_types(avro_schema)
        records = self.resolve_records(avro_schema, avro_record_type)

        tables = [self.build_table(record, emit_cloudevents_columns) for record in records]
        first_table_name = str(tables[0].get("name", "Database")) if tables else "Database"
        database = database_name or first_table_name
        relationships = self.build_relationships(records)

        model: Dict[str, Any] = {
            "culture": "en-US",
            "tables": tables,
        }
        if relationships:
            model["relationships"] = relationships

        return {
            "createOrReplace": {
                "object": {"database": database},
                "database": {
                    "name": database,
                    "compatibilityLevel": compatibility_level,
                    "model": model,
                },
            }
        }

    def convert_avro_to_tmsl(
        self,
        avro_schema_path: str,
        avro_record_type: Optional[str],
        tmsl_file_path: str,
        database_name: str = "",
        compatibility_level: int = 1605,
        emit_cloudevents_columns: bool = False,
    ) -> None:
        """Read an Avro schema file and write the TMSL JSON document.

        Prints a message and exits with status 1 when ``avro_schema_path`` is
        empty. Both files are handled as UTF-8.
        """
        if not avro_schema_path:
            print("Please specify the avro schema file")
            sys.exit(1)

        with open(avro_schema_path, "r", encoding="utf-8") as f:
            schema = json.load(f)

        tmsl_schema = self.build_tmsl_schema(
            schema,
            avro_record_type=avro_record_type,
            database_name=database_name,
            compatibility_level=compatibility_level,
            emit_cloudevents_columns=emit_cloudevents_columns,
        )

        with open(tmsl_file_path, "w", encoding="utf-8") as f:
            json.dump(tmsl_schema, f, indent=2)
330
+
331
+
332
def convert_avro_to_tmsl(
    avro_schema_path: str,
    avro_record_type: Optional[str],
    tmsl_file_path: str,
    database_name: str = "",
    compatibility_level: int = 1605,
    emit_cloudevents_columns: bool = False,
) -> None:
    """Module-level entry point: convert an Avro schema file to a TMSL JSON file.

    Creates a fresh ``AvroToTmslConverter`` and forwards every argument
    unchanged to its ``convert_avro_to_tmsl`` method.
    """
    AvroToTmslConverter().convert_avro_to_tmsl(
        avro_schema_path=avro_schema_path,
        avro_record_type=avro_record_type,
        tmsl_file_path=tmsl_file_path,
        database_name=database_name,
        compatibility_level=compatibility_level,
        emit_cloudevents_columns=emit_cloudevents_columns,
    )
@@ -1704,6 +1704,39 @@
1704
1704
  ],
1705
1705
  "prompts": []
1706
1706
  },
1707
+ {
1708
+ "command": "validate-tmsl",
1709
+ "description": "Validate TMSL scripts locally against documented object structure",
1710
+ "group": "7_Utility",
1711
+ "function": {
1712
+ "name": "avrotize.tmslvalidate.validate_tmsl",
1713
+ "args": {
1714
+ "tmsl_file_path": "input_file_path",
1715
+ "quiet": "args.quiet"
1716
+ }
1717
+ },
1718
+ "extensions": [
1719
+ ".tmsl.json",
1720
+ ".json"
1721
+ ],
1722
+ "args": [
1723
+ {
1724
+ "name": "input",
1725
+ "type": "str",
1726
+ "nargs": "?",
1727
+ "help": "Path to the TMSL JSON file (or read from stdin if omitted)",
1728
+ "required": false
1729
+ },
1730
+ {
1731
+ "name": "--quiet",
1732
+ "type": "bool",
1733
+ "help": "Suppress output. Exit code 0 if valid, 1 if invalid.",
1734
+ "default": false,
1735
+ "required": false
1736
+ }
1737
+ ],
1738
+ "prompts": []
1739
+ },
1707
1740
  {
1708
1741
  "command": "a2mongo",
1709
1742
  "description": "Convert Avrotize schema to MongoDB schema",
@@ -1945,6 +1978,153 @@
1945
1978
  }
1946
1979
  ]
1947
1980
  },
1981
+ {
1982
+ "command": "a2tsml",
1983
+ "description": "Convert Avrotize schema to Tabular Model Scripting Language (TMSL) schema",
1984
+ "group": "3_Datalake",
1985
+ "function": {
1986
+ "name": "avrotize.avrototsml.convert_avro_to_tmsl",
1987
+ "args": {
1988
+ "avro_schema_path": "input_file_path",
1989
+ "tmsl_file_path": "output_file_path",
1990
+ "avro_record_type": "args.record_type",
1991
+ "database_name": "args.database_name",
1992
+ "compatibility_level": "args.compatibility_level",
1993
+ "emit_cloudevents_columns": "args.emit_cloudevents_columns"
1994
+ }
1995
+ },
1996
+ "extensions": [
1997
+ ".avsc"
1998
+ ],
1999
+ "args": [
2000
+ {
2001
+ "name": "input",
2002
+ "type": "str",
2003
+ "nargs": "?",
2004
+ "help": "Path to the Avrotize schema file (or read from stdin if omitted)",
2005
+ "required": false
2006
+ },
2007
+ {
2008
+ "name": "--out",
2009
+ "type": "str",
2010
+ "help": "Path to the TMSL schema JSON file",
2011
+ "required": false
2012
+ },
2013
+ {
2014
+ "name": "--avsc",
2015
+ "type": "str",
2016
+ "help": "Deprecated: Path to the Avrotize schema file (for backcompat)",
2017
+ "required": false
2018
+ },
2019
+ {
2020
+ "name": "--record-type",
2021
+ "type": "str",
2022
+ "help": "Record type in the Avrotize schema",
2023
+ "required": false
2024
+ },
2025
+ {
2026
+ "name": "--database-name",
2027
+ "type": "str",
2028
+ "help": "Tabular model database name (defaults to the selected record type name)",
2029
+ "required": false,
2030
+ "default": ""
2031
+ },
2032
+ {
2033
+ "name": "--compatibility-level",
2034
+ "type": "int",
2035
+ "help": "Tabular model compatibility level",
2036
+ "required": false,
2037
+ "default": 1605
2038
+ },
2039
+ {
2040
+ "name": "--emit-cloudevents-columns",
2041
+ "type": "bool",
2042
+ "help": "Add CloudEvents columns to the TMSL table",
2043
+ "default": false,
2044
+ "required": false
2045
+ }
2046
+ ],
2047
+ "suggested_output_file_path": "{input_file_name}.tmsl.json",
2048
+ "prompts": [
2049
+ {
2050
+ "name": "--emit-cloudevents-columns",
2051
+ "message": "Add CloudEvents columns to the TMSL schema?",
2052
+ "type": "bool",
2053
+ "default": false
2054
+ }
2055
+ ]
2056
+ },
2057
+ {
2058
+ "command": "s2tsml",
2059
+ "description": "Convert JSON Structure to Tabular Model Scripting Language (TMSL) schema",
2060
+ "group": "3_Datalake",
2061
+ "function": {
2062
+ "name": "avrotize.structuretotsml.convert_structure_to_tmsl",
2063
+ "args": {
2064
+ "structure_schema_path": "input_file_path",
2065
+ "tmsl_file_path": "output_file_path",
2066
+ "structure_record_type": "args.record_type",
2067
+ "database_name": "args.database_name",
2068
+ "compatibility_level": "args.compatibility_level",
2069
+ "emit_cloudevents_columns": "args.emit_cloudevents_columns"
2070
+ }
2071
+ },
2072
+ "extensions": [
2073
+ ".struct.json",
2074
+ ".json"
2075
+ ],
2076
+ "args": [
2077
+ {
2078
+ "name": "input",
2079
+ "type": "str",
2080
+ "nargs": "?",
2081
+ "help": "Path to the JSON Structure schema file (or read from stdin if omitted)",
2082
+ "required": false
2083
+ },
2084
+ {
2085
+ "name": "--out",
2086
+ "type": "str",
2087
+ "help": "Path to the TMSL schema JSON file",
2088
+ "required": false
2089
+ },
2090
+ {
2091
+ "name": "--record-type",
2092
+ "type": "str",
2093
+ "help": "Record type in the JSON Structure schema",
2094
+ "required": false
2095
+ },
2096
+ {
2097
+ "name": "--database-name",
2098
+ "type": "str",
2099
+ "help": "Tabular model database name (defaults to the selected record type name)",
2100
+ "required": false,
2101
+ "default": ""
2102
+ },
2103
+ {
2104
+ "name": "--compatibility-level",
2105
+ "type": "int",
2106
+ "help": "Tabular model compatibility level",
2107
+ "required": false,
2108
+ "default": 1605
2109
+ },
2110
+ {
2111
+ "name": "--emit-cloudevents-columns",
2112
+ "type": "bool",
2113
+ "help": "Add CloudEvents columns to the TMSL table",
2114
+ "default": false,
2115
+ "required": false
2116
+ }
2117
+ ],
2118
+ "suggested_output_file_path": "{input_file_name}.tmsl.json",
2119
+ "prompts": [
2120
+ {
2121
+ "name": "--emit-cloudevents-columns",
2122
+ "message": "Add CloudEvents columns to the TMSL schema?",
2123
+ "type": "bool",
2124
+ "default": false
2125
+ }
2126
+ ]
2127
+ },
1948
2128
  {
1949
2129
  "command": "pq2a",
1950
2130
  "description": "Convert Parquet schema to Avrotize schema",
@@ -364,6 +364,11 @@ class JsonStructureToAvro:
364
364
 
365
365
  if 'description' in merged_schema:
366
366
  avro_record['doc'] = merged_schema['description']
367
+
368
+ if isinstance(merged_schema.get('x-avrotize-unique'), list):
369
+ avro_record['unique'] = merged_schema['x-avrotize-unique']
370
+ if isinstance(merged_schema.get('x-avrotize-foreignKeys'), list):
371
+ avro_record['foreignKeys'] = merged_schema['x-avrotize-foreignKeys']
367
372
 
368
373
  # Convert properties to fields
369
374
  properties = merged_schema.get('properties', {})