structurize-2.19.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. avrotize/__init__.py +64 -0
  2. avrotize/__main__.py +6 -0
  3. avrotize/_version.py +34 -0
  4. avrotize/asn1toavro.py +160 -0
  5. avrotize/avrotize.py +152 -0
  6. avrotize/avrotocpp.py +483 -0
  7. avrotize/avrotocsharp.py +1075 -0
  8. avrotize/avrotocsv.py +121 -0
  9. avrotize/avrotodatapackage.py +173 -0
  10. avrotize/avrotodb.py +1383 -0
  11. avrotize/avrotogo.py +476 -0
  12. avrotize/avrotographql.py +197 -0
  13. avrotize/avrotoiceberg.py +210 -0
  14. avrotize/avrotojava.py +2156 -0
  15. avrotize/avrotojs.py +250 -0
  16. avrotize/avrotojsons.py +481 -0
  17. avrotize/avrotojstruct.py +345 -0
  18. avrotize/avrotokusto.py +364 -0
  19. avrotize/avrotomd.py +137 -0
  20. avrotize/avrotools.py +168 -0
  21. avrotize/avrotoparquet.py +208 -0
  22. avrotize/avrotoproto.py +359 -0
  23. avrotize/avrotopython.py +624 -0
  24. avrotize/avrotorust.py +435 -0
  25. avrotize/avrotots.py +598 -0
  26. avrotize/avrotoxsd.py +344 -0
  27. avrotize/cddltostructure.py +1841 -0
  28. avrotize/commands.json +3337 -0
  29. avrotize/common.py +834 -0
  30. avrotize/constants.py +72 -0
  31. avrotize/csvtoavro.py +132 -0
  32. avrotize/datapackagetoavro.py +76 -0
  33. avrotize/dependencies/cpp/vcpkg/vcpkg.json +19 -0
  34. avrotize/dependencies/typescript/node22/package.json +16 -0
  35. avrotize/dependency_resolver.py +348 -0
  36. avrotize/dependency_version.py +432 -0
  37. avrotize/jsonstoavro.py +2167 -0
  38. avrotize/jsonstostructure.py +2642 -0
  39. avrotize/jstructtoavro.py +878 -0
  40. avrotize/kstructtoavro.py +93 -0
  41. avrotize/kustotoavro.py +455 -0
  42. avrotize/parquettoavro.py +157 -0
  43. avrotize/proto2parser.py +498 -0
  44. avrotize/proto3parser.py +403 -0
  45. avrotize/prototoavro.py +382 -0
  46. avrotize/structuretocddl.py +597 -0
  47. avrotize/structuretocpp.py +697 -0
  48. avrotize/structuretocsharp.py +2295 -0
  49. avrotize/structuretocsv.py +365 -0
  50. avrotize/structuretodatapackage.py +659 -0
  51. avrotize/structuretodb.py +1125 -0
  52. avrotize/structuretogo.py +720 -0
  53. avrotize/structuretographql.py +502 -0
  54. avrotize/structuretoiceberg.py +355 -0
  55. avrotize/structuretojava.py +853 -0
  56. avrotize/structuretojsons.py +498 -0
  57. avrotize/structuretokusto.py +639 -0
  58. avrotize/structuretomd.py +322 -0
  59. avrotize/structuretoproto.py +764 -0
  60. avrotize/structuretopython.py +772 -0
  61. avrotize/structuretorust.py +714 -0
  62. avrotize/structuretots.py +653 -0
  63. avrotize/structuretoxsd.py +679 -0
  64. avrotize/xsdtoavro.py +413 -0
  65. structurize-2.19.0.dist-info/METADATA +107 -0
  66. structurize-2.19.0.dist-info/RECORD +70 -0
  67. structurize-2.19.0.dist-info/WHEEL +5 -0
  68. structurize-2.19.0.dist-info/entry_points.txt +2 -0
  69. structurize-2.19.0.dist-info/licenses/LICENSE +201 -0
  70. structurize-2.19.0.dist-info/top_level.txt +1 -0
avrotize/structuretodb.py
@@ -0,0 +1,1125 @@
"""
Convert JSON Structure schema to SQL schema for various databases.
"""

import json
import sys
import os
from typing import Dict, List, Optional, Any, cast, Set

from avrotize.common import altname

JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | float | None


class SchemaConverter:
    """Helper class to manage schema conversion with support for $ref, $extends, and definitions"""

    def __init__(self, schema_list: List[Dict[str, JsonNode]]):
        self.schema_list = schema_list if isinstance(schema_list, list) else [schema_list]
        self.schema_registry: Dict[str, Dict] = {}
        self.definitions: Dict[str, Dict] = {}
        self.generated_tables: Set[str] = set()

        # Register all schemas with $id
        for schema in self.schema_list:
            self.register_schema_ids(schema)
            # Extract definitions from schema
            if isinstance(schema, dict) and 'definitions' in schema:
                self.definitions.update(schema['definitions'])

    def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
        """Recursively registers schemas with $id keywords"""
        if not isinstance(schema, dict):
            return

        if '$id' in schema:
            schema_id = schema['$id']
            if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
                from urllib.parse import urljoin
                schema_id = urljoin(base_uri, schema_id)
            self.schema_registry[schema_id] = schema
            base_uri = schema_id

        # Register definitions
        if 'definitions' in schema:
            for def_name, def_schema in schema['definitions'].items():
                if isinstance(def_schema, dict):
                    self.register_schema_ids(def_schema, base_uri)

        # Register properties
        if 'properties' in schema:
            for prop_name, prop_schema in schema['properties'].items():
                if isinstance(prop_schema, dict):
                    self.register_schema_ids(prop_schema, base_uri)

    def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None) -> Optional[Dict]:
        """Resolves a $ref to the actual schema definition"""
        # Check if it's an absolute URI reference
        if not ref.startswith('#/'):
            if ref in self.schema_registry:
                return self.schema_registry[ref]
            return None

        # Handle fragment-only references (internal to document)
        path = ref[2:].split('/')
        schema = context_schema if context_schema else (self.schema_list[0] if self.schema_list else {})

        for part in path:
            if not isinstance(schema, dict) or part not in schema:
                return None
            schema = schema[part]

        return schema

    def get_all_properties(self, schema: Dict) -> Dict[str, Any]:
        """Gets all properties including inherited ones from $extends"""
        properties = {}

        # First, recursively get base class properties if $extends exists
        if '$extends' in schema:
            base_ref = schema['$extends']
            base_schema = self.resolve_ref(base_ref, schema)
            if base_schema:
                # Recursively get base properties
                properties.update(self.get_all_properties(base_schema))

        # Then add/override with current schema properties
        if 'properties' in schema:
            properties.update(schema['properties'])

        return properties

    def get_all_required(self, schema: Dict) -> List[str]:
        """Gets all required properties including inherited ones from $extends"""
        required = []

        # First, recursively get base class required if $extends exists
        if '$extends' in schema:
            base_ref = schema['$extends']
            base_schema = self.resolve_ref(base_ref, schema)
            if base_schema:
                required.extend(self.get_all_required(base_schema))

        # Then add current schema required
        if 'required' in schema:
            schema_required = schema['required']
            if isinstance(schema_required, list):
                required.extend(schema_required)

        return list(set(required))  # Remove duplicates

    def should_generate_table(self, schema: Dict) -> bool:
        """Determines if a table should be generated for this schema"""
        # Only generate tables for object and tuple types
        schema_type = schema.get('type')
        if schema_type not in ['object', 'tuple']:
            return False

        # Don't generate tables for abstract types (they're only for inheritance)
        if schema.get('abstract', False):
            return False

        return True

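
# An illustrative sketch of the $extends flattening above (hypothetical schema,
# shown doctest-style; not taken from the package's test suite):
#
#   >>> converter = SchemaConverter([{
#   ...     "type": "object", "name": "Dog",
#   ...     "$extends": "#/definitions/Animal",
#   ...     "properties": {"breed": {"type": "string"}},
#   ...     "definitions": {
#   ...         "Animal": {"type": "object", "abstract": True,
#   ...                    "properties": {"name": {"type": "string"}}}}}])
#   >>> sorted(converter.get_all_properties(converter.schema_list[0]))
#   ['breed', 'name']
#
# Note that the abstract base type itself is skipped by should_generate_table(),
# while its properties still flow into every concrete subtype's table.
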
def convert_structure_to_sql(structure_schema_path: str, dbscript_file_path: str, db_dialect: str,
                             emit_cloudevents_columns: bool = False, schema_name: str = ''):
    """
    Converts a JSON Structure schema to a database schema for the specified DB dialect.

    Args:
        structure_schema_path (str): Path to the JSON Structure schema file.
        dbscript_file_path (str): Path to the output SQL file.
        db_dialect (str): SQL/DB dialect. Supported: 'sqlserver', 'postgres', 'mysql', 'mariadb',
                          'sqlite', 'oracle', 'db2', 'sqlanywhere', 'bigquery', 'snowflake',
                          'redshift', 'cassandra', 'mongodb', 'dynamodb', 'elasticsearch',
                          'couchdb', 'neo4j', 'firebase', 'cosmosdb', 'hbase'.
        emit_cloudevents_columns (bool): Whether to include CloudEvents columns.
        schema_name (str): Schema name (optional).

    Raises:
        ValueError: If the SQL dialect is unsupported.
    """
    if db_dialect not in ["sqlserver", "postgres", "mysql", "mariadb", "sqlite", "oracle", "db2",
                          "sqlanywhere", "bigquery", "snowflake", "redshift", "cassandra", "mongodb",
                          "dynamodb", "elasticsearch", "couchdb", "neo4j", "firebase", "cosmosdb", "hbase"]:
        print(f"Unsupported SQL dialect: {db_dialect}")
        sys.exit(1)

    if not structure_schema_path:
        print("Please specify the structure schema file")
        sys.exit(1)

    with open(structure_schema_path, "r", encoding="utf-8") as f:
        schema_json = f.read()

    schema_list = json.loads(schema_json)
    if not isinstance(schema_list, list):
        schema_list = [schema_list]

    # Create converter with support for $ref and $extends
    converter = SchemaConverter(schema_list)

    tables_sql = []
    # Process all schemas and generate tables only for object/tuple types
    for schema in schema_list:
        if not isinstance(schema, dict):
            continue

        # Only generate tables for object and tuple types (not abstract)
        if converter.should_generate_table(schema):
            tables_sql.extend(generate_sql(
                schema, db_dialect, emit_cloudevents_columns, converter, schema_name))

        # Also process definitions if present
        if 'definitions' in schema:
            for def_name, def_schema in schema['definitions'].items():
                if isinstance(def_schema, dict) and converter.should_generate_table(def_schema):
                    # Add name if not present
                    if 'name' not in def_schema:
                        def_schema['name'] = def_name
                    tables_sql.extend(generate_sql(
                        def_schema, db_dialect, emit_cloudevents_columns, converter, schema_name))

    with open(dbscript_file_path, "w", encoding="utf-8") as sql_file:
        sql_file.write("\n".join(tables_sql))

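
# A minimal usage sketch (hypothetical file paths): read one JSON Structure
# document and emit a PostgreSQL DDL script.
#
#   >>> convert_structure_to_sql("address.struct.json", "address.sql", "postgres")
#
# One CREATE TABLE statement is produced per concrete object/tuple type,
# including types found under "definitions"; abstract base types are skipped.
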
def generate_sql(
        schema: Dict[str, JsonNode],
        sql_dialect: str,
        emit_cloudevents_columns: bool,
        converter: SchemaConverter,
        schema_name: str = '') -> List[str]:
    """
    Generates SQL schema statements for the given JSON Structure schema.

    Args:
        schema (dict): JSON Structure schema.
        sql_dialect (str): SQL dialect.
        emit_cloudevents_columns (bool): Whether to include CloudEvents columns.
        converter (SchemaConverter): Schema converter with $ref/$extends support.
        schema_name (str): Schema name (optional).

    Returns:
        list: List of SQL statements.
    """
    if sql_dialect in ["sqlserver", "postgres", "mysql", "mariadb", "sqlite", "oracle", "db2", "sqlanywhere",
                       "bigquery", "snowflake", "redshift"]:
        return generate_relational_sql(schema, sql_dialect, emit_cloudevents_columns, converter, schema_name)
    elif sql_dialect == "cassandra":
        return generate_cassandra_schema(schema, emit_cloudevents_columns, converter, schema_name)
    else:
        raise ValueError(f"Unsupported SQL dialect: {sql_dialect}")


def generate_relational_sql(
        schema: Dict[str, JsonNode],
        sql_dialect: str,
        emit_cloudevents_columns: bool,
        converter: SchemaConverter,
        schema_name: str = '') -> List[str]:
    """
    Generates relational SQL schema statements for the given JSON Structure schema.
    Handles inheritance from $extends by including all properties from base types.
    """
    namespace = str(schema.get("namespace", "")).replace('.', '_')
    plain_table_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'table')}"
    table_name = escape_name(plain_table_name, sql_dialect)

    # Get all properties including inherited ones
    properties: Dict[str, Any] = converter.get_all_properties(schema)
    required_props: List[str] = converter.get_all_required(schema)

    table_comments = generate_table_comments_json(schema)
    column_comments = generate_column_comments_json(properties, converter.schema_list)

    sql = []

    # Add comment about inheritance if $extends is present
    if '$extends' in schema:
        base_ref = schema['$extends']
        sql.append(f"-- Inherits from: {base_ref}")

    sql.append(f"CREATE TABLE {table_name} (")

    for prop_name, prop_schema in properties.items():
        has_sql_altname = isinstance(prop_schema, dict) and 'sql' in prop_schema.get('$altnames', {})
        column_name = escape_name(altname(prop_schema, 'sql') if has_sql_altname else prop_name, sql_dialect)
        column_type = structure_type_to_sql_type(prop_schema, sql_dialect)
        column_definition = f"{column_name} {column_type}"

        if isinstance(prop_schema, dict) and prop_schema.get("unique", False):
            column_definition += f" {unique_clause(sql_dialect)}"
        if sql_dialect == "mysql" and prop_name in column_comments:
            cmt = column_comments[str(prop_name)].replace("'", "''")
            column_definition += f" COMMENT '{cmt}'"
        sql.append(f"    {column_definition},")

    if emit_cloudevents_columns:
        sql.extend([
            f"    {escape_name('___type', sql_dialect)} {structure_type_to_sql_type('string', sql_dialect)} NOT NULL,",
            f"    {escape_name('___source', sql_dialect)} {structure_type_to_sql_type('string', sql_dialect)} NOT NULL,",
            f"    {escape_name('___id', sql_dialect)} {structure_type_to_sql_type('string', sql_dialect)} NOT NULL,",
            f"    {escape_name('___time', sql_dialect)} {structure_type_to_sql_type('datetime', sql_dialect)} NULL,",
            f"    {escape_name('___subject', sql_dialect)} {structure_type_to_sql_type('string', sql_dialect)} NULL,"
        ])

    # Handle unique keys from schema: every required property becomes part of the key
    unique_keys = [prop_name for prop_name in properties.keys() if prop_name in required_props]

    if unique_keys:
        unique_column_names = []
        if sql_dialect in ["mysql", "mariadb"]:
            for prop_name in unique_keys:
                prop_schema = properties[prop_name]
                column_type = structure_type_to_sql_type(prop_schema, sql_dialect)
                has_sql_altname = isinstance(prop_schema, dict) and 'sql' in prop_schema.get('$altnames', {})
                col_name = altname(prop_schema, 'sql') if has_sql_altname else prop_name
                if column_type in ["BLOB", "TEXT"]:
                    # MySQL/MariaDB require a key prefix length for BLOB/TEXT columns
                    unique_column_names.append(escape_name(col_name + "(20)", sql_dialect))
                else:
                    unique_column_names.append(escape_name(col_name, sql_dialect))
        else:
            for prop_name in unique_keys:
                prop_schema = properties[prop_name]
                has_sql_altname = isinstance(prop_schema, dict) and 'sql' in prop_schema.get('$altnames', {})
                unique_column_names.append(
                    escape_name(altname(prop_schema, 'sql') if has_sql_altname else prop_name, sql_dialect))

        sql.append(f"    {primary_key_clause(sql_dialect)} ({', '.join(unique_column_names)})")
    else:
        sql[-1] = sql[-1][:-1]  # Remove the trailing comma after the last column
    sql.append(");")
    sql.append("")

    if sql_dialect != "mysql":
        sql.extend(generate_table_comment_sql(sql_dialect, table_comments, plain_table_name))
        sql.extend(generate_column_comment_sql(sql_dialect, column_comments, plain_table_name))

    return sql

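
# For illustration, a hypothetical schema such as
#
#   {"name": "User", "namespace": "example.app", "type": "object",
#    "properties": {"id": {"type": "uuid"}, "email": {"type": "string"}},
#    "required": ["id"]}
#
# would come out roughly as follows for the "postgres" dialect (assuming
# altname() yields no SQL alternate names here):
#
#   CREATE TABLE "example_app_User" (
#       "id" UUID,
#       "email" VARCHAR(512),
#       PRIMARY KEY ("id")
#   );
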
def structure_type_to_sql_type(structure_type: Any, dialect: str) -> str:
    """
    Maps a JSON Structure type to a SQL type for the specified dialect.
    """
    type_map = {
        "sqlserver": {
            "null": "NULL",
            "boolean": "BIT",
            "string": "NVARCHAR(512)",
            "int8": "TINYINT",
            "uint8": "TINYINT",
            "int16": "SMALLINT",
            "uint16": "SMALLINT",
            "int32": "INT",
            "uint32": "INT",
            "int64": "BIGINT",
            "uint64": "BIGINT",
            "int128": "DECIMAL(38,0)",
            "uint128": "DECIMAL(38,0)",
            "integer": "INT",
            "float8": "REAL",
            "float": "FLOAT",
            "double": "FLOAT",
            "number": "FLOAT",
            "decimal": "DECIMAL(18,6)",
            "binary": "VARBINARY(MAX)",
            "date": "DATE",
            "time": "TIME",
            "datetime": "DATETIME2",
            "timestamp": "DATETIME2",
            "duration": "BIGINT",
            "uuid": "UNIQUEIDENTIFIER",
            "uri": "NVARCHAR(2048)",
            "jsonpointer": "NVARCHAR(512)",
            "any": "NVARCHAR(MAX)",
            "array": "NVARCHAR(MAX)",
            "set": "NVARCHAR(MAX)",
            "map": "NVARCHAR(MAX)",
            "object": "NVARCHAR(MAX)",
            "choice": "NVARCHAR(MAX)",
            "tuple": "NVARCHAR(MAX)"
        },
        "postgres": {
            "null": "NULL",
            "boolean": "BOOLEAN",
            "string": "VARCHAR(512)",
            "int8": "SMALLINT",
            "uint8": "SMALLINT",
            "int16": "SMALLINT",
            "uint16": "INTEGER",
            "int32": "INTEGER",
            "uint32": "BIGINT",
            "int64": "BIGINT",
            "uint64": "NUMERIC(20)",
            "int128": "NUMERIC(39)",
            "uint128": "NUMERIC(39)",
            "integer": "INTEGER",
            "float8": "REAL",
            "float": "REAL",
            "double": "DOUBLE PRECISION",
            "number": "DOUBLE PRECISION",
            "decimal": "NUMERIC(18,6)",
            "binary": "BYTEA",
            "date": "DATE",
            "time": "TIME",
            "datetime": "TIMESTAMP",
            "timestamp": "TIMESTAMP",
            "duration": "INTERVAL",
            "uuid": "UUID",
            "uri": "VARCHAR(2048)",
            "jsonpointer": "VARCHAR(512)",
            "any": "JSONB",
            "array": "JSONB",
            "set": "JSONB",
            "map": "JSONB",
            "object": "JSONB",
            "choice": "JSONB",
            "tuple": "JSONB"
        },
        "mysql": {
            "null": "NULL",
            "boolean": "BOOLEAN",
            "string": "VARCHAR(512)",
            "int8": "TINYINT",
            "uint8": "TINYINT UNSIGNED",
            "int16": "SMALLINT",
            "uint16": "SMALLINT UNSIGNED",
            "int32": "INT",
            "uint32": "INT UNSIGNED",
            "int64": "BIGINT",
            "uint64": "BIGINT UNSIGNED",
            "int128": "DECIMAL(38,0)",
            "uint128": "DECIMAL(38,0)",
            "integer": "INT",
            "float8": "FLOAT",
            "float": "FLOAT",
            "double": "DOUBLE",
            "number": "DOUBLE",
            "decimal": "DECIMAL(18,6)",
            "binary": "BLOB",
            "date": "DATE",
            "time": "TIME",
            "datetime": "DATETIME",
            "timestamp": "TIMESTAMP",
            "duration": "BIGINT",
            "uuid": "CHAR(36)",
            "uri": "VARCHAR(2048)",
            "jsonpointer": "VARCHAR(512)",
            "any": "JSON",
            "array": "JSON",
            "set": "JSON",
            "map": "JSON",
            "object": "JSON",
            "choice": "JSON",
            "tuple": "JSON"
        }
    }

    # Handle mariadb, sqlite, oracle, etc. by copying mysql/postgres patterns
    type_map["mariadb"] = type_map["mysql"].copy()
    type_map["sqlite"] = {k: v.replace("JSON", "TEXT").replace("BYTEA", "BLOB").replace("BIGINT", "INTEGER") for k, v in type_map["postgres"].items()}
    type_map["oracle"] = {k: v.replace("VARCHAR", "VARCHAR2").replace("JSONB", "CLOB").replace("BYTEA", "BLOB") for k, v in type_map["postgres"].items()}
    type_map["db2"] = type_map["postgres"].copy()
    type_map["sqlanywhere"] = type_map["sqlserver"].copy()
    type_map["bigquery"] = {k: v.replace("VARCHAR", "STRING").replace("JSONB", "STRING").replace("BYTEA", "BYTES") for k, v in type_map["postgres"].items()}
    type_map["snowflake"] = {k: v.replace("JSONB", "VARIANT").replace("BYTEA", "BINARY") for k, v in type_map["postgres"].items()}
    type_map["redshift"] = type_map["postgres"].copy()

    # Handle type resolution; fall back to the postgres map for unknown dialects
    dialect_map = type_map.get(dialect, type_map["postgres"])

    if isinstance(structure_type, str):
        return dialect_map.get(structure_type, dialect_map["string"])

    if isinstance(structure_type, list):
        # Union type - filter out null
        non_null_types = [t for t in structure_type if t != "null"]
        if len(non_null_types) == 1:
            return structure_type_to_sql_type(non_null_types[0], dialect)
        return dialect_map["any"]

    if isinstance(structure_type, dict):
        struct_type = structure_type.get("type", "string")
        if struct_type in ["array", "set", "map", "object", "choice", "tuple"]:
            return dialect_map[struct_type]
        return structure_type_to_sql_type(struct_type, dialect)

    return dialect_map["string"]

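
# A few spot checks of the mapping (doctest-style, values per the tables above):
#
#   >>> structure_type_to_sql_type("int64", "sqlserver")
#   'BIGINT'
#   >>> structure_type_to_sql_type(["null", "string"], "postgres")  # nullable union
#   'VARCHAR(512)'
#   >>> structure_type_to_sql_type({"type": "map"}, "mysql")
#   'JSON'
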
def generate_cassandra_schema(schema: Dict[str, JsonNode], emit_cloudevents_columns: bool, converter: SchemaConverter, schema_name: str) -> List[str]:
    """
    Generates Cassandra schema statements for the given JSON Structure schema.
    Handles inheritance from $extends by including all properties from base types.
    """
    namespace = cast(str, schema.get("namespace", "")).replace(".", "_")
    table_name = altname(schema, 'sql') or schema.get("name", "table")
    table_name = escape_name(
        f"{namespace}_{table_name}" if namespace else table_name, "cassandra")
    if schema_name:
        table_name = f"{schema_name}.{table_name}"

    # Get all properties including inherited ones
    properties: Dict[str, Any] = converter.get_all_properties(schema)
    required_props: List[str] = converter.get_all_required(schema)

    cql = []

    # Add comment about inheritance if $extends is present
    if '$extends' in schema:
        base_ref = schema['$extends']
        cql.append(f"-- Inherits from: {base_ref}")

    cql.append(f"CREATE TABLE {table_name} (")
    for prop_name, prop_schema in properties.items():
        has_sql_altname = isinstance(prop_schema, dict) and 'sql' in prop_schema.get('$altnames', {})
        column_name = escape_name(
            altname(prop_schema, 'sql') if has_sql_altname else prop_name, 'cassandra')
        column_type = structure_type_to_cassandra_type(prop_schema)
        cql.append(f"    {column_name} {column_type},")

    if emit_cloudevents_columns:
        cql.extend([
            f"    {escape_name('cloudevents_type', 'cassandra')} text,",
            f"    {escape_name('cloudevents_source', 'cassandra')} text,",
            f"    {escape_name('cloudevents_id', 'cassandra')} text,",
            f"    {escape_name('cloudevents_time', 'cassandra')} timestamp,",
            f"    {escape_name('cloudevents_subject', 'cassandra')} text,"
        ])

    # Determine primary key
    primary_keys = [prop_name for prop_name in required_props if prop_name in properties]
    if primary_keys:
        pk_columns = []
        for prop_name in primary_keys:
            prop_schema = properties[prop_name]
            has_sql_altname = isinstance(prop_schema, dict) and 'sql' in prop_schema.get('$altnames', {})
            pk_columns.append(escape_name(
                altname(prop_schema, 'sql') if has_sql_altname else prop_name, "cassandra"))
        cql.append(f"    PRIMARY KEY ({', '.join(pk_columns)})")
    elif emit_cloudevents_columns:
        cql.append(f"    PRIMARY KEY ({escape_name('cloudevents_id', 'cassandra')})")
    else:
        # Use first column as primary key
        if properties:
            first_prop = list(properties.keys())[0]
            cql.append(f"    PRIMARY KEY ({escape_name(first_prop, 'cassandra')})")

    cql.append(");")
    return cql


def structure_type_to_cassandra_type(structure_type: Any) -> str:
    """
    Converts a JSON Structure type to a Cassandra type.
    """
    type_map = {
        "null": "text",
        "boolean": "boolean",
        "string": "text",
        "int8": "tinyint",
        "uint8": "tinyint",
        "int16": "smallint",
        "uint16": "smallint",
        "int32": "int",
        "uint32": "int",
        "int64": "bigint",
        "uint64": "bigint",
        "int128": "varint",
        "uint128": "varint",
        "integer": "int",
        "float8": "float",
        "float": "float",
        "double": "double",
        "number": "double",
        "decimal": "decimal",
        "binary": "blob",
        "date": "date",
        "time": "time",
        "datetime": "timestamp",
        "timestamp": "timestamp",
        "duration": "bigint",
        "uuid": "uuid",
        "uri": "text",
        "jsonpointer": "text",
        "any": "text",
        "array": "text",
        "set": "text",
        "map": "text",
        "object": "text",
        "choice": "text",
        "tuple": "text"
    }

    if isinstance(structure_type, str):
        return type_map.get(structure_type, "text")

    if isinstance(structure_type, list):
        non_null_types = [t for t in structure_type if t != "null"]
        if len(non_null_types) == 1:
            return structure_type_to_cassandra_type(non_null_types[0])
        return "text"

    if isinstance(structure_type, dict):
        return type_map.get(structure_type.get("type", "string"), "text")

    return "text"


def escape_name(name: str, dialect: str) -> str:
    """
    Escapes a name (table or column) for the given SQL dialect.
    """
    if dialect in ["sqlserver", "sqlanywhere"]:
        return f"[{name}]"
    elif dialect in ["postgres", "sqlite", "bigquery", "snowflake", "redshift", "cassandra"]:
        return f'"{name}"'
    elif dialect in ["mysql", "mariadb"]:
        return f"`{name}`"
    elif dialect in ["oracle", "db2"]:
        return f'"{name.upper()}"'
    else:
        return name

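
# Quoting per dialect, for example:
#
#   >>> escape_name("order", "sqlserver")
#   '[order]'
#   >>> escape_name("order", "mysql")
#   '`order`'
#   >>> escape_name("order", "oracle")
#   '"ORDER"'
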
def unique_clause(dialect: str) -> str:
    """
    Returns the UNIQUE clause for the given SQL dialect (identical across the supported dialects).
    """
    return "UNIQUE"


def primary_key_clause(dialect: str) -> str:
    """
    Returns the PRIMARY KEY clause for the given SQL dialect (identical across the supported dialects).
    """
    return "PRIMARY KEY"


def generate_table_comments_json(schema: Dict[str, JsonNode]) -> Dict[str, str]:
    """
    Generates table-level comments as JSON.
    """
    comments = {}
    if "description" in schema:
        comments["doc"] = str(schema["description"])
    elif "doc" in schema:
        comments["doc"] = str(schema["doc"])
    return comments


def generate_column_comments_json(properties: Dict[str, Any], schema_list: List[Dict[str, JsonNode]]) -> Dict[str, str]:
    """
    Generates column-level comments as JSON.
    """
    comments = {}
    for prop_name, prop_schema in properties.items():
        column_comment = {}
        if isinstance(prop_schema, dict):
            if "description" in prop_schema:
                column_comment["doc"] = prop_schema["description"]
            elif "doc" in prop_schema:
                column_comment["doc"] = prop_schema["doc"]

            if "type" in prop_schema and prop_schema["type"] in ["array", "set", "map", "object", "choice", "tuple"]:
                column_comment["schema"] = prop_schema

        if column_comment:
            comments[str(prop_name)] = json.dumps(column_comment)
    return comments


def generate_table_comment_sql(dialect: str, table_comments: Dict[str, str], table_name: str) -> List[str]:
    """
    Generates SQL statements for table-level comments.
    """
    comments = []
    if "doc" in table_comments:
        doc_string = table_comments["doc"].replace("'", "''")
        if dialect == "sqlserver":
            comments.append(
                f"EXEC sp_addextendedproperty 'MS_Description', '{doc_string}', 'SCHEMA', 'dbo', 'TABLE', '{table_name}';")
        elif dialect in ["postgres", "oracle"]:
            comments.append(
                f"COMMENT ON TABLE {escape_name(table_name, dialect)} IS '{doc_string}';")
        elif dialect == "sqlite":
            comments.append(
                f"-- COMMENT ON TABLE {escape_name(table_name, dialect)} IS '{doc_string}';")
    return comments


def generate_column_comment_sql(dialect: str, column_comments: Dict[str, str], table_name: str) -> List[str]:
    """
    Generates SQL statements for column-level comments.
    """
    comments = []
    for column_name, comment in column_comments.items():
        comment_data = json.loads(comment)
        doc = comment_data.get("doc", "")
        doc = doc.replace("'", "''")
        schema = comment_data.get("schema", "")
        if dialect == "sqlserver":
            if doc:
                comments.append(
                    f"EXEC sp_addextendedproperty 'MS_Description', '{doc}', 'SCHEMA', 'dbo', 'TABLE', '{table_name}', 'COLUMN', '{column_name}';")
            if schema:
                comments.append(
                    f"EXEC sp_addextendedproperty 'MS_Schema', '{json.dumps(schema)}', 'SCHEMA', 'dbo', 'TABLE', '{table_name}', 'COLUMN', '{column_name}';")
        else:
            comment = comment.replace("'", "''")
            comments.append(
                f"COMMENT ON COLUMN {escape_name(table_name, dialect)}.{escape_name(column_name, dialect)} IS '{comment}';")
    return comments


def convert_structure_to_nosql(structure_schema_path: str, nosql_file_path: str, nosql_dialect: str,
                               emit_cloudevents_columns: bool = False):
    """
    Converts a JSON Structure schema to a NoSQL schema for the specified NoSQL dialect.
    Only generates schemas for object and tuple types (not abstract types).
    """
    if not structure_schema_path:
        print("Please specify the structure schema file")
        sys.exit(1)

    with open(structure_schema_path, "r", encoding="utf-8") as f:
        schema_json = f.read()

    schema_list = json.loads(schema_json)
    if not isinstance(schema_list, list):
        schema_list = [schema_list]

    # Create converter with support for $ref and $extends
    converter = SchemaConverter(schema_list)

    os.makedirs(nosql_file_path, exist_ok=True)

    # Process all schemas
    for schema in schema_list:
        if not isinstance(schema, dict):
            continue

        # Only generate for object and tuple types (not abstract)
        if converter.should_generate_table(schema):
            model = generate_nosql(schema, nosql_dialect, emit_cloudevents_columns, converter)
            file_name = os.path.join(
                nosql_file_path, get_file_name(schema, get_nosql_file_extension(nosql_dialect)))
            with open(file_name, "w", encoding="utf-8") as nosql_file:
                if isinstance(model, list):
                    nosql_file.write("\n".join(model))
                else:
                    nosql_file.write(model)

        # Also process definitions if present
        if 'definitions' in schema:
            for def_name, def_schema in schema['definitions'].items():
                if isinstance(def_schema, dict) and converter.should_generate_table(def_schema):
                    # Add name if not present
                    if 'name' not in def_schema:
                        def_schema['name'] = def_name
                    model = generate_nosql(def_schema, nosql_dialect, emit_cloudevents_columns, converter)
                    file_name = os.path.join(
                        nosql_file_path, get_file_name(def_schema, get_nosql_file_extension(nosql_dialect)))
                    with open(file_name, "w", encoding="utf-8") as nosql_file:
                        if isinstance(model, list):
                            nosql_file.write("\n".join(model))
                        else:
                            nosql_file.write(model)

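
# A minimal usage sketch (hypothetical paths): one schema file is written into
# the output directory per concrete type.
#
#   >>> convert_structure_to_nosql("catalog.struct.json", "out/", "mongodb")
#
# For "mongodb" this writes <namespace>.<name>.json files (dots in the namespace
# become underscores) containing $jsonSchema validator documents; for "neo4j"
# the files carry a .cypher extension.
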
def get_nosql_file_extension(nosql_dialect: str) -> str:
    """
    Returns the file extension for the given NoSQL dialect.
    """
    if nosql_dialect == "neo4j":
        return "cypher"
    else:
        return "json"


def generate_nosql(schema: Dict[str, Any], nosql_dialect: str, emit_cloudevents_columns: bool,
                   converter: SchemaConverter) -> str:
    """
    Generates NoSQL schema statements for the given JSON Structure schema.
    """
    if nosql_dialect == "mongodb":
        return generate_mongodb_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "dynamodb":
        return generate_dynamodb_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "elasticsearch":
        return generate_elasticsearch_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "couchdb":
        return generate_couchdb_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "neo4j":
        return generate_neo4j_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "firebase":
        return generate_firebase_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "cosmosdb":
        return generate_cosmosdb_schema(schema, emit_cloudevents_columns, converter)
    elif nosql_dialect == "hbase":
        return generate_hbase_schema(schema, emit_cloudevents_columns, converter)
    else:
        raise ValueError(f"Unsupported NoSQL dialect: {nosql_dialect}")


def generate_mongodb_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates MongoDB schema statements for the given JSON Structure schema.
    """
    namespace = schema.get("namespace", "")
    collection_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'collection')}"
    properties = converter.get_all_properties(schema)
    required_props = converter.get_all_required(schema)

    mongodb_schema = {
        "$jsonSchema": {
            "bsonType": "object",
            "required": [],
            "properties": {}
        }
    }

    for prop_name, prop_schema in properties.items():
        column_type = structure_type_to_mongodb_type(prop_schema)
        mongodb_schema["$jsonSchema"]["properties"][prop_name] = column_type
        if prop_name in required_props:
            mongodb_schema["$jsonSchema"]["required"].append(prop_name)
        if isinstance(prop_schema, dict):
            if prop_schema.get("description"):
                mongodb_schema["$jsonSchema"]["properties"][prop_name]["description"] = prop_schema["description"]

    if emit_cloudevents_columns:
        mongodb_schema["$jsonSchema"]["properties"].update({
            "___type": {"bsonType": "string"},
            "___source": {"bsonType": "string"},
            "___id": {"bsonType": "string"},
            "___time": {"bsonType": "date"},
            "___subject": {"bsonType": "string"}
        })
        mongodb_schema["$jsonSchema"]["required"].extend(["___type", "___source", "___id"])

    return json.dumps({collection_name: mongodb_schema}, indent=4)


def structure_type_to_mongodb_type(structure_type: Any) -> Dict[str, str]:
    """
    Converts a JSON Structure type to MongoDB type.
    """
    type_map = {
        "null": {"bsonType": "null"},
        "boolean": {"bsonType": "bool"},
        "string": {"bsonType": "string"},
        "int8": {"bsonType": "int"},
        "uint8": {"bsonType": "int"},
        "int16": {"bsonType": "int"},
        "uint16": {"bsonType": "int"},
        "int32": {"bsonType": "int"},
        "uint32": {"bsonType": "long"},
        "int64": {"bsonType": "long"},
        "uint64": {"bsonType": "long"},
        "int128": {"bsonType": "decimal"},
        "uint128": {"bsonType": "decimal"},
        "integer": {"bsonType": "int"},
        "float": {"bsonType": "double"},
        "double": {"bsonType": "double"},
        "number": {"bsonType": "double"},
        "decimal": {"bsonType": "decimal"},
        "binary": {"bsonType": "binData"},
        "date": {"bsonType": "date"},
        "datetime": {"bsonType": "date"},
        "timestamp": {"bsonType": "date"},
        "uuid": {"bsonType": "string"},
        "uri": {"bsonType": "string"},
        "array": {"bsonType": "array"},
        "map": {"bsonType": "object"},
        "object": {"bsonType": "object"},
        "any": {"bsonType": "object"}
    }

    if isinstance(structure_type, str):
        return type_map.get(structure_type, {"bsonType": "string"})

    if isinstance(structure_type, list):
        non_null_types = [t for t in structure_type if t != "null"]
        if len(non_null_types) == 1:
            return structure_type_to_mongodb_type(non_null_types[0])
        return {"bsonType": "object"}

    if isinstance(structure_type, dict):
        return type_map.get(structure_type.get("type", "string"), {"bsonType": "string"})

    return {"bsonType": "string"}

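
# Spot checks of the BSON mapping (doctest-style):
#
#   >>> structure_type_to_mongodb_type("int64")
#   {'bsonType': 'long'}
#   >>> structure_type_to_mongodb_type(["null", "uuid"])  # nullable union
#   {'bsonType': 'string'}
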
def generate_dynamodb_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates DynamoDB schema statements for the given JSON Structure schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    table_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'table')}"
    properties = converter.get_all_properties(schema)

    dynamodb_schema = {
        "TableName": table_name,
        "KeySchema": [],
        "AttributeDefinitions": [],
        "ProvisionedThroughput": {
            "ReadCapacityUnits": 5,
            "WriteCapacityUnits": 5
        }
    }

    # Add first property as hash key
    if properties:
        first_prop = list(properties.keys())[0]
        first_prop_schema = properties[first_prop]
        attr_type = structure_type_to_dynamodb_type(first_prop_schema)
        dynamodb_schema["AttributeDefinitions"].append(
            {"AttributeName": first_prop, "AttributeType": attr_type})
        dynamodb_schema["KeySchema"].append(
            {"AttributeName": first_prop, "KeyType": "HASH"})

    if emit_cloudevents_columns:
        dynamodb_schema["AttributeDefinitions"].append(
            {"AttributeName": "___id", "AttributeType": "S"})
        if not dynamodb_schema["KeySchema"]:
            dynamodb_schema["KeySchema"].append(
                {"AttributeName": "___id", "KeyType": "HASH"})

    return json.dumps(dynamodb_schema, indent=4)


def structure_type_to_dynamodb_type(structure_type: Any) -> str:
    """
    Converts a JSON Structure type to DynamoDB type.
    """
    type_map = {
        "boolean": "N",
        "string": "S",
        "int8": "N", "uint8": "N",
        "int16": "N", "uint16": "N",
        "int32": "N", "uint32": "N",
        "int64": "N", "uint64": "N",
        "integer": "N",
        "float": "N", "double": "N", "number": "N",
        "binary": "B",
        "uuid": "S"
    }

    if isinstance(structure_type, str):
        return type_map.get(structure_type, "S")

    if isinstance(structure_type, dict):
        return type_map.get(structure_type.get("type", "string"), "S")

    return "S"


def generate_elasticsearch_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates Elasticsearch schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    index_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'index')}"
    properties = converter.get_all_properties(schema)

    es_mapping = {
        "mappings": {
            "properties": {}
        }
    }

    for prop_name, prop_schema in properties.items():
        es_mapping["mappings"]["properties"][prop_name] = structure_type_to_elasticsearch_type(prop_schema)

    if emit_cloudevents_columns:
        es_mapping["mappings"]["properties"].update({
            "___type": {"type": "keyword"},
            "___source": {"type": "keyword"},
            "___id": {"type": "keyword"},
            "___time": {"type": "date"},
            "___subject": {"type": "keyword"}
        })

    return json.dumps({index_name: es_mapping}, indent=4)


def structure_type_to_elasticsearch_type(structure_type: Any) -> Dict[str, str]:
    """
    Converts a JSON Structure type to Elasticsearch type.
    """
    type_map = {
        "boolean": {"type": "boolean"},
        "string": {"type": "text"},
        "int8": {"type": "byte"}, "uint8": {"type": "byte"},
        "int16": {"type": "short"}, "uint16": {"type": "short"},
        "int32": {"type": "integer"}, "uint32": {"type": "integer"},
        "int64": {"type": "long"}, "uint64": {"type": "long"},
        "integer": {"type": "integer"},
        "float": {"type": "float"}, "double": {"type": "double"},
        "number": {"type": "double"},
        "binary": {"type": "binary"},
        "date": {"type": "date"},
        "datetime": {"type": "date"},
        "uuid": {"type": "keyword"}
    }

    if isinstance(structure_type, str):
        return type_map.get(structure_type, {"type": "text"})

    if isinstance(structure_type, dict):
        return type_map.get(structure_type.get("type", "string"), {"type": "text"})

    return {"type": "text"}


def generate_couchdb_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates CouchDB schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    db_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'db')}"
    properties = converter.get_all_properties(schema)

    couchdb_schema = {
        "type": "object",
        "properties": {}
    }

    for prop_name in properties.keys():
        couchdb_schema["properties"][prop_name] = {"type": "string"}

    if emit_cloudevents_columns:
        couchdb_schema["properties"].update({
            "___type": {"type": "string"},
            "___source": {"type": "string"},
            "___id": {"type": "string"},
            "___time": {"type": "string"},
            "___subject": {"type": "string"}
        })

    return json.dumps({db_name: couchdb_schema}, indent=4)


def generate_neo4j_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates Neo4j schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    label_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'node')}"
    properties = converter.get_all_properties(schema)

    cypher = []
    cypher.append(f"CREATE (:{label_name} {{")

    prop_lines = []
    for prop_name in properties.keys():
        prop_lines.append(f"    {prop_name}: 'value'")

    if emit_cloudevents_columns:
        prop_lines.extend([
            "    ___type: 'value'",
            "    ___source: 'value'",
            "    ___id: 'value'",
            "    ___time: 'value'",
            "    ___subject: 'value'"
        ])

    cypher.append(",\n".join(prop_lines))
    cypher.append("});")
    return "\n".join(cypher)


def generate_firebase_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates Firebase schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    collection_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'collection')}"
    properties = converter.get_all_properties(schema)

    firebase_schema = {
        "fields": {}
    }

    for prop_name in properties.keys():
        firebase_schema["fields"][prop_name] = {"type": "string"}

    if emit_cloudevents_columns:
        firebase_schema["fields"].update({
            "___type": {"type": "string"},
            "___source": {"type": "string"},
            "___id": {"type": "string"},
            "___time": {"type": "timestamp"},
            "___subject": {"type": "string"}
        })

    return json.dumps({collection_name: firebase_schema}, indent=4)


def generate_cosmosdb_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates CosmosDB schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    collection_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'collection')}"
    properties = converter.get_all_properties(schema)

    cosmosdb_schema = {
        "id": collection_name,
        "partitionKey": {
            "paths": [],
            "kind": "Hash"
        },
        "fields": {}
    }

    for prop_name in properties.keys():
        cosmosdb_schema["fields"][prop_name] = {"type": "string"}
        cosmosdb_schema["partitionKey"]["paths"].append(f"/{prop_name}")

    if emit_cloudevents_columns:
        cosmosdb_schema["fields"].update({
            "___type": {"type": "string"},
            "___id": {"type": "string"}
        })
        cosmosdb_schema["partitionKey"]["paths"].append("/___id")

    return json.dumps(cosmosdb_schema, indent=4)


def generate_hbase_schema(schema: Dict[str, Any], emit_cloudevents_columns: bool, converter: SchemaConverter) -> str:
    """
    Generates HBase schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    table_name = altname(schema, 'sql') or f"{namespace}_{schema.get('name', 'table')}"
    properties = converter.get_all_properties(schema)

    hbase_schema = {
        "table": table_name,
        "column_families": []
    }

    for prop_name in properties.keys():
        hbase_schema["column_families"].append({
            "name": prop_name,
            "column_family": "string"
        })

    if emit_cloudevents_columns:
        hbase_schema["column_families"].extend([
            {"name": "___type", "column_family": "string"},
            {"name": "___id", "column_family": "string"}
        ])

    return json.dumps(hbase_schema, indent=4)


def get_file_name(schema: Dict[str, Any], extension: str) -> str:
    """
    Generates a file name based on the schema.
    """
    namespace = schema.get("namespace", "").replace('.', '_')
    name = schema.get("name", "schema")
    return (namespace + '.' + name + '.' + extension) if namespace else (name + '.' + extension)
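

if __name__ == "__main__":
    # A small self-contained smoke-test sketch (illustrative only; assumes
    # altname() yields no alternate table name for this hypothetical schema).
    # Writes a one-type schema to a temp directory and prints the PostgreSQL DDL.
    import tempfile

    demo_schema = {
        "name": "Event", "namespace": "demo", "type": "object",
        "properties": {"id": {"type": "uuid"}, "subject": {"type": "string"}},
        "required": ["id"],
    }
    with tempfile.TemporaryDirectory() as tmp:
        schema_path = os.path.join(tmp, "event.struct.json")
        sql_path = os.path.join(tmp, "event.sql")
        with open(schema_path, "w", encoding="utf-8") as f:
            json.dump(demo_schema, f)
        convert_structure_to_sql(schema_path, sql_path, "postgres")
        with open(sql_path, "r", encoding="utf-8") as f:
            print(f.read())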