structurize 2.16.5__py3-none-any.whl → 2.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,659 @@
1
+ """Convert JSON Structure schemas to Data Package format."""
2
+
3
import json
import sys
from typing import Dict, List, Optional, Set, cast
from urllib.parse import urljoin

from datapackage import Package

from avrotize.common import get_longest_namespace_prefix
9
+
10
+ JsonNode = Dict[str, 'JsonNode'] | List['JsonNode'] | str | bool | int | None
11
+
12
+
13
class StructureToDataPackageConverter:
    """Class to convert JSON Structure schema to Data Package."""

    def __init__(self) -> None:
        # Maps "namespace.name" full names to their schema dicts; populated
        # by cache_named_types and consulted when a type is referenced by name.
        self.named_type_cache: Dict[str, JsonNode] = {}
        # Maps absolute $id URIs to their schema dicts; populated by
        # register_schema_ids and consulted by resolve_ref for non-fragment refs.
        self.schema_registry: Dict[str, Dict] = {}
19
+
20
+ def get_fullname(self, namespace: str, name: str) -> str:
21
+ """Get the full name of a record type."""
22
+ return f"{namespace}.{name}" if namespace else name
23
+
24
+ def resolve_ref(self, ref: str, context_schema: Optional[Dict] = None) -> Optional[Dict]:
25
+ """Resolves a $ref to the actual schema definition."""
26
+ # Check if it's an absolute URI reference (schema with $id)
27
+ if not ref.startswith('#/'):
28
+ if ref in self.schema_registry:
29
+ return self.schema_registry[ref]
30
+ return None
31
+
32
+ # Handle fragment-only references (internal to document)
33
+ path = ref[2:].split('/')
34
+ schema = context_schema if context_schema else None
35
+
36
+ if schema is None:
37
+ return None
38
+
39
+ for part in path:
40
+ if not isinstance(schema, dict) or part not in schema:
41
+ return None
42
+ schema = schema[part]
43
+
44
+ return cast(Dict, schema)
45
+
46
+ def register_schema_ids(self, schema: Dict, base_uri: str = '') -> None:
47
+ """Recursively registers schemas with $id keywords."""
48
+ if not isinstance(schema, dict):
49
+ return
50
+
51
+ # Register this schema if it has an $id
52
+ if '$id' in schema:
53
+ schema_id = schema['$id']
54
+ # Handle relative URIs
55
+ if base_uri and not schema_id.startswith(('http://', 'https://', 'urn:')):
56
+ from urllib.parse import urljoin
57
+ schema_id = urljoin(base_uri, schema_id)
58
+ self.schema_registry[schema_id] = schema
59
+ base_uri = schema_id
60
+
61
+ # Recursively process definitions
62
+ if 'definitions' in schema:
63
+ for def_name, def_schema in schema['definitions'].items():
64
+ if isinstance(def_schema, dict):
65
+ self.register_schema_ids(def_schema, base_uri)
66
+
67
+ # Recursively process properties
68
+ if 'properties' in schema:
69
+ for prop_name, prop_schema in schema['properties'].items():
70
+ if isinstance(prop_schema, dict):
71
+ self.register_schema_ids(prop_schema, base_uri)
72
+
73
+ # Recursively process items, values, etc.
74
+ for key in ['items', 'values', 'additionalProperties']:
75
+ if key in schema and isinstance(schema[key], dict):
76
+ self.register_schema_ids(schema[key], base_uri)
77
+
78
+ def convert_structure_to_datapackage(self, structure_schema_path: str,
79
+ structure_record_type: Optional[str],
80
+ datapackage_path: str) -> None:
81
+ """Convert a JSON Structure schema to a Data Package."""
82
+ with open(structure_schema_path, "r", encoding="utf-8") as f:
83
+ schema_json = f.read()
84
+
85
+ # Parse the schema as a JSON object
86
+ schema = json.loads(schema_json)
87
+
88
+ # Register schema IDs for cross-references
89
+ if isinstance(schema, dict):
90
+ self.register_schema_ids(schema)
91
+ elif isinstance(schema, list):
92
+ for s in schema:
93
+ if isinstance(s, dict):
94
+ self.register_schema_ids(s)
95
+
96
+ self.cache_named_types(schema)
97
+
98
+ # Handle list of schemas or single schema
99
+ if isinstance(schema, list):
100
+ if structure_record_type:
101
+ schema = next(
102
+ (x for x in schema
103
+ if isinstance(x, dict) and
104
+ (x.get("name") == structure_record_type or
105
+ str(x.get("namespace", "")) + "." + str(x.get("name", "")) == structure_record_type)),
106
+ None)
107
+ if schema is None:
108
+ print(f"No top-level record type {structure_record_type} found in the JSON Structure schema")
109
+ sys.exit(1)
110
+ schemas_to_convert = schema if isinstance(schema, list) else [schema]
111
+ elif isinstance(schema, dict):
112
+ # Single schema - convert it to a list
113
+ if 'type' in schema and schema['type'] == 'object':
114
+ schemas_to_convert = [schema]
115
+ elif 'definitions' in schema or '$defs' in schema:
116
+ # Schema with definitions/$defs - extract object types
117
+ schemas_to_convert = []
118
+ definitions = schema.get('definitions', schema.get('$defs', {}))
119
+ self._extract_object_schemas(definitions, schemas_to_convert)
120
+ # Also include root if it's an object
121
+ if schema.get('type') == 'object':
122
+ schemas_to_convert.insert(0, schema)
123
+ else:
124
+ schemas_to_convert = [schema]
125
+ else:
126
+ print("Expected a single JSON Structure schema as a JSON object, or a list of schema records")
127
+ sys.exit(1)
128
+
129
+ # Calculate longest namespace prefix
130
+ longest_namespace_prefix = self._get_longest_namespace_prefix(schemas_to_convert)
131
+ self.create_datapackage_for_schemas(schemas_to_convert, datapackage_path, longest_namespace_prefix)
132
+
133
+ def _extract_object_schemas(self, definitions: Dict, schemas_to_convert: List[Dict]) -> None:
134
+ """Extract object type schemas from definitions recursively."""
135
+ for name, definition in definitions.items():
136
+ if isinstance(definition, dict):
137
+ if definition.get('type') == 'object':
138
+ # Add name if not present
139
+ if 'name' not in definition:
140
+ definition['name'] = name
141
+ schemas_to_convert.append(definition)
142
+ elif 'definitions' in definition:
143
+ # Nested definitions
144
+ self._extract_object_schemas(definition['definitions'], schemas_to_convert)
145
+
146
+ def _get_longest_namespace_prefix(self, schemas: List[Dict]) -> str:
147
+ """Calculate the longest common namespace prefix from schemas."""
148
+ if not schemas:
149
+ return ""
150
+
151
+ namespaces = []
152
+ for schema in schemas:
153
+ if isinstance(schema, dict):
154
+ ns = schema.get('namespace', '')
155
+ if ns:
156
+ namespaces.append(ns)
157
+
158
+ if not namespaces:
159
+ return ""
160
+
161
+ # Find common prefix
162
+ if len(namespaces) == 1:
163
+ return namespaces[0]
164
+
165
+ # Split by dots and find common parts
166
+ parts_lists = [ns.split('.') for ns in namespaces]
167
+ common_parts = []
168
+
169
+ for i in range(min(len(p) for p in parts_lists)):
170
+ part = parts_lists[0][i]
171
+ if all(p[i] == part for p in parts_lists):
172
+ common_parts.append(part)
173
+ else:
174
+ break
175
+
176
+ return '.'.join(common_parts)
177
+
178
+ def create_datapackage_for_schemas(self, schemas: List[Dict],
179
+ datapackage_path: str,
180
+ namespace_prefix: str) -> None:
181
+ """Create a Data Package for given schemas."""
182
+ package = Package()
183
+ data_package_resources = []
184
+
185
+ for schema in schemas:
186
+ if not isinstance(schema, dict):
187
+ continue
188
+
189
+ # Skip non-object types
190
+ if schema.get('type') != 'object':
191
+ continue
192
+
193
+ name = str(schema.get("name", "UnnamedTable"))
194
+ namespace = str(schema.get("namespace", ""))
195
+
196
+ # Remove common namespace prefix
197
+ if namespace.startswith(namespace_prefix):
198
+ namespace = namespace[len(namespace_prefix):].strip(".")
199
+
200
+ table_name = f"{namespace}_{name}" if namespace else name
201
+ # Data Package resource names must be lowercase with hyphens/underscores only
202
+ table_name = table_name.lower().replace(" ", "-")
203
+ properties = schema.get("properties", {})
204
+
205
+ # Create the Data Package schema
206
+ resource_schema: Dict[str, List[JsonNode]] = {
207
+ "fields": []
208
+ }
209
+
210
+ for prop_name, prop_schema in properties.items():
211
+ column_name = prop_name
212
+ column_type = self.convert_structure_type_to_datapackage_type(prop_schema, schema)
213
+ field_schema = {"name": column_name, "type": column_type}
214
+
215
+ # Add description from doc or description (only if prop_schema is a dict)
216
+ if isinstance(prop_schema, dict):
217
+ # Handle title
218
+ if "title" in prop_schema:
219
+ field_schema["title"] = prop_schema["title"]
220
+
221
+ if "description" in prop_schema:
222
+ field_schema["description"] = prop_schema["description"]
223
+ elif "doc" in prop_schema:
224
+ field_schema["description"] = prop_schema["doc"]
225
+
226
+ # Handle $comment (add to description)
227
+ if "$comment" in prop_schema:
228
+ comment = prop_schema["$comment"]
229
+ if "description" in field_schema:
230
+ field_schema["description"] += f" [Comment: {comment}]"
231
+ else:
232
+ field_schema["description"] = f"[Comment: {comment}]"
233
+
234
+ # Handle examples
235
+ if "examples" in prop_schema:
236
+ field_schema["examples"] = prop_schema["examples"]
237
+
238
+ # Handle default values
239
+ if "default" in prop_schema:
240
+ field_schema["default"] = prop_schema["default"]
241
+
242
+ # Handle const (fixed value)
243
+ if "const" in prop_schema:
244
+ if 'constraints' not in field_schema:
245
+ field_schema['constraints'] = {}
246
+ field_schema['constraints']['enum'] = [prop_schema["const"]]
247
+ if "description" in field_schema:
248
+ field_schema["description"] += f" (constant value)"
249
+ else:
250
+ field_schema["description"] = "Constant value"
251
+
252
+ # Handle readOnly/writeOnly
253
+ if "readOnly" in prop_schema and prop_schema["readOnly"]:
254
+ if "description" in field_schema:
255
+ field_schema["description"] += " (read-only)"
256
+ else:
257
+ field_schema["description"] = "Read-only field"
258
+
259
+ if "writeOnly" in prop_schema and prop_schema["writeOnly"]:
260
+ if "description" in field_schema:
261
+ field_schema["description"] += " (write-only)"
262
+ else:
263
+ field_schema["description"] = "Write-only field"
264
+
265
+ # Handle deprecated
266
+ if "deprecated" in prop_schema and prop_schema["deprecated"]:
267
+ if "description" in field_schema:
268
+ field_schema["description"] += " (DEPRECATED)"
269
+ else:
270
+ field_schema["description"] = "DEPRECATED"
271
+
272
+ # Add format constraints if applicable
273
+ self._add_field_constraints(field_schema, prop_schema)
274
+
275
+ resource_schema["fields"].append(field_schema)
276
+
277
+ resource = {
278
+ "name": table_name,
279
+ "data": [], # Empty data array for schema-only package
280
+ "schema": resource_schema
281
+ }
282
+
283
+ # Add resource title if available
284
+ if "title" in schema:
285
+ resource["title"] = schema["title"]
286
+
287
+ # Add resource description if available
288
+ if "description" in schema:
289
+ resource["description"] = schema["description"]
290
+ elif "doc" in schema:
291
+ resource["description"] = schema["doc"]
292
+
293
+ # Handle abstract types
294
+ if schema.get("abstract", False):
295
+ if "description" in resource:
296
+ resource["description"] += " (Abstract type - cannot be instantiated directly)"
297
+ else:
298
+ resource["description"] = "Abstract type - cannot be instantiated directly"
299
+
300
+ # Handle $extends (inheritance)
301
+ if "$extends" in schema:
302
+ extends_ref = schema["$extends"]
303
+ if "description" in resource:
304
+ resource["description"] += f" (Extends: {extends_ref})"
305
+ else:
306
+ resource["description"] = f"Extends: {extends_ref}"
307
+
308
+ # Handle $offers (add-in system)
309
+ if "$offers" in schema:
310
+ offers = schema["$offers"]
311
+ if "description" in resource:
312
+ resource["description"] += f" (Offers: {', '.join(offers.keys()) if isinstance(offers, dict) else str(offers)})"
313
+ else:
314
+ resource["description"] = f"Offers: {', '.join(offers.keys()) if isinstance(offers, dict) else str(offers)}"
315
+
316
+ # Handle $uses (add-in system)
317
+ if "$uses" in schema:
318
+ uses = schema["$uses"]
319
+ uses_str = ', '.join(uses) if isinstance(uses, list) else str(uses)
320
+ if "description" in resource:
321
+ resource["description"] += f" (Uses add-ins: {uses_str})"
322
+ else:
323
+ resource["description"] = f"Uses add-ins: {uses_str}"
324
+
325
+ # Handle deprecated at schema level
326
+ if schema.get("deprecated", False):
327
+ if "description" in resource:
328
+ resource["description"] += " (DEPRECATED)"
329
+ else:
330
+ resource["description"] = "DEPRECATED"
331
+
332
+ data_package_resources.append(resource)
333
+
334
+ # Add resources to the Data Package
335
+ for resource in data_package_resources:
336
+ package.add_resource(resource)
337
+
338
+ # Save the Data Package
339
+ package.descriptor["name"] = namespace_prefix if namespace_prefix else "datapackage"
340
+ package.commit()
341
+
342
+ with open(datapackage_path, "w", encoding="utf-8") as f:
343
+ f.write(json.dumps(package.descriptor, indent=2))
344
+
345
+ def _add_field_constraints(self, field_schema: Dict, prop_schema: Dict) -> None:
346
+ """Add Data Package field constraints from JSON Structure annotations."""
347
+ # Add format for specific types
348
+ prop_type = prop_schema.get('type', '')
349
+
350
+ # Date/time formats
351
+ if prop_type == 'date':
352
+ field_schema['format'] = 'date'
353
+ elif prop_type == 'datetime' or prop_type == 'timestamp':
354
+ field_schema['format'] = 'datetime'
355
+ elif prop_type == 'time':
356
+ field_schema['format'] = 'time'
357
+ elif prop_type == 'duration':
358
+ field_schema['format'] = 'duration'
359
+ elif prop_type == 'uri':
360
+ field_schema['format'] = 'uri'
361
+ elif prop_type == 'uuid':
362
+ field_schema['format'] = 'uuid'
363
+ elif prop_type == 'binary':
364
+ field_schema['format'] = 'binary'
365
+
366
+ # Handle format keyword for additional string formats
367
+ if 'format' in prop_schema:
368
+ field_schema['format'] = prop_schema['format']
369
+
370
+ # String constraints
371
+ if 'maxLength' in prop_schema:
372
+ if 'constraints' not in field_schema:
373
+ field_schema['constraints'] = {}
374
+ field_schema['constraints']['maxLength'] = prop_schema['maxLength']
375
+
376
+ if 'minLength' in prop_schema:
377
+ if 'constraints' not in field_schema:
378
+ field_schema['constraints'] = {}
379
+ field_schema['constraints']['minLength'] = prop_schema['minLength']
380
+
381
+ if 'pattern' in prop_schema:
382
+ if 'constraints' not in field_schema:
383
+ field_schema['constraints'] = {}
384
+ field_schema['constraints']['pattern'] = prop_schema['pattern']
385
+
386
+ # Numeric constraints
387
+ if 'minimum' in prop_schema:
388
+ if 'constraints' not in field_schema:
389
+ field_schema['constraints'] = {}
390
+ field_schema['constraints']['minimum'] = prop_schema['minimum']
391
+
392
+ if 'maximum' in prop_schema:
393
+ if 'constraints' not in field_schema:
394
+ field_schema['constraints'] = {}
395
+ field_schema['constraints']['maximum'] = prop_schema['maximum']
396
+
397
+ if 'exclusiveMinimum' in prop_schema:
398
+ if 'constraints' not in field_schema:
399
+ field_schema['constraints'] = {}
400
+ # Data Package doesn't have exclusiveMinimum, so we document it
401
+ field_schema['constraints']['minimum'] = prop_schema['exclusiveMinimum']
402
+ if 'description' in field_schema:
403
+ field_schema['description'] += f" (exclusive minimum: {prop_schema['exclusiveMinimum']})"
404
+ else:
405
+ field_schema['description'] = f"Exclusive minimum: {prop_schema['exclusiveMinimum']}"
406
+
407
+ if 'exclusiveMaximum' in prop_schema:
408
+ if 'constraints' not in field_schema:
409
+ field_schema['constraints'] = {}
410
+ # Data Package doesn't have exclusiveMaximum, so we document it
411
+ field_schema['constraints']['maximum'] = prop_schema['exclusiveMaximum']
412
+ if 'description' in field_schema:
413
+ field_schema['description'] += f" (exclusive maximum: {prop_schema['exclusiveMaximum']})"
414
+ else:
415
+ field_schema['description'] = f"Exclusive maximum: {prop_schema['exclusiveMaximum']}"
416
+
417
+ if 'multipleOf' in prop_schema:
418
+ # Data Package doesn't have multipleOf, document in description
419
+ multiple_of = prop_schema['multipleOf']
420
+ if 'description' in field_schema:
421
+ field_schema['description'] += f" (multiple of {multiple_of})"
422
+ else:
423
+ field_schema['description'] = f"Must be multiple of {multiple_of}"
424
+
425
+ # Decimal precision/scale
426
+ if 'precision' in prop_schema or 'scale' in prop_schema:
427
+ precision = prop_schema.get('precision')
428
+ scale = prop_schema.get('scale')
429
+ desc_parts = []
430
+ if precision:
431
+ desc_parts.append(f"precision: {precision}")
432
+ if scale:
433
+ desc_parts.append(f"scale: {scale}")
434
+ precision_desc = f" ({', '.join(desc_parts)})"
435
+ if 'description' in field_schema:
436
+ field_schema['description'] += precision_desc
437
+ else:
438
+ field_schema['description'] = precision_desc.strip('() ')
439
+
440
+ # Array constraints
441
+ if 'maxItems' in prop_schema:
442
+ # Data Package doesn't have maxItems, document in description
443
+ if 'description' in field_schema:
444
+ field_schema['description'] += f" (max items: {prop_schema['maxItems']})"
445
+ else:
446
+ field_schema['description'] = f"Maximum {prop_schema['maxItems']} items"
447
+
448
+ if 'minItems' in prop_schema:
449
+ # Data Package doesn't have minItems, document in description
450
+ if 'description' in field_schema:
451
+ field_schema['description'] += f" (min items: {prop_schema['minItems']})"
452
+ else:
453
+ field_schema['description'] = f"Minimum {prop_schema['minItems']} items"
454
+
455
+ if 'uniqueItems' in prop_schema and prop_schema['uniqueItems']:
456
+ if 'constraints' not in field_schema:
457
+ field_schema['constraints'] = {}
458
+ field_schema['constraints']['unique'] = True
459
+
460
+ # Enum values
461
+ if 'enum' in prop_schema:
462
+ if 'constraints' not in field_schema:
463
+ field_schema['constraints'] = {}
464
+ field_schema['constraints']['enum'] = prop_schema['enum']
465
+
466
+ # Content metadata
467
+ if 'contentEncoding' in prop_schema:
468
+ # Store as custom property
469
+ field_schema['contentEncoding'] = prop_schema['contentEncoding']
470
+
471
+ if 'contentMediaType' in prop_schema:
472
+ # Store as custom property
473
+ field_schema['contentMediaType'] = prop_schema['contentMediaType']
474
+
475
+ def convert_structure_type_to_datapackage_type(self, structure_type: JsonNode,
476
+ context_schema: Optional[Dict] = None) -> str:
477
+ """Convert a JSON Structure type to a Data Package type."""
478
+ if isinstance(structure_type, list):
479
+ # Union type
480
+ item_count = len(structure_type)
481
+ if item_count == 1:
482
+ return self.convert_structure_type_to_datapackage_type(structure_type[0], context_schema)
483
+ elif item_count == 2:
484
+ # Check for nullable union (type + null)
485
+ first = structure_type[0]
486
+ second = structure_type[1]
487
+ if isinstance(first, str) and first == "null":
488
+ return self.convert_structure_type_to_datapackage_type(second, context_schema)
489
+ elif isinstance(second, str) and second == "null":
490
+ return self.convert_structure_type_to_datapackage_type(first, context_schema)
491
+ # Complex union - default to string
492
+ return "string"
493
+ elif isinstance(structure_type, dict):
494
+ # Handle allOf (merge all schemas)
495
+ if 'allOf' in structure_type:
496
+ # For allOf, we typically take the most specific type
497
+ # In Data Package context, we'll use the first concrete type
498
+ for sub_schema in structure_type['allOf']:
499
+ if isinstance(sub_schema, dict) and 'type' in sub_schema:
500
+ return self.convert_structure_type_to_datapackage_type(sub_schema, context_schema)
501
+ return "object" # Default to object for allOf
502
+
503
+ # Handle oneOf (one of the schemas must match)
504
+ if 'oneOf' in structure_type:
505
+ # For oneOf, we use string as it's the most flexible
506
+ # Could potentially be a union in more sophisticated implementations
507
+ return "string"
508
+
509
+ # Handle anyOf (any of the schemas may match)
510
+ if 'anyOf' in structure_type:
511
+ # Similar to oneOf, use string for flexibility
512
+ return "string"
513
+
514
+ # Handle not (negation)
515
+ if 'not' in structure_type:
516
+ # Can't directly represent negation, default to string
517
+ return "string"
518
+
519
+ # Handle if/then/else (conditional schemas)
520
+ if 'if' in structure_type:
521
+ # Use 'then' schema if present, else 'else' schema, else string
522
+ if 'then' in structure_type:
523
+ return self.convert_structure_type_to_datapackage_type(structure_type['then'], context_schema)
524
+ elif 'else' in structure_type:
525
+ return self.convert_structure_type_to_datapackage_type(structure_type['else'], context_schema)
526
+ return "string"
527
+
528
+ # Handle $ref
529
+ if '$ref' in structure_type:
530
+ ref_schema = self.resolve_ref(structure_type['$ref'], context_schema)
531
+ if ref_schema:
532
+ return self.convert_structure_type_to_datapackage_type(ref_schema, context_schema)
533
+ return "string"
534
+
535
+ # Handle enum
536
+ if 'enum' in structure_type:
537
+ # Enums are represented as strings with enum constraint
538
+ return "string"
539
+
540
+ # Get the type field
541
+ type_name = structure_type.get("type")
542
+
543
+ # Handle case where type itself is a dict with $ref
544
+ if isinstance(type_name, dict):
545
+ return self.convert_structure_type_to_datapackage_type(type_name, context_schema)
546
+
547
+ if type_name == "array":
548
+ return "array"
549
+ elif type_name == "set":
550
+ return "array" # Sets are represented as arrays in Data Package
551
+ elif type_name == "map":
552
+ return "object"
553
+ elif type_name == "object":
554
+ return "object"
555
+ elif type_name == "choice":
556
+ return "string" # Choices default to string
557
+ elif type_name == "tuple":
558
+ return "array" # Tuples are arrays with fixed structure
559
+ elif type_name:
560
+ return self.map_scalar_type(type_name)
561
+ else:
562
+ return "string"
563
+ elif isinstance(structure_type, str):
564
+ # Check named type cache
565
+ if structure_type in self.named_type_cache:
566
+ return self.convert_structure_type_to_datapackage_type(
567
+ self.named_type_cache[structure_type], context_schema)
568
+ return self.map_scalar_type(structure_type)
569
+
570
+ return "string"
571
+
572
+ def cache_named_types(self, structure_type: JsonNode) -> None:
573
+ """Add an encountered type to the cache of named types."""
574
+ if isinstance(structure_type, list):
575
+ for item in structure_type:
576
+ self.cache_named_types(item)
577
+ elif isinstance(structure_type, dict):
578
+ # Cache this type if it has a name
579
+ if structure_type.get("name"):
580
+ full_name = self.get_fullname(
581
+ str(structure_type.get("namespace", "")),
582
+ str(structure_type.get("name")))
583
+ self.named_type_cache[full_name] = structure_type
584
+
585
+ # Recursively cache types in properties
586
+ if "properties" in structure_type:
587
+ for prop_name, prop_schema in structure_type["properties"].items():
588
+ if isinstance(prop_schema, dict):
589
+ self.cache_named_types(prop_schema)
590
+
591
+ # Recursively cache types in definitions
592
+ if "definitions" in structure_type:
593
+ for def_name, def_schema in structure_type["definitions"].items():
594
+ if isinstance(def_schema, dict):
595
+ self.cache_named_types(def_schema)
596
+
597
+ # Cache types in array items, map values, etc.
598
+ for key in ['items', 'values', 'additionalProperties']:
599
+ if key in structure_type and isinstance(structure_type[key], dict):
600
+ self.cache_named_types(structure_type[key])
601
+
602
+ def map_scalar_type(self, type_name: str) -> str:
603
+ """Map a JSON Structure scalar type to a Data Package scalar type."""
604
+ # JSON Structure Core primitive types mapping
605
+ scalar_type_mapping = {
606
+ # JSON primitive types
607
+ "null": "string",
608
+ "boolean": "boolean",
609
+ "string": "string",
610
+ "number": "number",
611
+ "integer": "integer",
612
+
613
+ # Extended primitive types - integers
614
+ "int8": "integer",
615
+ "uint8": "integer",
616
+ "int16": "integer",
617
+ "uint16": "integer",
618
+ "int32": "integer",
619
+ "uint32": "integer",
620
+ "int64": "integer",
621
+ "uint64": "integer",
622
+ "int128": "integer",
623
+ "uint128": "integer",
624
+
625
+ # Extended primitive types - floats
626
+ "float8": "number",
627
+ "float": "number",
628
+ "double": "number",
629
+ "binary32": "number",
630
+ "binary64": "number",
631
+ "decimal": "number",
632
+
633
+ # Extended primitive types - other
634
+ "binary": "string", # Base64-encoded in JSON
635
+ "date": "date",
636
+ "time": "time",
637
+ "datetime": "datetime",
638
+ "timestamp": "datetime",
639
+ "duration": "duration",
640
+ "uuid": "string",
641
+ "uri": "string",
642
+ "jsonpointer": "string",
643
+
644
+ # Special type
645
+ "any": "any"
646
+ }
647
+ return scalar_type_mapping.get(type_name, "string")
648
+
649
+
650
def convert_structure_to_datapackage(structure_schema_path: str,
                                     structure_record_type: Optional[str],
                                     datapackage_path: str) -> None:
    """Convert a JSON Structure schema to a Data Package.

    Convenience wrapper that instantiates StructureToDataPackageConverter
    and delegates to its method of the same name.
    """
    StructureToDataPackageConverter().convert_structure_to_datapackage(
        structure_schema_path, structure_record_type, datapackage_path)
656
+
657
+
658
+ # Example usage:
659
+ # convert_structure_to_datapackage("schema.struct.json", "MyRecord", "datapackage.json")