structurize 3.2.0__tar.gz → 3.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. {structurize-3.2.0/structurize.egg-info → structurize-3.2.1}/PKG-INFO +1 -1
  2. {structurize-3.2.0 → structurize-3.2.1}/avrotize/_version.py +3 -3
  3. {structurize-3.2.0 → structurize-3.2.1}/avrotize/schema_inference.py +148 -24
  4. {structurize-3.2.0 → structurize-3.2.1/structurize.egg-info}/PKG-INFO +1 -1
  5. {structurize-3.2.0 → structurize-3.2.1}/.gitignore +0 -0
  6. {structurize-3.2.0 → structurize-3.2.1}/LICENSE +0 -0
  7. {structurize-3.2.0 → structurize-3.2.1}/MANIFEST.in +0 -0
  8. {structurize-3.2.0 → structurize-3.2.1}/README.md +0 -0
  9. {structurize-3.2.0 → structurize-3.2.1}/avrotize/__init__.py +0 -0
  10. {structurize-3.2.0 → structurize-3.2.1}/avrotize/__main__.py +0 -0
  11. {structurize-3.2.0 → structurize-3.2.1}/avrotize/asn1toavro.py +0 -0
  12. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotize.py +0 -0
  13. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotocpp.py +0 -0
  14. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotocsharp.py +0 -0
  15. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotocsv.py +0 -0
  16. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotodatapackage.py +0 -0
  17. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotodb.py +0 -0
  18. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotogo.py +0 -0
  19. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotographql.py +0 -0
  20. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotoiceberg.py +0 -0
  21. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotojava.py +0 -0
  22. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotojs.py +0 -0
  23. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotojsons.py +0 -0
  24. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotojstruct.py +0 -0
  25. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotokusto.py +0 -0
  26. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotomd.py +0 -0
  27. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotools.py +0 -0
  28. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotoparquet.py +0 -0
  29. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotoproto.py +0 -0
  30. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotopython.py +0 -0
  31. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotorust.py +0 -0
  32. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotots.py +0 -0
  33. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrotoxsd.py +0 -0
  34. {structurize-3.2.0 → structurize-3.2.1}/avrotize/avrovalidator.py +0 -0
  35. {structurize-3.2.0 → structurize-3.2.1}/avrotize/cddltostructure.py +0 -0
  36. {structurize-3.2.0 → structurize-3.2.1}/avrotize/choice_inference.py +0 -0
  37. {structurize-3.2.0 → structurize-3.2.1}/avrotize/commands.json +0 -0
  38. {structurize-3.2.0 → structurize-3.2.1}/avrotize/common.py +0 -0
  39. {structurize-3.2.0 → structurize-3.2.1}/avrotize/constants.py +0 -0
  40. {structurize-3.2.0 → structurize-3.2.1}/avrotize/csvtoavro.py +0 -0
  41. {structurize-3.2.0 → structurize-3.2.1}/avrotize/datapackagetoavro.py +0 -0
  42. {structurize-3.2.0 → structurize-3.2.1}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
  43. {structurize-3.2.0 → structurize-3.2.1}/avrotize/dependencies/typescript/node22/package.json +0 -0
  44. {structurize-3.2.0 → structurize-3.2.1}/avrotize/dependency_resolver.py +0 -0
  45. {structurize-3.2.0 → structurize-3.2.1}/avrotize/dependency_version.py +0 -0
  46. {structurize-3.2.0 → structurize-3.2.1}/avrotize/jsonstoavro.py +0 -0
  47. {structurize-3.2.0 → structurize-3.2.1}/avrotize/jsonstostructure.py +0 -0
  48. {structurize-3.2.0 → structurize-3.2.1}/avrotize/jsontoschema.py +0 -0
  49. {structurize-3.2.0 → structurize-3.2.1}/avrotize/jstructtoavro.py +0 -0
  50. {structurize-3.2.0 → structurize-3.2.1}/avrotize/kstructtoavro.py +0 -0
  51. {structurize-3.2.0 → structurize-3.2.1}/avrotize/kustotoavro.py +0 -0
  52. {structurize-3.2.0 → structurize-3.2.1}/avrotize/openapitostructure.py +0 -0
  53. {structurize-3.2.0 → structurize-3.2.1}/avrotize/parquettoavro.py +0 -0
  54. {structurize-3.2.0 → structurize-3.2.1}/avrotize/proto2parser.py +0 -0
  55. {structurize-3.2.0 → structurize-3.2.1}/avrotize/proto3parser.py +0 -0
  56. {structurize-3.2.0 → structurize-3.2.1}/avrotize/prototoavro.py +0 -0
  57. {structurize-3.2.0 → structurize-3.2.1}/avrotize/sqltoavro.py +0 -0
  58. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretocddl.py +0 -0
  59. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretocpp.py +0 -0
  60. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretocsharp.py +0 -0
  61. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretocsv.py +0 -0
  62. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretodatapackage.py +0 -0
  63. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretodb.py +0 -0
  64. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretogo.py +0 -0
  65. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretographql.py +0 -0
  66. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretoiceberg.py +0 -0
  67. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretojava.py +0 -0
  68. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretojs.py +0 -0
  69. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretojsons.py +0 -0
  70. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretokusto.py +0 -0
  71. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretomd.py +0 -0
  72. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretoproto.py +0 -0
  73. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretopython.py +0 -0
  74. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretorust.py +0 -0
  75. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretots.py +0 -0
  76. {structurize-3.2.0 → structurize-3.2.1}/avrotize/structuretoxsd.py +0 -0
  77. {structurize-3.2.0 → structurize-3.2.1}/avrotize/validate.py +0 -0
  78. {structurize-3.2.0 → structurize-3.2.1}/avrotize/xmltoschema.py +0 -0
  79. {structurize-3.2.0 → structurize-3.2.1}/avrotize/xsdtoavro.py +0 -0
  80. {structurize-3.2.0 → structurize-3.2.1}/build.ps1 +0 -0
  81. {structurize-3.2.0 → structurize-3.2.1}/build.sh +0 -0
  82. {structurize-3.2.0 → structurize-3.2.1}/pyproject.toml +0 -0
  83. {structurize-3.2.0 → structurize-3.2.1}/setup.cfg +0 -0
  84. {structurize-3.2.0 → structurize-3.2.1}/structurize.egg-info/SOURCES.txt +0 -0
  85. {structurize-3.2.0 → structurize-3.2.1}/structurize.egg-info/dependency_links.txt +0 -0
  86. {structurize-3.2.0 → structurize-3.2.1}/structurize.egg-info/entry_points.txt +0 -0
  87. {structurize-3.2.0 → structurize-3.2.1}/structurize.egg-info/requires.txt +0 -0
  88. {structurize-3.2.0 → structurize-3.2.1}/structurize.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structurize
3
- Version: 3.2.0
3
+ Version: 3.2.1
4
4
  Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
5
  Author-email: Clemens Vasters <clemensv@microsoft.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.2.0'
32
- __version_tuple__ = version_tuple = (3, 2, 0)
31
+ __version__ = version = '3.2.1'
32
+ __version_tuple__ = version_tuple = (3, 2, 1)
33
33
 
34
- __commit_id__ = commit_id = 'gb9324402c'
34
+ __commit_id__ = commit_id = 'gfc8429a20'
@@ -648,6 +648,22 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
648
648
  if python_value is None:
649
649
  return "null"
650
650
 
651
+ # Handle integers with proper range detection
652
+ # bool is subclass of int in Python, so check bool first
653
+ if isinstance(python_value, bool):
654
+ return "boolean"
655
+
656
+ if isinstance(python_value, int):
657
+ # Check if value fits in int32 range
658
+ if -2147483648 <= python_value <= 2147483647:
659
+ return "integer" # int32 alias
660
+ else:
661
+ # Per JSON Structure spec, int64 values are string-encoded
662
+ # Since we're inferring from JSON native numbers (which can't exceed
663
+ # double precision ~2^53), use 'double' for large integers from JSON
664
+ # This allows validation of the source data as-is
665
+ return "double"
666
+
651
667
  if isinstance(python_value, dict):
652
668
  # Generate an object type
653
669
  safe_name = avro_name(type_name.rsplit('.', 1)[-1])
@@ -691,7 +707,16 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
691
707
  items = item_types[0]
692
708
  else:
693
709
  # Use choice for multiple item types
694
- items = {"type": "choice", "choices": item_types}
710
+ # choices must be a map with type names as keys
711
+ choices_map: Dict[str, Any] = {}
712
+ for it in item_types:
713
+ if isinstance(it, str):
714
+ choices_map[it] = {"type": it}
715
+ elif isinstance(it, dict):
716
+ # For object types, use name if available
717
+ name = it.get("name", f"type{len(choices_map)}")
718
+ choices_map[name] = it
719
+ items = {"type": "choice", "choices": choices_map}
695
720
  else:
696
721
  items = {"type": "string"}
697
722
 
@@ -808,7 +833,15 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
808
833
  if len(item_types) == 1:
809
834
  list_types.append({"type": "array", "items": item_types[0]})
810
835
  else:
811
- list_types.append({"type": "array", "items": {"type": "choice", "choices": item_types}})
836
+ # Build choices map from item types
837
+ choices_map: Dict[str, Any] = {}
838
+ for it in item_types:
839
+ if isinstance(it, str):
840
+ choices_map[it] = {"type": it}
841
+ elif isinstance(it, dict):
842
+ name = it.get("name", f"type{len(choices_map)}")
843
+ choices_map[name] = it
844
+ list_types.append({"type": "array", "items": {"type": "choice", "choices": choices_map}})
812
845
 
813
846
  value_types: List[Any] = []
814
847
  for item3 in map_types:
@@ -944,9 +977,14 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
944
977
 
945
978
  if field_name == parent_field:
946
979
  if len(variant_types) > 1:
980
+ # Build choices as a map (object) per JSON Structure spec
981
+ # Each value is a schema directly (the object type definition)
982
+ choices_map: Dict[str, Any] = {}
983
+ for vt in variant_types:
984
+ choices_map[vt["name"]] = vt
947
985
  envelope_properties[safe_name] = {
948
986
  "type": "choice",
949
- "choices": variant_types
987
+ "choices": choices_map
950
988
  }
951
989
  else:
952
990
  envelope_properties[safe_name] = variant_types[0] if variant_types else {"type": "object"}
@@ -975,9 +1013,13 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
975
1013
 
976
1014
  return envelope_record
977
1015
 
978
- # Handle top-level discriminated union
1016
+ # Handle top-level discriminated union as inline union
1017
+ # Inline unions match the actual instance format where the discriminator
1018
+ # is a property value, not a key wrapper (tagged union)
979
1019
  if result.discriminator_field:
980
- variant_types = []
1020
+ # Collect all fields from all variants to find common vs variant-specific
1021
+ all_variant_fields: Dict[str, Set[str]] = {} # variant_value -> field names
1022
+ variant_docs: Dict[str, Dict[str, Any]] = {} # variant_value -> sample doc
981
1023
 
982
1024
  for value in sorted(result.discriminator_values):
983
1025
  cluster_docs = [c for c in result.clusters
@@ -986,27 +1028,98 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
986
1028
  if not cluster_docs:
987
1029
  continue
988
1030
  cluster = cluster_docs[0]
1031
+ all_variant_fields[value] = set(cluster.merged_signature)
1032
+ variant_docs[value] = cluster.documents[0].data if cluster.documents else {}
1033
+
1034
+ if not all_variant_fields:
1035
+ return None
1036
+
1037
+ # Find common fields (present in ALL variants)
1038
+ common_fields = set.intersection(*all_variant_fields.values()) if all_variant_fields else set()
1039
+
1040
+ # Build abstract base type with common fields
1041
+ base_name = avro_name(type_name) + "Base"
1042
+ base_properties: Dict[str, Any] = {}
1043
+ base_required: List[str] = []
1044
+
1045
+ # Use first variant's doc for type inference of common fields
1046
+ first_value = sorted(result.discriminator_values)[0]
1047
+ rep_doc = variant_docs.get(first_value, {})
1048
+
1049
+ for field_name in sorted(common_fields):
1050
+ safe_name = avro_name(field_name)
1051
+ field_value = rep_doc.get(field_name)
1052
+ field_type = self.python_type_to_jstruct_type(f"{type_name}.{safe_name}", field_value)
1053
+
1054
+ if isinstance(field_type, str):
1055
+ base_properties[safe_name] = {"type": field_type}
1056
+ else:
1057
+ base_properties[safe_name] = field_type
1058
+
1059
+ if field_name != safe_name:
1060
+ base_properties[safe_name]["altnames"] = {self.altnames_key: field_name}
1061
+
1062
+ # Check if required in all clusters
1063
+ # Note: discriminator field is NOT required as it's handled by selector
1064
+ all_required = all(
1065
+ field_name in c.required_fields
1066
+ for c in result.clusters if c.merged_signature
1067
+ )
1068
+ if all_required and field_name != result.discriminator_field:
1069
+ base_required.append(safe_name)
1070
+
1071
+ base_type: Dict[str, Any] = {
1072
+ "abstract": True,
1073
+ "type": "object",
1074
+ "name": base_name,
1075
+ "properties": base_properties
1076
+ }
1077
+ if base_required:
1078
+ base_type["required"] = base_required
1079
+
1080
+ # Build variant types that extend the base
1081
+ definitions: Dict[str, Any] = {base_name: base_type}
1082
+ choices_map: Dict[str, Any] = {}
1083
+
1084
+ for value in sorted(result.discriminator_values):
1085
+ if value not in all_variant_fields:
1086
+ continue
989
1087
 
1088
+ # Type name is PascalCase for definitions
990
1089
  variant_name = avro_name(''.join(word.capitalize() for word in value.replace('_', ' ').split()))
991
- variant_doc = cluster.documents[0].data if cluster.documents else {}
1090
+ # Choice key must match actual selector value in instances
1091
+ choice_key = value
1092
+
1093
+ variant_doc = variant_docs.get(value, {})
1094
+ variant_specific = all_variant_fields[value] - common_fields
1095
+
1096
+ # Get cluster with all documents for this variant
1097
+ cluster_for_variant = next(
1098
+ (c for c in result.clusters
1099
+ if any(d.field_values.get(result.discriminator_field) == value
1100
+ for d in c.documents)),
1101
+ None
1102
+ )
992
1103
 
993
1104
  properties: Dict[str, Any] = {}
994
1105
  required: List[str] = []
995
1106
 
996
- # Add discriminator field with default
997
- disc_safe = avro_name(result.discriminator_field)
998
- properties[disc_safe] = {
999
- "type": "string",
1000
- "default": value
1001
- }
1002
- required.append(disc_safe)
1003
-
1004
- # Add other fields
1005
- for field_name in sorted(cluster.merged_signature):
1006
- if field_name == result.discriminator_field:
1007
- continue
1107
+ # Add variant-specific fields only (common fields inherited from base)
1108
+ for field_name in sorted(variant_specific):
1008
1109
  safe_name = avro_name(field_name)
1009
- field_value = variant_doc.get(field_name)
1110
+
1111
+ # Find the first non-null value across all documents in this variant
1112
+ # to properly infer the type
1113
+ field_value = None
1114
+ if cluster_for_variant:
1115
+ for doc in cluster_for_variant.documents:
1116
+ val = doc.data.get(field_name)
1117
+ if val is not None:
1118
+ field_value = val
1119
+ break
1120
+ if field_value is None:
1121
+ field_value = variant_doc.get(field_name)
1122
+
1010
1123
  field_type = self.python_type_to_jstruct_type(f"{type_name}.{safe_name}", field_value)
1011
1124
 
1012
1125
  if isinstance(field_type, str):
@@ -1017,25 +1130,36 @@ class JsonStructureSchemaInferrer(SchemaInferrer):
1017
1130
  if field_name != safe_name:
1018
1131
  properties[safe_name]["altnames"] = {self.altnames_key: field_name}
1019
1132
 
1020
- if field_name in cluster.required_fields:
1133
+ # Field is required only if present in all documents of this variant
1134
+ if cluster_for_variant and field_name in cluster_for_variant.required_fields:
1021
1135
  required.append(safe_name)
1022
1136
 
1023
1137
  variant_record: Dict[str, Any] = {
1024
1138
  "type": "object",
1025
1139
  "name": variant_name,
1140
+ "$extends": f"#/definitions/{base_name}",
1026
1141
  "properties": properties
1027
1142
  }
1028
1143
  if required:
1029
1144
  variant_record["required"] = required
1030
- variant_types.append(variant_record)
1145
+
1146
+ definitions[variant_name] = variant_record
1147
+ # Use actual discriminator value as choice key (must match selector in instances)
1148
+ choices_map[choice_key] = {"type": {"$ref": f"#/definitions/{variant_name}"}}
1031
1149
 
1032
- if len(variant_types) == 1:
1033
- return variant_types[0]
1150
+ if len(choices_map) == 1:
1151
+ # Single variant - just return it directly
1152
+ return list(definitions.values())[1] # Skip base, return the variant
1034
1153
 
1154
+ # Build inline union choice type
1155
+ disc_safe = avro_name(result.discriminator_field)
1035
1156
  return {
1036
1157
  "type": "choice",
1037
1158
  "name": avro_name(type_name),
1038
- "choices": variant_types
1159
+ "$extends": f"#/definitions/{base_name}",
1160
+ "selector": disc_safe,
1161
+ "choices": choices_map,
1162
+ "definitions": definitions
1039
1163
  }
1040
1164
 
1041
1165
  # Undiscriminated union - fall back to standard inference
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structurize
3
- Version: 3.2.0
3
+ Version: 3.2.1
4
4
  Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
5
  Author-email: Clemens Vasters <clemensv@microsoft.com>
6
6
  Classifier: Programming Language :: Python :: 3
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes