structurize 3.0.2__tar.gz → 3.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {structurize-3.0.2/structurize.egg-info → structurize-3.1.1}/PKG-INFO +1 -1
  2. {structurize-3.0.2 → structurize-3.1.1}/avrotize/_version.py +3 -3
  3. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotize.py +4 -0
  4. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotots.py +62 -7
  5. structurize-3.1.1/avrotize/avrovalidator.py +518 -0
  6. {structurize-3.0.2 → structurize-3.1.1}/avrotize/commands.json +466 -0
  7. {structurize-3.0.2 → structurize-3.1.1}/avrotize/dependencies/typescript/node22/package.json +1 -1
  8. structurize-3.1.1/avrotize/jsontoschema.py +151 -0
  9. structurize-3.1.1/avrotize/schema_inference.py +825 -0
  10. structurize-3.1.1/avrotize/sqltoavro.py +1159 -0
  11. structurize-3.1.1/avrotize/validate.py +242 -0
  12. structurize-3.1.1/avrotize/xmltoschema.py +122 -0
  13. {structurize-3.0.2 → structurize-3.1.1/structurize.egg-info}/PKG-INFO +1 -1
  14. {structurize-3.0.2 → structurize-3.1.1}/structurize.egg-info/SOURCES.txt +12 -0
  15. {structurize-3.0.2 → structurize-3.1.1}/.gitignore +0 -0
  16. {structurize-3.0.2 → structurize-3.1.1}/LICENSE +0 -0
  17. {structurize-3.0.2 → structurize-3.1.1}/MANIFEST.in +0 -0
  18. {structurize-3.0.2 → structurize-3.1.1}/README.md +0 -0
  19. {structurize-3.0.2 → structurize-3.1.1}/avrotize/__init__.py +0 -0
  20. {structurize-3.0.2 → structurize-3.1.1}/avrotize/__main__.py +0 -0
  21. {structurize-3.0.2 → structurize-3.1.1}/avrotize/asn1toavro.py +0 -0
  22. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotocpp.py +0 -0
  23. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotocsharp.py +0 -0
  24. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotocsv.py +0 -0
  25. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotodatapackage.py +0 -0
  26. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotodb.py +0 -0
  27. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotogo.py +0 -0
  28. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotographql.py +0 -0
  29. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotoiceberg.py +0 -0
  30. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotojava.py +0 -0
  31. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotojs.py +0 -0
  32. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotojsons.py +0 -0
  33. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotojstruct.py +0 -0
  34. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotokusto.py +0 -0
  35. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotomd.py +0 -0
  36. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotools.py +0 -0
  37. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotoparquet.py +0 -0
  38. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotoproto.py +0 -0
  39. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotopython.py +0 -0
  40. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotorust.py +0 -0
  41. {structurize-3.0.2 → structurize-3.1.1}/avrotize/avrotoxsd.py +0 -0
  42. {structurize-3.0.2 → structurize-3.1.1}/avrotize/cddltostructure.py +0 -0
  43. {structurize-3.0.2 → structurize-3.1.1}/avrotize/common.py +0 -0
  44. {structurize-3.0.2 → structurize-3.1.1}/avrotize/constants.py +0 -0
  45. {structurize-3.0.2 → structurize-3.1.1}/avrotize/csvtoavro.py +0 -0
  46. {structurize-3.0.2 → structurize-3.1.1}/avrotize/datapackagetoavro.py +0 -0
  47. {structurize-3.0.2 → structurize-3.1.1}/avrotize/dependencies/cpp/vcpkg/vcpkg.json +0 -0
  48. {structurize-3.0.2 → structurize-3.1.1}/avrotize/dependency_resolver.py +0 -0
  49. {structurize-3.0.2 → structurize-3.1.1}/avrotize/dependency_version.py +0 -0
  50. {structurize-3.0.2 → structurize-3.1.1}/avrotize/jsonstoavro.py +0 -0
  51. {structurize-3.0.2 → structurize-3.1.1}/avrotize/jsonstostructure.py +0 -0
  52. {structurize-3.0.2 → structurize-3.1.1}/avrotize/jstructtoavro.py +0 -0
  53. {structurize-3.0.2 → structurize-3.1.1}/avrotize/kstructtoavro.py +0 -0
  54. {structurize-3.0.2 → structurize-3.1.1}/avrotize/kustotoavro.py +0 -0
  55. {structurize-3.0.2 → structurize-3.1.1}/avrotize/openapitostructure.py +0 -0
  56. {structurize-3.0.2 → structurize-3.1.1}/avrotize/parquettoavro.py +0 -0
  57. {structurize-3.0.2 → structurize-3.1.1}/avrotize/proto2parser.py +0 -0
  58. {structurize-3.0.2 → structurize-3.1.1}/avrotize/proto3parser.py +0 -0
  59. {structurize-3.0.2 → structurize-3.1.1}/avrotize/prototoavro.py +0 -0
  60. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretocddl.py +0 -0
  61. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretocpp.py +0 -0
  62. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretocsharp.py +0 -0
  63. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretocsv.py +0 -0
  64. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretodatapackage.py +0 -0
  65. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretodb.py +0 -0
  66. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretogo.py +0 -0
  67. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretographql.py +0 -0
  68. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretoiceberg.py +0 -0
  69. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretojava.py +0 -0
  70. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretojs.py +0 -0
  71. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretojsons.py +0 -0
  72. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretokusto.py +0 -0
  73. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretomd.py +0 -0
  74. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretoproto.py +0 -0
  75. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretopython.py +0 -0
  76. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretorust.py +0 -0
  77. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretots.py +0 -0
  78. {structurize-3.0.2 → structurize-3.1.1}/avrotize/structuretoxsd.py +0 -0
  79. {structurize-3.0.2 → structurize-3.1.1}/avrotize/xsdtoavro.py +0 -0
  80. {structurize-3.0.2 → structurize-3.1.1}/build.ps1 +0 -0
  81. {structurize-3.0.2 → structurize-3.1.1}/build.sh +0 -0
  82. {structurize-3.0.2 → structurize-3.1.1}/pyproject.toml +0 -0
  83. {structurize-3.0.2 → structurize-3.1.1}/setup.cfg +0 -0
  84. {structurize-3.0.2 → structurize-3.1.1}/structurize.egg-info/dependency_links.txt +0 -0
  85. {structurize-3.0.2 → structurize-3.1.1}/structurize.egg-info/entry_points.txt +0 -0
  86. {structurize-3.0.2 → structurize-3.1.1}/structurize.egg-info/requires.txt +0 -0
  87. {structurize-3.0.2 → structurize-3.1.1}/structurize.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: structurize
3
- Version: 3.0.2
3
+ Version: 3.1.1
4
4
  Summary: Tools to convert from and to JSON Structure from various other schema languages.
5
5
  Author-email: Clemens Vasters <clemensv@microsoft.com>
6
6
  Classifier: Programming Language :: Python :: 3
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '3.0.2'
32
- __version_tuple__ = version_tuple = (3, 0, 2)
31
+ __version__ = version = '3.1.1'
32
+ __version_tuple__ = version_tuple = (3, 1, 1)
33
33
 
34
- __commit_id__ = commit_id = 'g3ee5f8f67'
34
+ __commit_id__ = commit_id = 'ge20c22879'
@@ -38,6 +38,10 @@ def create_subparsers(subparsers, commands):
38
38
  kwargs['choices'] = arg['choices']
39
39
  if 'default' in arg:
40
40
  kwargs['default'] = arg['default']
41
+ # Handle dest for optional arguments only (positional args can't have dest)
42
+ arg_is_positional = not arg['name'].startswith('-')
43
+ if 'dest' in arg and not arg_is_positional:
44
+ kwargs['dest'] = arg['dest']
41
45
  if arg['type'] == 'bool':
42
46
  kwargs['action'] = 'store_true'
43
47
  del kwargs['type']
@@ -500,10 +500,11 @@ class AvroToTypeScript:
500
500
  """Generate TypeScript type declaration file for avro-js module."""
501
501
  avro_js_types = '''declare module 'avro-js' {
502
502
  /**
503
- * Avro Type representation.
503
+ * Avro Type interface.
504
+ * Represents the structure of Type instances returned by avro.parse().
504
505
  * Provides methods for encoding, decoding, and validating Avro data.
505
506
  */
506
- export class Type {
507
+ export interface Type {
507
508
  /**
508
509
  * Encode a value to a Buffer.
509
510
  * @param obj - Value to encode
@@ -575,12 +576,66 @@ class AvroToTypeScript:
575
576
  }
576
577
 
577
578
  /**
578
- * Parse an Avro schema and return a Type instance.
579
- * @param schema - Schema as string or object
580
- * @param options - Parse options
581
- * @returns Type instance
579
+ * avro-js default export interface.
580
+ * This module is CommonJS, so in ESM context it only has a default export.
582
581
  */
583
- export function parse(schema: string | any, options?: any): Type;
582
+ export interface Avro {
583
+ /**
584
+ * Type class constructor.
585
+ */
586
+ Type: any;
587
+
588
+ /**
589
+ * Parse an Avro schema and return a Type instance.
590
+ * @param schema - Schema as string or object
591
+ * @param options - Parse options
592
+ * @returns Type instance
593
+ */
594
+ parse(schema: string | any, options?: any): Type;
595
+
596
+ /**
597
+ * Protocol class constructor.
598
+ */
599
+ Protocol: any;
600
+
601
+ /**
602
+ * Create a file decoder.
603
+ */
604
+ createFileDecoder(path: string, options?: any): any;
605
+
606
+ /**
607
+ * Create a file encoder.
608
+ */
609
+ createFileEncoder(path: string, schema: any, options?: any): any;
610
+
611
+ /**
612
+ * Extract file header.
613
+ */
614
+ extractFileHeader(buffer: Buffer): any;
615
+
616
+ /**
617
+ * Streams utilities.
618
+ */
619
+ streams: any;
620
+
621
+ /**
622
+ * Built-in types.
623
+ */
624
+ types: any;
625
+
626
+ /**
627
+ * Validator (deprecated).
628
+ */
629
+ Validator: any;
630
+
631
+ /**
632
+ * ProtocolValidator (deprecated).
633
+ */
634
+ ProtocolValidator: any;
635
+ }
636
+
637
+ const avro: Avro;
638
+ export default avro;
584
639
  }
585
640
  '''
586
641
 
@@ -0,0 +1,518 @@
1
+ """Validates JSON instances against Avro schemas.
2
+
3
+ This module implements JSON validation against Avro schemas according to
4
+ the Avrotize Schema Specification (avrotize-schema.md). It validates:
5
+ - Primitive types: null, boolean, int, long, float, double, bytes, string
6
+ - Logical types: decimal, uuid, date, time, timestamp, duration
7
+ - Complex types: record, enum, array, map, fixed
8
+ - Type unions
9
+ """
10
+
11
+ import base64
12
+ import re
13
+ from typing import Any, Dict, List, Tuple, Union
14
+
15
+ # Type alias for Avro schema
16
+ AvroSchema = Union[str, Dict[str, Any], List[Any]]
17
+
18
+
19
+ class AvroValidationError(Exception):
20
+ """Exception raised when JSON instance doesn't match Avro schema."""
21
+
22
+ def __init__(self, message: str, path: str = "#"):
23
+ self.message = message
24
+ self.path = path
25
+ super().__init__(f"{message} at {path}")
26
+
27
+
28
+ class AvroValidator:
29
+ """Validates JSON instances against Avro schemas."""
30
+
31
+ # RFC 3339 patterns for logical type validation
32
+ UUID_PATTERN = re.compile(
33
+ r'^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$'
34
+ )
35
+ DATE_PATTERN = re.compile(r'^\d{4}-\d{2}-\d{2}$')
36
+ TIME_PATTERN = re.compile(r'^\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?$')
37
+ DATETIME_PATTERN = re.compile(
38
+ r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?([+-]\d{2}:\d{2}|Z)?$'
39
+ )
40
+ DURATION_PATTERN = re.compile(
41
+ r'^P(\d+Y)?(\d+M)?(\d+D)?(T(\d+H)?(\d+M)?(\d+(\.\d+)?S)?)?$'
42
+ )
43
+ DECIMAL_PATTERN = re.compile(r'^[+-]?\d+(\.\d+)?$')
44
+
45
+ # Int32 and Int64 bounds
46
+ INT32_MIN = -(2**31)
47
+ INT32_MAX = 2**31 - 1
48
+ INT64_MIN = -(2**63)
49
+ INT64_MAX = 2**63 - 1
50
+
51
+ def __init__(self, schema: AvroSchema):
52
+ """Initialize the validator with an Avro schema.
53
+
54
+ Args:
55
+ schema: The Avro schema to validate against
56
+ """
57
+ self.schema = schema
58
+ self.named_types: Dict[str, Dict[str, Any]] = {}
59
+ self._collect_named_types(schema, '')
60
+
61
+ def _collect_named_types(self, schema: AvroSchema, current_namespace: str) -> None:
62
+ """Collects all named types from the schema for reference resolution.
63
+
64
+ Args:
65
+ schema: The schema to scan
66
+ current_namespace: The current namespace context
67
+ """
68
+ if isinstance(schema, dict):
69
+ schema_type = schema.get('type')
70
+ if schema_type in ('record', 'enum', 'fixed'):
71
+ namespace = schema.get('namespace', current_namespace)
72
+ name = schema.get('name', '')
73
+ if namespace:
74
+ fullname = f"{namespace}.{name}"
75
+ else:
76
+ fullname = name
77
+ self.named_types[fullname] = schema
78
+ self.named_types[name] = schema # Also store short name
79
+
80
+ # Recurse into record fields
81
+ if schema_type == 'record':
82
+ for field in schema.get('fields', []):
83
+ self._collect_named_types(field.get('type', 'null'), namespace)
84
+
85
+ elif schema_type == 'array':
86
+ self._collect_named_types(schema.get('items', 'null'), current_namespace)
87
+ elif schema_type == 'map':
88
+ self._collect_named_types(schema.get('values', 'null'), current_namespace)
89
+
90
+ elif isinstance(schema, list):
91
+ # Type union
92
+ for item in schema:
93
+ self._collect_named_types(item, current_namespace)
94
+
95
+ def validate(self, instance: Any) -> None:
96
+ """Validates a JSON instance against the schema.
97
+
98
+ Args:
99
+ instance: The JSON value to validate
100
+
101
+ Raises:
102
+ AvroValidationError: If the instance doesn't match the schema
103
+ """
104
+ self._validate(instance, self.schema, "#")
105
+
106
+ def _validate(self, instance: Any, schema: AvroSchema, path: str) -> None:
107
+ """Internal validation method.
108
+
109
+ Args:
110
+ instance: The JSON value to validate
111
+ schema: The schema to validate against
112
+ path: JSON pointer path for error messages
113
+
114
+ Raises:
115
+ AvroValidationError: If validation fails
116
+ """
117
+ if isinstance(schema, str):
118
+ self._validate_primitive_or_reference(instance, schema, path)
119
+ elif isinstance(schema, dict):
120
+ self._validate_complex(instance, schema, path)
121
+ elif isinstance(schema, list):
122
+ self._validate_union(instance, schema, path)
123
+ else:
124
+ raise AvroValidationError(f"Invalid schema type: {type(schema)}", path)
125
+
126
+ def _validate_primitive_or_reference(
127
+ self, instance: Any, schema: str, path: str
128
+ ) -> None:
129
+ """Validates against a primitive type or named type reference.
130
+
131
+ Args:
132
+ instance: The JSON value to validate
133
+ schema: The primitive type name or named type reference
134
+ path: JSON pointer path for error messages
135
+ """
136
+ # Check if it's a named type reference
137
+ if schema in self.named_types:
138
+ self._validate_complex(instance, self.named_types[schema], path)
139
+ return
140
+
141
+ # Primitive type validation
142
+ if schema == 'null':
143
+ if instance is not None:
144
+ raise AvroValidationError(f"Expected null, got {type(instance).__name__}", path)
145
+
146
+ elif schema == 'boolean':
147
+ if not isinstance(instance, bool):
148
+ raise AvroValidationError(f"Expected boolean, got {type(instance).__name__}", path)
149
+
150
+ elif schema == 'int':
151
+ if not isinstance(instance, int) or isinstance(instance, bool):
152
+ raise AvroValidationError(f"Expected int, got {type(instance).__name__}", path)
153
+ if not (self.INT32_MIN <= instance <= self.INT32_MAX):
154
+ raise AvroValidationError(
155
+ f"Integer {instance} out of int32 range [{self.INT32_MIN}, {self.INT32_MAX}]",
156
+ path
157
+ )
158
+
159
+ elif schema == 'long':
160
+ if not isinstance(instance, int) or isinstance(instance, bool):
161
+ raise AvroValidationError(f"Expected long, got {type(instance).__name__}", path)
162
+ if not (self.INT64_MIN <= instance <= self.INT64_MAX):
163
+ raise AvroValidationError(
164
+ f"Integer {instance} out of int64 range [{self.INT64_MIN}, {self.INT64_MAX}]",
165
+ path
166
+ )
167
+
168
+ elif schema == 'float':
169
+ if not isinstance(instance, (int, float)) or isinstance(instance, bool):
170
+ raise AvroValidationError(f"Expected float, got {type(instance).__name__}", path)
171
+
172
+ elif schema == 'double':
173
+ if not isinstance(instance, (int, float)) or isinstance(instance, bool):
174
+ raise AvroValidationError(f"Expected double, got {type(instance).__name__}", path)
175
+
176
+ elif schema == 'bytes':
177
+ # In JSON, bytes are represented as strings with unicode escapes
178
+ if not isinstance(instance, str):
179
+ raise AvroValidationError(f"Expected bytes (string), got {type(instance).__name__}", path)
180
+
181
+ elif schema == 'string':
182
+ if not isinstance(instance, str):
183
+ raise AvroValidationError(f"Expected string, got {type(instance).__name__}", path)
184
+
185
+ else:
186
+ raise AvroValidationError(f"Unknown primitive type: {schema}", path)
187
+
188
+ def _validate_complex(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
189
+ """Validates against a complex type schema.
190
+
191
+ Args:
192
+ instance: The JSON value to validate
193
+ schema: The complex type schema
194
+ path: JSON pointer path for error messages
195
+ """
196
+ schema_type = schema.get('type')
197
+
198
+ if schema_type == 'record':
199
+ self._validate_record(instance, schema, path)
200
+ elif schema_type == 'enum':
201
+ self._validate_enum(instance, schema, path)
202
+ elif schema_type == 'array':
203
+ self._validate_array(instance, schema, path)
204
+ elif schema_type == 'map':
205
+ self._validate_map(instance, schema, path)
206
+ elif schema_type == 'fixed':
207
+ self._validate_fixed(instance, schema, path)
208
+ elif schema_type in ('null', 'boolean', 'int', 'long', 'float', 'double', 'bytes', 'string'):
209
+ # Complex form of primitive type, possibly with logical type
210
+ logical_type = schema.get('logicalType')
211
+ if logical_type:
212
+ self._validate_logical_type(instance, schema, logical_type, path)
213
+ else:
214
+ self._validate_primitive_or_reference(instance, schema_type, path)
215
+ else:
216
+ raise AvroValidationError(f"Unknown complex type: {schema_type}", path)
217
+
218
+ def _validate_record(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
219
+ """Validates a record type.
220
+
221
+ Args:
222
+ instance: The JSON value to validate
223
+ schema: The record schema
224
+ path: JSON pointer path for error messages
225
+ """
226
+ if not isinstance(instance, dict):
227
+ raise AvroValidationError(
228
+ f"Expected object for record '{schema.get('name')}', got {type(instance).__name__}",
229
+ path
230
+ )
231
+
232
+ fields = schema.get('fields', [])
233
+ field_names = set()
234
+
235
+ for field in fields:
236
+ field_name = field.get('name')
237
+ field_names.add(field_name)
238
+
239
+ # Check for altnames (JSON encoding)
240
+ json_name = field_name
241
+ altnames = field.get('altnames', {})
242
+ if 'json' in altnames:
243
+ json_name = altnames['json']
244
+
245
+ if json_name in instance:
246
+ field_path = f"{path}/{json_name}"
247
+ self._validate(instance[json_name], field.get('type', 'null'), field_path)
248
+ elif field_name in instance:
249
+ field_path = f"{path}/{field_name}"
250
+ self._validate(instance[field_name], field.get('type', 'null'), field_path)
251
+ elif 'default' not in field:
252
+ # Check if the field type allows null
253
+ field_type = field.get('type', 'null')
254
+ if not self._type_allows_null(field_type):
255
+ raise AvroValidationError(
256
+ f"Missing required field '{field_name}'",
257
+ path
258
+ )
259
+
260
+ def _type_allows_null(self, schema: AvroSchema) -> bool:
261
+ """Check if a type allows null values."""
262
+ if schema == 'null':
263
+ return True
264
+ if isinstance(schema, list):
265
+ return 'null' in schema or any(
266
+ (isinstance(s, dict) and s.get('type') == 'null') for s in schema
267
+ )
268
+ return False
269
+
270
+ def _validate_enum(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
271
+ """Validates an enum type.
272
+
273
+ Args:
274
+ instance: The JSON value to validate
275
+ schema: The enum schema
276
+ path: JSON pointer path for error messages
277
+ """
278
+ if not isinstance(instance, str):
279
+ raise AvroValidationError(
280
+ f"Expected string for enum '{schema.get('name')}', got {type(instance).__name__}",
281
+ path
282
+ )
283
+
284
+ symbols = schema.get('symbols', [])
285
+
286
+ # Check direct symbol match
287
+ if instance in symbols:
288
+ return
289
+
290
+ # Check altsymbols for JSON encoding
291
+ altsymbols = schema.get('altsymbols', {}).get('json', {})
292
+ for symbol, alt_value in altsymbols.items():
293
+ if instance == alt_value:
294
+ return
295
+
296
+ raise AvroValidationError(
297
+ f"'{instance}' is not a valid symbol for enum '{schema.get('name')}'. Valid symbols: {symbols}",
298
+ path
299
+ )
300
+
301
+ def _validate_array(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
302
+ """Validates an array type.
303
+
304
+ Args:
305
+ instance: The JSON value to validate
306
+ schema: The array schema
307
+ path: JSON pointer path for error messages
308
+ """
309
+ if not isinstance(instance, list):
310
+ raise AvroValidationError(
311
+ f"Expected array, got {type(instance).__name__}",
312
+ path
313
+ )
314
+
315
+ items_schema = schema.get('items', 'null')
316
+ for i, item in enumerate(instance):
317
+ item_path = f"{path}/{i}"
318
+ self._validate(item, items_schema, item_path)
319
+
320
+ def _validate_map(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
321
+ """Validates a map type.
322
+
323
+ Args:
324
+ instance: The JSON value to validate
325
+ schema: The map schema
326
+ path: JSON pointer path for error messages
327
+ """
328
+ if not isinstance(instance, dict):
329
+ raise AvroValidationError(
330
+ f"Expected object for map, got {type(instance).__name__}",
331
+ path
332
+ )
333
+
334
+ values_schema = schema.get('values', 'null')
335
+ for key, value in instance.items():
336
+ if not isinstance(key, str):
337
+ raise AvroValidationError(
338
+ f"Map keys must be strings, got {type(key).__name__}",
339
+ path
340
+ )
341
+ value_path = f"{path}/{key}"
342
+ self._validate(value, values_schema, value_path)
343
+
344
+ def _validate_fixed(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
345
+ """Validates a fixed type.
346
+
347
+ Args:
348
+ instance: The JSON value to validate
349
+ schema: The fixed schema
350
+ path: JSON pointer path for error messages
351
+ """
352
+ if not isinstance(instance, str):
353
+ raise AvroValidationError(
354
+ f"Expected string for fixed '{schema.get('name')}', got {type(instance).__name__}",
355
+ path
356
+ )
357
+
358
+ size = schema.get('size', 0)
359
+ # In JSON, fixed values are represented as unicode escape sequences
360
+ # Each byte is represented as a unicode character
361
+ if len(instance) != size:
362
+ raise AvroValidationError(
363
+ f"Fixed '{schema.get('name')}' requires exactly {size} bytes, got {len(instance)}",
364
+ path
365
+ )
366
+
367
+ def _validate_union(self, instance: Any, schema: List[Any], path: str) -> None:
368
+ """Validates against a type union.
369
+
370
+ Args:
371
+ instance: The JSON value to validate
372
+ schema: The union schema (list of types)
373
+ path: JSON pointer path for error messages
374
+ """
375
+ errors = []
376
+ for union_type in schema:
377
+ try:
378
+ self._validate(instance, union_type, path)
379
+ return # Validation succeeded for this type
380
+ except AvroValidationError as e:
381
+ errors.append(str(e))
382
+
383
+ # None of the union types matched
384
+ type_names = [self._get_type_name(t) for t in schema]
385
+ raise AvroValidationError(
386
+ f"Value doesn't match any type in union {type_names}",
387
+ path
388
+ )
389
+
390
+ def _get_type_name(self, schema: AvroSchema) -> str:
391
+ """Gets a human-readable name for a schema type."""
392
+ if isinstance(schema, str):
393
+ return schema
394
+ elif isinstance(schema, dict):
395
+ schema_type = schema.get('type', 'unknown')
396
+ name = schema.get('name')
397
+ if name:
398
+ return f"{schema_type}:{name}"
399
+ return schema_type
400
+ elif isinstance(schema, list):
401
+ return f"union[{', '.join(self._get_type_name(t) for t in schema)}]"
402
+ return 'unknown'
403
+
404
+ def _validate_logical_type(
405
+ self, instance: Any, schema: Dict[str, Any], logical_type: str, path: str
406
+ ) -> None:
407
+ """Validates a logical type.
408
+
409
+ Args:
410
+ instance: The JSON value to validate
411
+ schema: The schema with logical type
412
+ logical_type: The logical type name
413
+ path: JSON pointer path for error messages
414
+ """
415
+ base_type = schema.get('type')
416
+
417
+ if logical_type == 'decimal':
418
+ self._validate_decimal(instance, schema, path)
419
+
420
+ elif logical_type == 'uuid':
421
+ if base_type != 'string':
422
+ raise AvroValidationError(f"uuid logical type requires string base type", path)
423
+ if not isinstance(instance, str):
424
+ raise AvroValidationError(f"Expected string for uuid, got {type(instance).__name__}", path)
425
+ if not self.UUID_PATTERN.match(instance):
426
+ raise AvroValidationError(f"Invalid UUID format: {instance}", path)
427
+
428
+ elif logical_type == 'date':
429
+ if base_type == 'int':
430
+ if not isinstance(instance, int) or isinstance(instance, bool):
431
+ raise AvroValidationError(f"Expected int for date, got {type(instance).__name__}", path)
432
+ elif base_type == 'string':
433
+ if not isinstance(instance, str):
434
+ raise AvroValidationError(f"Expected string for date, got {type(instance).__name__}", path)
435
+ if not self.DATE_PATTERN.match(instance):
436
+ raise AvroValidationError(f"Invalid date format (expected YYYY-MM-DD): {instance}", path)
437
+ else:
438
+ raise AvroValidationError(f"date logical type requires int or string base type", path)
439
+
440
+ elif logical_type in ('time-millis', 'time-micros'):
441
+ if base_type in ('int', 'long'):
442
+ if not isinstance(instance, int) or isinstance(instance, bool):
443
+ raise AvroValidationError(f"Expected int for {logical_type}, got {type(instance).__name__}", path)
444
+ elif base_type == 'string':
445
+ if not isinstance(instance, str):
446
+ raise AvroValidationError(f"Expected string for {logical_type}, got {type(instance).__name__}", path)
447
+ if not self.TIME_PATTERN.match(instance):
448
+ raise AvroValidationError(f"Invalid time format: {instance}", path)
449
+ else:
450
+ raise AvroValidationError(f"{logical_type} logical type requires int, long, or string base type", path)
451
+
452
+ elif logical_type in ('timestamp-millis', 'timestamp-micros', 'local-timestamp-millis', 'local-timestamp-micros'):
453
+ if base_type == 'long':
454
+ if not isinstance(instance, int) or isinstance(instance, bool):
455
+ raise AvroValidationError(f"Expected long for {logical_type}, got {type(instance).__name__}", path)
456
+ elif base_type == 'string':
457
+ if not isinstance(instance, str):
458
+ raise AvroValidationError(f"Expected string for {logical_type}, got {type(instance).__name__}", path)
459
+ if not self.DATETIME_PATTERN.match(instance):
460
+ raise AvroValidationError(f"Invalid datetime format: {instance}", path)
461
+ else:
462
+ raise AvroValidationError(f"{logical_type} logical type requires long or string base type", path)
463
+
464
+ elif logical_type == 'duration':
465
+ if base_type == 'fixed':
466
+ self._validate_fixed(instance, schema, path)
467
+ elif base_type == 'string':
468
+ if not isinstance(instance, str):
469
+ raise AvroValidationError(f"Expected string for duration, got {type(instance).__name__}", path)
470
+ if not self.DURATION_PATTERN.match(instance):
471
+ raise AvroValidationError(f"Invalid duration format: {instance}", path)
472
+ else:
473
+ raise AvroValidationError(f"duration logical type requires fixed or string base type", path)
474
+
475
+ else:
476
+ # Unknown logical type - fall back to base type validation
477
+ self._validate_primitive_or_reference(instance, base_type, path)
478
+
479
+ def _validate_decimal(self, instance: Any, schema: Dict[str, Any], path: str) -> None:
480
+ """Validates a decimal logical type.
481
+
482
+ Args:
483
+ instance: The JSON value to validate
484
+ schema: The decimal schema
485
+ path: JSON pointer path for error messages
486
+ """
487
+ base_type = schema.get('type')
488
+
489
+ if base_type == 'string':
490
+ if not isinstance(instance, str):
491
+ raise AvroValidationError(f"Expected string for decimal, got {type(instance).__name__}", path)
492
+ if not self.DECIMAL_PATTERN.match(instance):
493
+ raise AvroValidationError(f"Invalid decimal format: {instance}", path)
494
+ elif base_type == 'bytes':
495
+ if not isinstance(instance, str):
496
+ raise AvroValidationError(f"Expected bytes (string) for decimal, got {type(instance).__name__}", path)
497
+ elif base_type == 'fixed':
498
+ self._validate_fixed(instance, schema, path)
499
+ else:
500
+ raise AvroValidationError(f"decimal logical type requires bytes, fixed, or string base type", path)
501
+
502
+
503
+ def validate_json_against_avro(instance: Any, schema: AvroSchema) -> List[str]:
504
+ """Validates a JSON instance against an Avro schema.
505
+
506
+ Args:
507
+ instance: The JSON value to validate
508
+ schema: The Avro schema
509
+
510
+ Returns:
511
+ List of validation error messages (empty if valid)
512
+ """
513
+ validator = AvroValidator(schema)
514
+ try:
515
+ validator.validate(instance)
516
+ return []
517
+ except AvroValidationError as e:
518
+ return [str(e)]