string-schema 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- string_schema/core/__init__.py +23 -0
- string_schema/core/builders.py +244 -0
- string_schema/core/fields.py +138 -0
- string_schema/core/validators.py +242 -0
- string_schema/examples/__init__.py +36 -0
- string_schema/examples/presets.py +345 -0
- string_schema/examples/recipes.py +380 -0
- string_schema/integrations/__init__.py +15 -0
- string_schema/integrations/json_schema.py +385 -0
- string_schema/integrations/openapi.py +484 -0
- string_schema/integrations/pydantic.py +662 -0
- string_schema/integrations/reverse.py +275 -0
- string_schema/parsing/__init__.py +16 -0
- string_schema/parsing/optimizer.py +246 -0
- string_schema/parsing/string_parser.py +703 -0
- string_schema/parsing/syntax.py +250 -0
- {string_schema-0.1.2.dist-info → string_schema-0.1.3.dist-info}/METADATA +1 -1
- string_schema-0.1.3.dist-info/RECORD +24 -0
- string_schema-0.1.2.dist-info/RECORD +0 -8
- {string_schema-0.1.2.dist-info → string_schema-0.1.3.dist-info}/WHEEL +0 -0
- {string_schema-0.1.2.dist-info → string_schema-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {string_schema-0.1.2.dist-info → string_schema-0.1.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Reverse conversion functions for String Schema
|
|
3
|
+
|
|
4
|
+
This module provides functions to convert from various schema formats back to
|
|
5
|
+
String Schema string syntax, completing the conversion matrix.
|
|
6
|
+
|
|
7
|
+
Conversion Matrix:
|
|
8
|
+
- model_to_string() # Pydantic model → String syntax
|
|
9
|
+
- model_to_json_schema() # Pydantic model → JSON Schema
|
|
10
|
+
- json_schema_to_string() # JSON Schema → String syntax
|
|
11
|
+
- openapi_to_string() # OpenAPI schema → String syntax
|
|
12
|
+
- openapi_to_json_schema() # OpenAPI schema → JSON Schema
|
|
13
|
+
|
|
14
|
+
Note: Some information loss is expected and acceptable when converting
|
|
15
|
+
from more complex formats to simpler string syntax.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from typing import Any, Dict, List, Union, Optional, Type
|
|
19
|
+
import logging
|
|
20
|
+
|
|
21
|
+
# Optional pydantic import
|
|
22
|
+
try:
|
|
23
|
+
from pydantic import BaseModel
|
|
24
|
+
HAS_PYDANTIC = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_PYDANTIC = False
|
|
27
|
+
BaseModel = None
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def model_to_string(model: Type[BaseModel], include_name: bool = False) -> str:
    """
    Render a Pydantic model class in String Schema string syntax.

    The model is exported to JSON Schema with model_to_json_schema() and the
    resulting dictionary is rendered with json_schema_to_string().

    Args:
        model: Pydantic model class
        include_name: Accepted by the signature, but this function does not
            read it; the output is the same for True and False.

    Returns:
        String representation produced by json_schema_to_string()

    Raises:
        ImportError: If Pydantic is not installed.

    Example:
        UserModel = create_model("name:string, email:email, age:int?")
        schema_str = model_to_string(UserModel)
    """
    if HAS_PYDANTIC:
        # Two-step conversion: model -> JSON Schema -> string syntax.
        return json_schema_to_string(model_to_json_schema(model))

    raise ImportError("Pydantic is required for model_to_string. Install with: pip install pydantic")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def model_to_json_schema(model: Type[BaseModel]) -> Dict[str, Any]:
    """
    Export a Pydantic model class as a JSON Schema dictionary.

    Args:
        model: Pydantic model class

    Returns:
        JSON Schema dictionary as generated by Pydantic itself

    Raises:
        ImportError: If Pydantic is not installed.
        ValueError: If the model exposes neither export method, or if the
            export call itself fails (the original error is chained).

    Example:
        UserModel = create_model("name:string, email:email")
        json_schema = model_to_json_schema(UserModel)
    """
    if not HAS_PYDANTIC:
        raise ImportError("Pydantic is required for model_to_json_schema. Install with: pip install pydantic")

    try:
        # Pydantic v2 exposes model_json_schema(); Pydantic v1 exposes schema().
        # The v2 name is probed first.
        for exporter_name in ('model_json_schema', 'schema'):
            if hasattr(model, exporter_name):
                return getattr(model, exporter_name)()

        raise ValueError(f"Unable to extract JSON Schema from model {model}")
    except Exception as e:
        # Every failure, including the ValueError just above, is re-raised
        # as a ValueError with the cause attached.
        raise ValueError(f"Failed to convert model to JSON Schema: {str(e)}") from e
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def json_schema_to_string(json_schema: Dict[str, Any]) -> str:
    """
    Convert a JSON Schema dictionary to String Schema string syntax.

    Dispatches on the top-level 'type': arrays and objects go to their
    dedicated converters; any other schema is rendered as a single field
    under the placeholder name 'field'.

    Args:
        json_schema: JSON Schema dictionary

    Returns:
        String representation in String Schema syntax

    Example:
        json_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        schema_str = json_schema_to_string(json_schema)
        # Returns: "{name:string?}"  ('name' is not listed in 'required')
    """
    schema_type = json_schema.get('type')

    if schema_type == 'array':
        return _convert_array_schema_to_string(json_schema)

    if schema_type == 'object':
        return _convert_object_schema_to_string(json_schema)

    # Single field schema
    return _convert_field_schema_to_string('field', json_schema)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def openapi_to_string(openapi_schema: Dict[str, Any]) -> str:
    """
    Convert an OpenAPI schema to String Schema string syntax.

    The schema is first normalised by openapi_to_json_schema() and the
    result is rendered by json_schema_to_string().

    Args:
        openapi_schema: OpenAPI schema dictionary

    Returns:
        String representation in String Schema syntax

    Example:
        openapi_schema = {"type": "object", "properties": {"name": {"type": "string"}}}
        schema_str = openapi_to_string(openapi_schema)
    """
    return json_schema_to_string(openapi_to_json_schema(openapi_schema))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def openapi_to_json_schema(openapi_schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert OpenAPI schema to JSON Schema.

    Drops the OpenAPI-only keywords 'example', 'examples', 'discriminator',
    'xml' and 'externalDocs' from the schema, from every schema under
    'properties', and from the 'items' schema, recursively. The input
    dictionary and its nested dictionaries are left untouched.

    Args:
        openapi_schema: OpenAPI schema dictionary

    Returns:
        A new JSON Schema dictionary

    Example:
        openapi_schema = {"type": "string", "format": "email", "example": "a@b.c"}
        json_schema = openapi_to_json_schema(openapi_schema)
        # Returns: {"type": "string", "format": "email"}
    """
    openapi_only_keywords = ('example', 'examples', 'discriminator', 'xml', 'externalDocs')

    json_schema = {
        keyword: value
        for keyword, value in openapi_schema.items()
        if keyword not in openapi_only_keywords
    }

    # Build a fresh 'properties' mapping. Assigning converted entries into the
    # existing one would modify the caller's schema, because a shallow copy
    # shares nested dictionaries with the input.
    if 'properties' in json_schema:
        json_schema['properties'] = {
            prop_name: openapi_to_json_schema(prop_schema)
            for prop_name, prop_schema in json_schema['properties'].items()
        }

    # Handle array items
    if 'items' in json_schema:
        json_schema['items'] = openapi_to_json_schema(json_schema['items'])

    return json_schema
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
# Helper functions for JSON Schema to string conversion
|
|
171
|
+
|
|
172
|
+
def _convert_object_schema_to_string(json_schema: Dict[str, Any]) -> str:
    """
    Convert object-type JSON Schema to string syntax.

    Each entry of 'properties' is rendered by
    _convert_field_schema_to_string(); a '?' is appended to fields that are
    not listed in 'required'. The fields are joined with ", " and wrapped
    in braces.

    Args:
        json_schema: JSON Schema dictionary describing an object

    Returns:
        "{}" when there are no properties, otherwise "{field, field, ...}"
    """
    properties = json_schema.get('properties', {})
    required_fields = set(json_schema.get('required', []))

    if not properties:
        return "{}"

    field_strings = []
    for field_name, field_schema in properties.items():
        field_str = _convert_field_schema_to_string(field_name, field_schema)
        if field_name not in required_fields:
            field_str += "?"
        field_strings.append(field_str)

    # Every rendered field has the form "name:type", so the former
    # "more than one field or any ':' present" test was always true and its
    # brace-less branch could never run. The braces are applied directly.
    return "{" + ", ".join(field_strings) + "}"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _convert_array_schema_to_string(json_schema: Dict[str, Any]) -> str:
    """
    Convert array-type JSON Schema to string syntax.

    Args:
        json_schema: JSON Schema dictionary describing an array; a missing
            'items' entry is treated as an empty item schema

    Returns:
        The item representation wrapped in square brackets
    """
    items_schema = json_schema.get('items', {})
    holds_objects = items_schema.get('type') == 'object'

    # Object items are rendered as a braced object, anything else as a simple type name.
    inner = (
        _convert_object_schema_to_string(items_schema)
        if holds_objects
        else _get_simple_type_from_json_schema(items_schema)
    )
    return f"[{inner}]"
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _convert_field_schema_to_string(field_name: str, field_schema: Dict[str, Any]) -> str:
    """
    Convert a single field's JSON Schema to string syntax.

    Array fields and object fields with properties are delegated to the
    array/object converters so their structure is kept instead of being
    flattened to 'string'. All other fields are rendered as "name:type",
    with "(key=value,...)" appended when constraints are present.

    Args:
        field_name: Name to emit before the colon
        field_schema: JSON Schema dictionary for the field

    Returns:
        The field in String Schema syntax, without an optional marker
    """
    json_type = field_schema.get('type')

    if json_type == 'array':
        return f"{field_name}:{_convert_array_schema_to_string(field_schema)}"

    # An object without properties has nothing to expand, so it falls through
    # to the simple-type path below.
    if json_type == 'object' and field_schema.get('properties'):
        return f"{field_name}:{_convert_object_schema_to_string(field_schema)}"

    field_type = _get_simple_type_from_json_schema(field_schema)
    constraints = _extract_constraints_from_json_schema(field_schema)

    if constraints:
        constraint_str = ",".join(f"{k}={v}" for k, v in constraints.items())
        return f"{field_name}:{field_type}({constraint_str})"

    return f"{field_name}:{field_type}"
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _get_simple_type_from_json_schema(field_schema: Dict[str, Any]) -> str:
|
|
221
|
+
"""Extract simple type from JSON Schema field."""
|
|
222
|
+
json_type = field_schema.get('type', 'string')
|
|
223
|
+
format_hint = field_schema.get('format')
|
|
224
|
+
|
|
225
|
+
# Handle special formats
|
|
226
|
+
if format_hint:
|
|
227
|
+
format_mapping = {
|
|
228
|
+
'email': 'email',
|
|
229
|
+
'uri': 'url',
|
|
230
|
+
'url': 'url',
|
|
231
|
+
'date-time': 'datetime',
|
|
232
|
+
'date': 'date',
|
|
233
|
+
'uuid': 'uuid',
|
|
234
|
+
'phone': 'phone'
|
|
235
|
+
}
|
|
236
|
+
if format_hint in format_mapping:
|
|
237
|
+
return format_mapping[format_hint]
|
|
238
|
+
|
|
239
|
+
# Handle enums
|
|
240
|
+
if 'enum' in field_schema:
|
|
241
|
+
enum_values = field_schema['enum']
|
|
242
|
+
return f"enum({','.join(str(v) for v in enum_values)})"
|
|
243
|
+
|
|
244
|
+
# Handle basic types
|
|
245
|
+
type_mapping = {
|
|
246
|
+
'string': 'string',
|
|
247
|
+
'integer': 'int',
|
|
248
|
+
'number': 'number',
|
|
249
|
+
'boolean': 'bool'
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
return type_mapping.get(json_type, 'string')
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _extract_constraints_from_json_schema(field_schema: Dict[str, Any]) -> Dict[str, Any]:
|
|
256
|
+
"""Extract constraints from JSON Schema field."""
|
|
257
|
+
constraints = {}
|
|
258
|
+
|
|
259
|
+
# String constraints
|
|
260
|
+
if 'minLength' in field_schema:
|
|
261
|
+
constraints['min'] = field_schema['minLength']
|
|
262
|
+
if 'maxLength' in field_schema:
|
|
263
|
+
constraints['max'] = field_schema['maxLength']
|
|
264
|
+
|
|
265
|
+
# Number constraints
|
|
266
|
+
if 'minimum' in field_schema:
|
|
267
|
+
constraints['min'] = field_schema['minimum']
|
|
268
|
+
if 'maximum' in field_schema:
|
|
269
|
+
constraints['max'] = field_schema['maximum']
|
|
270
|
+
|
|
271
|
+
# Pattern constraint
|
|
272
|
+
if 'pattern' in field_schema:
|
|
273
|
+
constraints['pattern'] = f"'{field_schema['pattern']}'"
|
|
274
|
+
|
|
275
|
+
return constraints
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Parsing module for String Schema
|
|
3
|
+
|
|
4
|
+
Contains functionality for parsing string-based schema definitions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .string_parser import parse_string_schema, validate_string_schema
|
|
8
|
+
from .syntax import get_string_schema_examples
|
|
9
|
+
from .optimizer import optimize_string_schema
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"parse_string_schema",
|
|
13
|
+
"validate_string_schema",
|
|
14
|
+
"get_string_schema_examples",
|
|
15
|
+
"optimize_string_schema"
|
|
16
|
+
]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema optimization utilities for String Schema
|
|
3
|
+
|
|
4
|
+
Contains functions for optimizing and improving schema definitions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
from typing import Dict, Any, List, Optional
|
|
9
|
+
import logging
|
|
10
|
+
|
|
11
|
+
from .string_parser import validate_string_schema
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def optimize_string_schema(schema_str: str) -> str:
    """
    Reformat a schema string for readability.

    The string is validated first; an invalid schema is returned unchanged.
    A valid schema is passed through _format_schema_string(). Any exception
    is logged as a warning and the original string is returned.

    Args:
        schema_str: Schema definition in String Schema syntax

    Returns:
        The reformatted schema, or the input string when it is invalid or
        when formatting fails
    """
    try:
        if validate_string_schema(schema_str)['valid']:
            # The only optimization applied at present is whitespace/layout formatting.
            # Future: Could implement smart formatting, type inference, etc.
            return _format_schema_string(schema_str)
        return schema_str

    except Exception as e:
        logger.warning(f"Failed to optimize schema string: {e}")
        return schema_str
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _format_schema_string(schema_str: str) -> str:
|
|
34
|
+
"""Format schema string for better readability"""
|
|
35
|
+
# Remove extra whitespace
|
|
36
|
+
schema_str = re.sub(r'\s+', ' ', schema_str.strip())
|
|
37
|
+
|
|
38
|
+
# Add spacing around operators
|
|
39
|
+
schema_str = re.sub(r'([:|,])', r'\1 ', schema_str)
|
|
40
|
+
schema_str = re.sub(r'\s+', ' ', schema_str)
|
|
41
|
+
|
|
42
|
+
# Format nested structures
|
|
43
|
+
if '{' in schema_str and '}' in schema_str:
|
|
44
|
+
schema_str = _format_nested_objects(schema_str)
|
|
45
|
+
|
|
46
|
+
return schema_str.strip()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _format_nested_objects(schema_str: str) -> str:
|
|
50
|
+
"""Format nested object structures with proper indentation"""
|
|
51
|
+
# This is a simplified formatter - could be enhanced
|
|
52
|
+
result = ""
|
|
53
|
+
indent_level = 0
|
|
54
|
+
|
|
55
|
+
for char in schema_str:
|
|
56
|
+
if char == '{':
|
|
57
|
+
result += char + '\n' + ' ' * (indent_level + 1)
|
|
58
|
+
indent_level += 1
|
|
59
|
+
elif char == '}':
|
|
60
|
+
indent_level -= 1
|
|
61
|
+
result += '\n' + ' ' * indent_level + char
|
|
62
|
+
elif char == ',':
|
|
63
|
+
result += char + '\n' + ' ' * indent_level
|
|
64
|
+
else:
|
|
65
|
+
result += char
|
|
66
|
+
|
|
67
|
+
return result
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def suggest_improvements(schema_str: str) -> List[str]:
    """
    Produce human-readable suggestions for improving a schema string.

    The checks use the result of validate_string_schema() ('features_used'
    and 'parsed_fields') together with plain substring tests on the schema
    text. If anything raises, a warning is logged and the suggestions
    gathered up to that point are returned.

    Args:
        schema_str: Schema definition in String Schema syntax

    Returns:
        List of suggestion messages; empty when nothing is suggested
    """
    suggestions = []

    try:
        validation = validate_string_schema(schema_str)

        def uses(feature: str) -> bool:
            # True when the validator reported this feature for the schema.
            return feature in validation.get('features_used', [])

        # Check for missing constraints
        if uses('arrays') and not uses('constraints'):
            suggestions.append("Consider adding array size constraints (e.g., [string](max=5)) for better LLM guidance")

        # Check for missing special types
        special_type_hints = (
            ('email', "Use 'email' type instead of 'string' for email fields"),
            ('url', "Use 'url' type instead of 'string' for URL fields"),
        )
        for keyword, message in special_type_hints:
            if keyword in schema_str.lower() and not uses('special_types'):
                suggestions.append(message)

        # Check for overly complex schemas
        field_count = len(validation.get('parsed_fields', {}))
        if field_count > 15:
            suggestions.append("Consider breaking down complex schemas into smaller, focused schemas")

        # Check for missing optional markers
        if '?' not in schema_str and field_count > 5:
            suggestions.append("Consider marking some fields as optional with '?' for more flexible extraction")

        # Check for enum opportunities
        if 'status' in schema_str.lower() and not uses('enums'):
            suggestions.append("Consider using enum for status fields (e.g., status:enum(active,inactive))")

    except Exception as e:
        logger.warning(f"Failed to generate suggestions: {e}")

    return suggestions
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def simplify_schema(schema_str: str, max_fields: int = 10) -> str:
    """
    Flag a schema whose field count exceeds a limit.

    No fields are removed. When the number of parsed fields reported by
    validate_string_schema() is above max_fields, the original string is
    returned with a trailing note appended; otherwise it is returned as is.
    On any exception a warning is logged and the input is returned.

    Args:
        schema_str: Schema definition in String Schema syntax
        max_fields: Largest field count that is left unflagged

    Returns:
        The input string, possibly followed by " # Note: Consider simplifying this schema"
    """
    try:
        field_total = len(validate_string_schema(schema_str).get('parsed_fields', {}))

        if field_total <= max_fields:
            return schema_str

        # This is a basic implementation - could be enhanced with smarter field selection
        logger.info(f"Schema has {field_total} fields, simplifying to {max_fields}")

        # NOTE(review): the appended text becomes part of the returned schema
        # string - confirm the parser accepts a trailing '#' note.
        return schema_str + " # Note: Consider simplifying this schema"

    except Exception as e:
        logger.warning(f"Failed to simplify schema: {e}")
        return schema_str
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def infer_types(data_sample: Dict[str, Any]) -> str:
    """
    Build a schema string from one example record.

    Each key/value pair is turned into a field definition by
    _infer_field_type(); the definitions are joined with ", " in the
    mapping's iteration order.

    Args:
        data_sample: Example record mapping field names to sample values

    Returns:
        Comma-separated field definitions; empty string for an empty sample
    """
    return ", ".join(
        _infer_field_type(name, sample) for name, sample in data_sample.items()
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _infer_field_type(field_name: str, value: Any) -> str:
|
|
140
|
+
"""Infer field type from a value"""
|
|
141
|
+
if value is None:
|
|
142
|
+
return f"{field_name}:string?"
|
|
143
|
+
|
|
144
|
+
if isinstance(value, bool):
|
|
145
|
+
return f"{field_name}:bool"
|
|
146
|
+
|
|
147
|
+
if isinstance(value, int):
|
|
148
|
+
return f"{field_name}:int"
|
|
149
|
+
|
|
150
|
+
if isinstance(value, float):
|
|
151
|
+
return f"{field_name}:number"
|
|
152
|
+
|
|
153
|
+
if isinstance(value, str):
|
|
154
|
+
# Try to infer special types
|
|
155
|
+
if '@' in value and '.' in value:
|
|
156
|
+
return f"{field_name}:email"
|
|
157
|
+
elif value.startswith(('http://', 'https://')):
|
|
158
|
+
return f"{field_name}:url"
|
|
159
|
+
elif re.match(r'^\d{4}-\d{2}-\d{2}', value):
|
|
160
|
+
return f"{field_name}:date"
|
|
161
|
+
else:
|
|
162
|
+
return f"{field_name}:string"
|
|
163
|
+
|
|
164
|
+
if isinstance(value, list):
|
|
165
|
+
if not value:
|
|
166
|
+
return f"{field_name}:[string]"
|
|
167
|
+
|
|
168
|
+
# Infer from first item
|
|
169
|
+
first_item = value[0]
|
|
170
|
+
if isinstance(first_item, dict):
|
|
171
|
+
# Object array - this would need more complex inference
|
|
172
|
+
return f"{field_name}:[object]"
|
|
173
|
+
else:
|
|
174
|
+
item_type = _infer_simple_type(first_item)
|
|
175
|
+
return f"{field_name}:[{item_type}]"
|
|
176
|
+
|
|
177
|
+
if isinstance(value, dict):
|
|
178
|
+
# Nested object - would need recursive inference
|
|
179
|
+
return f"{field_name}:object"
|
|
180
|
+
|
|
181
|
+
return f"{field_name}:string"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _infer_simple_type(value: Any) -> str:
|
|
185
|
+
"""Infer simple type from a value"""
|
|
186
|
+
if isinstance(value, bool):
|
|
187
|
+
return "bool"
|
|
188
|
+
elif isinstance(value, int):
|
|
189
|
+
return "int"
|
|
190
|
+
elif isinstance(value, float):
|
|
191
|
+
return "number"
|
|
192
|
+
elif isinstance(value, str):
|
|
193
|
+
if '@' in value and '.' in value:
|
|
194
|
+
return "email"
|
|
195
|
+
elif value.startswith(('http://', 'https://')):
|
|
196
|
+
return "url"
|
|
197
|
+
else:
|
|
198
|
+
return "string"
|
|
199
|
+
else:
|
|
200
|
+
return "string"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def validate_optimization(original: str, optimized: str) -> Dict[str, Any]:
    """
    Compare an original schema string with its optimized version.

    Both strings are run through validate_string_schema(). An invalid
    original or optimized schema is recorded as an error, a differing
    parsed-field count as a warning, and differing feature sets as changes.
    Any exception raised during the comparison is recorded as an error.

    Args:
        original: Schema string before optimization
        optimized: Schema string after optimization

    Returns:
        Dictionary with keys 'valid' (True when no errors were recorded),
        'errors', 'warnings' and 'changes' (lists of messages)
    """
    result = {
        'valid': True,
        'errors': [],
        'warnings': [],
        'changes': [],
    }

    try:
        original_validation = validate_string_schema(original)
        optimized_validation = validate_string_schema(optimized)

        if not original_validation['valid']:
            result['errors'].append("Original schema is invalid")

        if not optimized_validation['valid']:
            result['errors'].append("Optimized schema is invalid")

        # Compare field counts
        orig_fields = len(original_validation.get('parsed_fields', {}))
        opt_fields = len(optimized_validation.get('parsed_fields', {}))

        if orig_fields != opt_fields:
            result['warnings'].append(f"Field count changed: {orig_fields} → {opt_fields}")

        # Compare features
        orig_features = set(original_validation.get('features_used', []))
        opt_features = set(optimized_validation.get('features_used', []))

        added = opt_features - orig_features
        removed = orig_features - opt_features

        # Sets have no stable iteration order; sort so the messages are
        # identical from run to run.
        if added:
            result['changes'].append(f"Added features: {', '.join(sorted(added))}")
        if removed:
            result['changes'].append(f"Removed features: {', '.join(sorted(removed))}")

    except Exception as e:
        result['errors'].append(f"Validation error: {str(e)}")

    result['valid'] = len(result['errors']) == 0
    return result
|