string-schema 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,275 @@
1
+ """
2
+ Reverse conversion functions for String Schema
3
+
4
+ This module provides functions to convert from various schema formats back to
5
+ String Schema string syntax, completing the conversion matrix.
6
+
7
+ Conversion Matrix:
8
+ - model_to_string() # Pydantic model → String syntax
9
+ - model_to_json_schema() # Pydantic model → JSON Schema
10
+ - json_schema_to_string() # JSON Schema → String syntax
11
+ - openapi_to_string() # OpenAPI schema → String syntax
12
+ - openapi_to_json_schema() # OpenAPI schema → JSON Schema
13
+
14
+ Note: Some information loss is expected and acceptable when converting
15
+ from more complex formats to simpler string syntax.
16
+ """
17
+
18
+ from typing import Any, Dict, List, Union, Optional, Type
19
+ import logging
20
+
21
+ # Optional pydantic import
22
+ try:
23
+ from pydantic import BaseModel
24
+ HAS_PYDANTIC = True
25
+ except ImportError:
26
+ HAS_PYDANTIC = False
27
+ BaseModel = None
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
def model_to_string(model: Type[BaseModel], include_name: bool = False) -> str:
    """
    Render a Pydantic model class in String Schema string syntax.

    The model is first turned into a JSON Schema dictionary with
    ``model_to_json_schema`` and that dictionary is then rendered with
    ``json_schema_to_string``.

    Args:
        model: Pydantic model class
        include_name: Accepted for API compatibility; the current
            implementation does not read this flag.

    Returns:
        String representation produced by ``json_schema_to_string``

    Raises:
        ImportError: If Pydantic is not installed.
        ValueError: If the JSON Schema cannot be generated from the model.
    """
    if HAS_PYDANTIC:
        # Two-step conversion: model -> JSON Schema -> string syntax.
        return json_schema_to_string(model_to_json_schema(model))

    raise ImportError("Pydantic is required for model_to_string. Install with: pip install pydantic")
54
+
55
+
56
def model_to_json_schema(model: Type[BaseModel]) -> Dict[str, Any]:
    """
    Produce the JSON Schema dictionary for a Pydantic model class.

    Args:
        model: Pydantic model class

    Returns:
        JSON Schema dictionary as returned by the model's own schema method

    Raises:
        ImportError: If Pydantic is not installed.
        ValueError: If the model exposes neither schema method, or if the
            schema method itself raises (the original error is chained).
    """
    if not HAS_PYDANTIC:
        raise ImportError("Pydantic is required for model_to_json_schema. Install with: pip install pydantic")

    try:
        # Pydantic v2 exposes model_json_schema(); Pydantic v1 exposes schema().
        # The v2 name is probed first.
        for method_name in ('model_json_schema', 'schema'):
            if hasattr(model, method_name):
                return getattr(model, method_name)()
        raise ValueError(f"Unable to extract JSON Schema from model {model}")
    except Exception as e:
        # Every failure, including the ValueError raised just above, is
        # re-wrapped in a ValueError with the original as its cause.
        raise ValueError(f"Failed to convert model to JSON Schema: {str(e)}") from e
85
+
86
+
87
def json_schema_to_string(json_schema: Dict[str, Any]) -> str:
    """
    Render a JSON Schema dictionary in String Schema string syntax.

    Dispatches on the top-level ``type`` keyword:

    - ``"array"``  -> ``_convert_array_schema_to_string``
    - ``"object"`` -> ``_convert_object_schema_to_string``
    - anything else is treated as a single field named ``field``

    Args:
        json_schema: JSON Schema dictionary

    Returns:
        String representation in String Schema syntax

    Example:
        json_schema = {
            "type": "object",
            "properties": {"name": {"type": "string"}},
            "required": ["name"],
        }
        schema_str = json_schema_to_string(json_schema)
        # Returns: "{name:string}"  (object output is wrapped in braces;
        # properties not listed under "required" get a trailing "?")
    """
    schema_type = json_schema.get('type')

    if schema_type == 'array':
        return _convert_array_schema_to_string(json_schema)
    if schema_type == 'object':
        return _convert_object_schema_to_string(json_schema)

    # Neither array nor object: render as a single field schema.
    return _convert_field_schema_to_string('field', json_schema)
109
+
110
+
111
def openapi_to_string(openapi_schema: Dict[str, Any]) -> str:
    """
    Render an OpenAPI schema in String Schema string syntax.

    The schema is first normalised with ``openapi_to_json_schema`` and the
    result is then rendered with ``json_schema_to_string``.

    Args:
        openapi_schema: OpenAPI schema dictionary

    Returns:
        String representation produced by ``json_schema_to_string``
    """
    # OpenAPI schema -> JSON Schema -> string syntax.
    return json_schema_to_string(openapi_to_json_schema(openapi_schema))
130
+
131
+
132
def openapi_to_json_schema(openapi_schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Convert OpenAPI schema to JSON Schema.

    Returns a new dictionary with the OpenAPI-only keywords ``example``,
    ``examples``, ``discriminator``, ``xml`` and ``externalDocs`` removed.
    The same stripping is applied recursively to every schema under
    ``properties`` and to the schema under ``items``. The input dictionary,
    including its nested ``properties`` mapping, is never modified.

    Args:
        openapi_schema: OpenAPI schema dictionary

    Returns:
        JSON Schema dictionary. A value that is not a dictionary (for
        example a boolean or list found under ``items``) is returned
        unchanged.

    Example:
        openapi_schema = {"type": "string", "format": "email", "example": "a@b.c"}
        json_schema = openapi_to_json_schema(openapi_schema)
        # Returns: {"type": "string", "format": "email"}
    """
    # Non-dict sub-schemas cannot carry OpenAPI keywords; pass them through
    # instead of failing on .items().
    if not isinstance(openapi_schema, dict):
        return openapi_schema

    openapi_only_keywords = ('example', 'examples', 'discriminator', 'xml', 'externalDocs')

    # Build a fresh top-level dict without the OpenAPI-specific keywords.
    json_schema = {
        key: value
        for key, value in openapi_schema.items()
        if key not in openapi_only_keywords
    }

    # Rebuild the properties mapping rather than assigning into it: the
    # mapping object is shared with the caller's input, so in-place
    # assignment would mutate the argument.
    properties = json_schema.get('properties')
    if isinstance(properties, dict):
        json_schema['properties'] = {
            prop_name: openapi_to_json_schema(prop_schema)
            for prop_name, prop_schema in properties.items()
        }

    # Handle array items
    if 'items' in json_schema:
        json_schema['items'] = openapi_to_json_schema(json_schema['items'])

    return json_schema
168
+
169
+
170
+ # Helper functions for JSON Schema to string conversion
171
+
172
+ def _convert_object_schema_to_string(json_schema: Dict[str, Any]) -> str:
173
+ """Convert object-type JSON Schema to string syntax."""
174
+ properties = json_schema.get('properties', {})
175
+ required_fields = set(json_schema.get('required', []))
176
+
177
+ if not properties:
178
+ return "{}"
179
+
180
+ field_strings = []
181
+ for field_name, field_schema in properties.items():
182
+ field_str = _convert_field_schema_to_string(field_name, field_schema)
183
+ if field_name not in required_fields:
184
+ field_str += "?"
185
+ field_strings.append(field_str)
186
+
187
+ # Check if this should be wrapped in braces (nested object)
188
+ if len(field_strings) > 1 or any(':' in fs for fs in field_strings):
189
+ return "{" + ", ".join(field_strings) + "}"
190
+ else:
191
+ return ", ".join(field_strings)
192
+
193
+
194
def _convert_array_schema_to_string(json_schema: Dict[str, Any]) -> str:
    """Render an array-type JSON Schema as ``[...]`` string syntax.

    The ``items`` schema (an empty schema when absent) decides the content:
    an ``object`` item schema is rendered with
    ``_convert_object_schema_to_string``; anything else is reduced to a
    simple type name with ``_get_simple_type_from_json_schema``.
    """
    item_schema = json_schema.get('items', {})
    holds_objects = item_schema.get('type') == 'object'

    if holds_objects:
        inner = _convert_object_schema_to_string(item_schema)
    else:
        inner = _get_simple_type_from_json_schema(item_schema)

    return "[" + inner + "]"
206
+
207
+
208
def _convert_field_schema_to_string(field_name: str, field_schema: Dict[str, Any]) -> str:
    """Render one field as ``name:type`` or ``name:type(k=v,...)``.

    The type name comes from ``_get_simple_type_from_json_schema`` and the
    constraint pairs from ``_extract_constraints_from_json_schema``; the
    parenthesised part is emitted only when at least one constraint exists.
    """
    type_name = _get_simple_type_from_json_schema(field_schema)
    constraints = _extract_constraints_from_json_schema(field_schema)

    rendered = f"{field_name}:{type_name}"
    if not constraints:
        return rendered

    pairs = [f"{key}={value}" for key, value in constraints.items()]
    return rendered + "(" + ",".join(pairs) + ")"
218
+
219
+
220
+ def _get_simple_type_from_json_schema(field_schema: Dict[str, Any]) -> str:
221
+ """Extract simple type from JSON Schema field."""
222
+ json_type = field_schema.get('type', 'string')
223
+ format_hint = field_schema.get('format')
224
+
225
+ # Handle special formats
226
+ if format_hint:
227
+ format_mapping = {
228
+ 'email': 'email',
229
+ 'uri': 'url',
230
+ 'url': 'url',
231
+ 'date-time': 'datetime',
232
+ 'date': 'date',
233
+ 'uuid': 'uuid',
234
+ 'phone': 'phone'
235
+ }
236
+ if format_hint in format_mapping:
237
+ return format_mapping[format_hint]
238
+
239
+ # Handle enums
240
+ if 'enum' in field_schema:
241
+ enum_values = field_schema['enum']
242
+ return f"enum({','.join(str(v) for v in enum_values)})"
243
+
244
+ # Handle basic types
245
+ type_mapping = {
246
+ 'string': 'string',
247
+ 'integer': 'int',
248
+ 'number': 'number',
249
+ 'boolean': 'bool'
250
+ }
251
+
252
+ return type_mapping.get(json_type, 'string')
253
+
254
+
255
+ def _extract_constraints_from_json_schema(field_schema: Dict[str, Any]) -> Dict[str, Any]:
256
+ """Extract constraints from JSON Schema field."""
257
+ constraints = {}
258
+
259
+ # String constraints
260
+ if 'minLength' in field_schema:
261
+ constraints['min'] = field_schema['minLength']
262
+ if 'maxLength' in field_schema:
263
+ constraints['max'] = field_schema['maxLength']
264
+
265
+ # Number constraints
266
+ if 'minimum' in field_schema:
267
+ constraints['min'] = field_schema['minimum']
268
+ if 'maximum' in field_schema:
269
+ constraints['max'] = field_schema['maximum']
270
+
271
+ # Pattern constraint
272
+ if 'pattern' in field_schema:
273
+ constraints['pattern'] = f"'{field_schema['pattern']}'"
274
+
275
+ return constraints
@@ -0,0 +1,16 @@
1
+ """
2
+ Parsing module for String Schema
3
+
4
+ Contains functionality for parsing string-based schema definitions.
5
+ """
6
+
7
+ from .string_parser import parse_string_schema, validate_string_schema
8
+ from .syntax import get_string_schema_examples
9
+ from .optimizer import optimize_string_schema
10
+
11
+ __all__ = [
12
+ "parse_string_schema",
13
+ "validate_string_schema",
14
+ "get_string_schema_examples",
15
+ "optimize_string_schema"
16
+ ]
@@ -0,0 +1,246 @@
1
+ """
2
+ Schema optimization utilities for String Schema
3
+
4
+ Contains functions for optimizing and improving schema definitions.
5
+ """
6
+
7
+ import re
8
+ from typing import Dict, Any, List, Optional
9
+ import logging
10
+
11
+ from .string_parser import validate_string_schema
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
def optimize_string_schema(schema_str: str) -> str:
    """Reformat a schema string for readability.

    The schema is validated first. An invalid schema is returned unchanged;
    a valid one is passed through ``_format_schema_string``. Any exception
    is logged as a warning and the input is returned unchanged.
    """
    try:
        if validate_string_schema(schema_str)['valid']:
            # Only whitespace/layout formatting is applied at present.
            return _format_schema_string(schema_str)
        return schema_str

    except Exception as e:
        logger.warning(f"Failed to optimize schema string: {e}")
        return schema_str
31
+
32
+
33
+ def _format_schema_string(schema_str: str) -> str:
34
+ """Format schema string for better readability"""
35
+ # Remove extra whitespace
36
+ schema_str = re.sub(r'\s+', ' ', schema_str.strip())
37
+
38
+ # Add spacing around operators
39
+ schema_str = re.sub(r'([:|,])', r'\1 ', schema_str)
40
+ schema_str = re.sub(r'\s+', ' ', schema_str)
41
+
42
+ # Format nested structures
43
+ if '{' in schema_str and '}' in schema_str:
44
+ schema_str = _format_nested_objects(schema_str)
45
+
46
+ return schema_str.strip()
47
+
48
+
49
+ def _format_nested_objects(schema_str: str) -> str:
50
+ """Format nested object structures with proper indentation"""
51
+ # This is a simplified formatter - could be enhanced
52
+ result = ""
53
+ indent_level = 0
54
+
55
+ for char in schema_str:
56
+ if char == '{':
57
+ result += char + '\n' + ' ' * (indent_level + 1)
58
+ indent_level += 1
59
+ elif char == '}':
60
+ indent_level -= 1
61
+ result += '\n' + ' ' * indent_level + char
62
+ elif char == ',':
63
+ result += char + '\n' + ' ' * indent_level
64
+ else:
65
+ result += char
66
+
67
+ return result
68
+
69
+
70
def suggest_improvements(schema_str: str) -> List[str]:
    """Return human-readable suggestions for improving a schema string.

    The checks use the ``features_used`` and ``parsed_fields`` entries of the
    ``validate_string_schema`` result together with simple substring tests
    on the schema text. If anything raises, a warning is logged and the
    suggestions gathered up to that point are returned.
    """
    suggestions = []

    try:
        validation = validate_string_schema(schema_str)
        features = validation.get('features_used', [])

        # Arrays without any constraints.
        if 'arrays' in features and 'constraints' not in features:
            suggestions.append("Consider adding array size constraints (e.g., [string](max=5)) for better LLM guidance")

        # Field text mentions email/url but no special type is in use.
        special_type_hints = (
            ('email', "Use 'email' type instead of 'string' for email fields"),
            ('url', "Use 'url' type instead of 'string' for URL fields"),
        )
        for keyword, message in special_type_hints:
            if keyword in schema_str.lower() and 'special_types' not in features:
                suggestions.append(message)

        # Size-based hints.
        field_count = len(validation.get('parsed_fields', {}))
        if field_count > 15:
            suggestions.append("Consider breaking down complex schemas into smaller, focused schemas")

        if field_count > 5 and '?' not in schema_str:
            suggestions.append("Consider marking some fields as optional with '?' for more flexible extraction")

        # Status-like fields without enums.
        if 'status' in schema_str.lower() and 'enums' not in features:
            suggestions.append("Consider using enum for status fields (e.g., status:enum(active,inactive))")

    except Exception as e:
        logger.warning(f"Failed to generate suggestions: {e}")

    return suggestions
106
+
107
+
108
def simplify_schema(schema_str: str, max_fields: int = 10) -> str:
    """Flag a schema that has more than ``max_fields`` parsed fields.

    A schema at or under the limit is returned unchanged. Over the limit,
    an info message is logged and the schema is returned with a trailing
    ``" # Note: Consider simplifying this schema"`` appended; no fields are
    actually removed. On any exception a warning is logged and the input
    is returned unchanged.
    """
    try:
        parsed_fields = validate_string_schema(schema_str).get('parsed_fields', {})

        over_limit = len(parsed_fields) > max_fields
        if over_limit:
            logger.info(f"Schema has {len(parsed_fields)} fields, simplifying to {max_fields}")
            # Placeholder behaviour: annotate rather than drop fields.
            return schema_str + " # Note: Consider simplifying this schema"

        return schema_str

    except Exception as e:
        logger.warning(f"Failed to simplify schema: {e}")
        return schema_str
126
+
127
+
128
def infer_types(data_sample: Dict[str, Any]) -> str:
    """Build a schema string from one sample record.

    Every key/value pair is turned into a field definition by
    ``_infer_field_type``; the definitions are joined with ``", "`` in the
    dictionary's iteration order.
    """
    return ", ".join(
        _infer_field_type(key, value) for key, value in data_sample.items()
    )
137
+
138
+
139
+ def _infer_field_type(field_name: str, value: Any) -> str:
140
+ """Infer field type from a value"""
141
+ if value is None:
142
+ return f"{field_name}:string?"
143
+
144
+ if isinstance(value, bool):
145
+ return f"{field_name}:bool"
146
+
147
+ if isinstance(value, int):
148
+ return f"{field_name}:int"
149
+
150
+ if isinstance(value, float):
151
+ return f"{field_name}:number"
152
+
153
+ if isinstance(value, str):
154
+ # Try to infer special types
155
+ if '@' in value and '.' in value:
156
+ return f"{field_name}:email"
157
+ elif value.startswith(('http://', 'https://')):
158
+ return f"{field_name}:url"
159
+ elif re.match(r'^\d{4}-\d{2}-\d{2}', value):
160
+ return f"{field_name}:date"
161
+ else:
162
+ return f"{field_name}:string"
163
+
164
+ if isinstance(value, list):
165
+ if not value:
166
+ return f"{field_name}:[string]"
167
+
168
+ # Infer from first item
169
+ first_item = value[0]
170
+ if isinstance(first_item, dict):
171
+ # Object array - this would need more complex inference
172
+ return f"{field_name}:[object]"
173
+ else:
174
+ item_type = _infer_simple_type(first_item)
175
+ return f"{field_name}:[{item_type}]"
176
+
177
+ if isinstance(value, dict):
178
+ # Nested object - would need recursive inference
179
+ return f"{field_name}:object"
180
+
181
+ return f"{field_name}:string"
182
+
183
+
184
+ def _infer_simple_type(value: Any) -> str:
185
+ """Infer simple type from a value"""
186
+ if isinstance(value, bool):
187
+ return "bool"
188
+ elif isinstance(value, int):
189
+ return "int"
190
+ elif isinstance(value, float):
191
+ return "number"
192
+ elif isinstance(value, str):
193
+ if '@' in value and '.' in value:
194
+ return "email"
195
+ elif value.startswith(('http://', 'https://')):
196
+ return "url"
197
+ else:
198
+ return "string"
199
+ else:
200
+ return "string"
201
+
202
+
203
def validate_optimization(original: str, optimized: str) -> Dict[str, Any]:
    """Validate that optimization preserves schema semantics.

    Both schema strings are run through ``validate_string_schema`` and the
    results are compared.

    Args:
        original: Schema string before optimization.
        optimized: Schema string after optimization.

    Returns:
        A dict with:

        - ``valid``: ``True`` when ``errors`` is empty
        - ``errors``: invalid original/optimized schema, or an exception
          raised during validation
        - ``warnings``: a note when the number of parsed fields differs
        - ``changes``: features added or removed, listed alphabetically
    """
    result = {
        'valid': True,
        'errors': [],
        'warnings': [],
        'changes': []
    }

    try:
        original_validation = validate_string_schema(original)
        optimized_validation = validate_string_schema(optimized)

        if not original_validation['valid']:
            result['errors'].append("Original schema is invalid")

        if not optimized_validation['valid']:
            result['errors'].append("Optimized schema is invalid")

        # Compare field counts
        orig_fields = len(original_validation.get('parsed_fields', {}))
        opt_fields = len(optimized_validation.get('parsed_fields', {}))

        if orig_fields != opt_fields:
            result['warnings'].append(f"Field count changed: {orig_fields} → {opt_fields}")

        # Compare features
        orig_features = set(original_validation.get('features_used', []))
        opt_features = set(optimized_validation.get('features_used', []))

        added = opt_features - orig_features
        removed = orig_features - opt_features

        # Set iteration order is not stable across runs; sort so the
        # reported messages are deterministic.
        if added:
            result['changes'].append(f"Added features: {', '.join(sorted(added))}")
        if removed:
            result['changes'].append(f"Removed features: {', '.join(sorted(removed))}")

    except Exception as e:
        result['errors'].append(f"Validation error: {str(e)}")

    result['valid'] = len(result['errors']) == 0
    return result