stratifyai-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. cli/__init__.py +5 -0
  2. cli/stratifyai_cli.py +1753 -0
  3. stratifyai/__init__.py +113 -0
  4. stratifyai/api_key_helper.py +372 -0
  5. stratifyai/caching.py +279 -0
  6. stratifyai/chat/__init__.py +54 -0
  7. stratifyai/chat/builder.py +366 -0
  8. stratifyai/chat/stratifyai_anthropic.py +194 -0
  9. stratifyai/chat/stratifyai_bedrock.py +200 -0
  10. stratifyai/chat/stratifyai_deepseek.py +194 -0
  11. stratifyai/chat/stratifyai_google.py +194 -0
  12. stratifyai/chat/stratifyai_grok.py +194 -0
  13. stratifyai/chat/stratifyai_groq.py +195 -0
  14. stratifyai/chat/stratifyai_ollama.py +201 -0
  15. stratifyai/chat/stratifyai_openai.py +209 -0
  16. stratifyai/chat/stratifyai_openrouter.py +201 -0
  17. stratifyai/chunking.py +158 -0
  18. stratifyai/client.py +292 -0
  19. stratifyai/config.py +1273 -0
  20. stratifyai/cost_tracker.py +257 -0
  21. stratifyai/embeddings.py +245 -0
  22. stratifyai/exceptions.py +91 -0
  23. stratifyai/models.py +59 -0
  24. stratifyai/providers/__init__.py +5 -0
  25. stratifyai/providers/anthropic.py +330 -0
  26. stratifyai/providers/base.py +183 -0
  27. stratifyai/providers/bedrock.py +634 -0
  28. stratifyai/providers/deepseek.py +39 -0
  29. stratifyai/providers/google.py +39 -0
  30. stratifyai/providers/grok.py +39 -0
  31. stratifyai/providers/groq.py +39 -0
  32. stratifyai/providers/ollama.py +43 -0
  33. stratifyai/providers/openai.py +344 -0
  34. stratifyai/providers/openai_compatible.py +372 -0
  35. stratifyai/providers/openrouter.py +39 -0
  36. stratifyai/py.typed +2 -0
  37. stratifyai/rag.py +381 -0
  38. stratifyai/retry.py +185 -0
  39. stratifyai/router.py +643 -0
  40. stratifyai/summarization.py +179 -0
  41. stratifyai/utils/__init__.py +11 -0
  42. stratifyai/utils/bedrock_validator.py +136 -0
  43. stratifyai/utils/code_extractor.py +327 -0
  44. stratifyai/utils/csv_extractor.py +197 -0
  45. stratifyai/utils/file_analyzer.py +192 -0
  46. stratifyai/utils/json_extractor.py +219 -0
  47. stratifyai/utils/log_extractor.py +267 -0
  48. stratifyai/utils/model_selector.py +324 -0
  49. stratifyai/utils/provider_validator.py +442 -0
  50. stratifyai/utils/token_counter.py +186 -0
  51. stratifyai/vectordb.py +344 -0
  52. stratifyai-0.1.0.dist-info/METADATA +263 -0
  53. stratifyai-0.1.0.dist-info/RECORD +57 -0
  54. stratifyai-0.1.0.dist-info/WHEEL +5 -0
  55. stratifyai-0.1.0.dist-info/entry_points.txt +2 -0
  56. stratifyai-0.1.0.dist-info/licenses/LICENSE +21 -0
  57. stratifyai-0.1.0.dist-info/top_level.txt +2 -0
stratifyai/utils/csv_extractor.py
@@ -0,0 +1,197 @@
+ """CSV/DataFrame schema extraction for intelligent file analysis.
+
+ This module extracts compact schema information from CSV files to reduce
+ token usage by 99%+ while preserving essential structure information.
+ """
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any
+ import pandas as pd
+
+
+ @dataclass
+ class ColumnSchema:
+     """Schema information for a single column."""
+     name: str
+     dtype: str
+     null_count: int
+     null_percentage: float
+     unique_count: int
+     sample_values: List[Any]
+     numeric_stats: Optional[Dict[str, float]] = None
+
+
+ @dataclass
+ class CSVSchema:
+     """Complete schema information for a CSV file."""
+     file_path: str
+     row_count: int
+     column_count: int
+     columns: List[ColumnSchema]
+     memory_usage_mb: float
+
+     def to_text(self) -> str:
+         """Convert schema to human-readable text representation.
+
+         Returns:
+             Formatted schema description
+         """
+         lines = [
+             f"CSV File: {self.file_path}",
+             f"Dimensions: {self.row_count:,} rows × {self.column_count} columns",
+             f"Memory: {self.memory_usage_mb:.2f} MB",
+             "",
+             "Column Schema:"
+         ]
+
+         for col in self.columns:
+             # Basic info
+             lines.append(f"\n {col.name} ({col.dtype})")
+             lines.append(f" - Null: {col.null_count:,} ({col.null_percentage:.1f}%)")
+             lines.append(f" - Unique: {col.unique_count:,}")
+
+             # Numeric stats if available
+             if col.numeric_stats:
+                 stats = col.numeric_stats
+                 lines.append(f" - Range: {stats['min']:.2f} to {stats['max']:.2f}")
+                 lines.append(f" - Mean: {stats['mean']:.2f}, Median: {stats['median']:.2f}")
+                 lines.append(f" - Std: {stats['std']:.2f}")
+
+             # Sample values
+             samples_str = ", ".join(str(v) for v in col.sample_values[:5])
+             lines.append(f" - Samples: {samples_str}")
+
+         return "\n".join(lines)
+
+
+ def extract_csv_schema(
+     file_path: Path,
+     sample_size: int = 5,
+     max_rows: Optional[int] = None
+ ) -> CSVSchema:
+     """Extract schema information from a CSV file.
+
+     Args:
+         file_path: Path to CSV file
+         sample_size: Number of sample values to extract per column
+         max_rows: Maximum number of rows to read (None = all)
+
+     Returns:
+         CSVSchema object with extracted information
+
+     Raises:
+         FileNotFoundError: If file doesn't exist
+         pd.errors.EmptyDataError: If CSV is empty
+         pd.errors.ParserError: If CSV is malformed
+     """
+     if not file_path.exists():
+         raise FileNotFoundError(f"CSV file not found: {file_path}")
+
+     # Read CSV
+     df = pd.read_csv(file_path, nrows=max_rows)
+
+     if df.empty:
+         raise pd.errors.EmptyDataError(f"CSV file is empty: {file_path}")
+
+     # Extract column schemas
+     columns = []
+     for col_name in df.columns:
+         col_data = df[col_name]
+
+         # Basic stats
+         null_count = col_data.isna().sum()
+         null_pct = (null_count / len(df)) * 100
+         unique_count = col_data.nunique()
+
+         # Sample values (exclude nulls)
+         non_null_values = col_data.dropna()
+         if len(non_null_values) > 0:
+             sample_values = non_null_values.sample(
+                 min(sample_size, len(non_null_values)),
+                 random_state=42
+             ).tolist()
+         else:
+             sample_values = []
+
+         # Numeric statistics if applicable
+         numeric_stats = None
+         if pd.api.types.is_numeric_dtype(col_data):
+             try:
+                 numeric_stats = {
+                     'min': float(col_data.min()),
+                     'max': float(col_data.max()),
+                     'mean': float(col_data.mean()),
+                     'median': float(col_data.median()),
+                     'std': float(col_data.std())
+                 }
+             except (ValueError, TypeError):
+                 # Handle edge cases (e.g., all NaN)
+                 pass
+
+         columns.append(ColumnSchema(
+             name=col_name,
+             dtype=str(col_data.dtype),
+             null_count=int(null_count),
+             null_percentage=float(null_pct),
+             unique_count=int(unique_count),
+             sample_values=sample_values,
+             numeric_stats=numeric_stats
+         ))
+
+     # Memory usage
+     memory_bytes = df.memory_usage(deep=True).sum()
+     memory_mb = memory_bytes / (1024 * 1024)
+
+     return CSVSchema(
+         file_path=str(file_path),
+         row_count=len(df),
+         column_count=len(df.columns),
+         columns=columns,
+         memory_usage_mb=memory_mb
+     )
+
+
+ def estimate_token_reduction(original_size: int, schema_size: int) -> float:
+     """Estimate token reduction percentage.
+
+     Args:
+         original_size: Size of original CSV in characters
+         schema_size: Size of extracted schema in characters
+
+     Returns:
+         Reduction percentage (0-100)
+     """
+     if original_size == 0:
+         return 0.0
+
+     reduction = ((original_size - schema_size) / original_size) * 100
+     return max(0.0, min(100.0, reduction))
+
+
+ def analyze_csv_file(file_path: Path) -> Dict[str, Any]:
+     """Analyze a CSV file and return comprehensive information.
+
+     Args:
+         file_path: Path to CSV file
+
+     Returns:
+         Dictionary with schema and metadata
+     """
+     schema = extract_csv_schema(file_path)
+     schema_text = schema.to_text()
+
+     # Calculate original size
+     original_size = file_path.stat().st_size
+     schema_size = len(schema_text)
+
+     reduction = estimate_token_reduction(original_size, schema_size)
+
+     return {
+         'schema': schema,
+         'schema_text': schema_text,
+         'original_size_bytes': original_size,
+         'schema_size_bytes': schema_size,
+         'token_reduction_pct': reduction,
+         'recommended_action': 'Use schema for LLM analysis instead of full CSV'
+     }
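
For orientation, a minimal usage sketch of the module above (not part of the released files), assuming the wheel is installed and importable as stratifyai; the file name sales.csv is a placeholder:

    from pathlib import Path

    from stratifyai.utils.csv_extractor import analyze_csv_file

    # "sales.csv" is a hypothetical input; any local CSV works the same way.
    result = analyze_csv_file(Path("sales.csv"))

    print(result["schema_text"])                      # compact schema to send to the LLM
    print(f"Token reduction: {result['token_reduction_pct']:.1f}%")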
stratifyai/utils/file_analyzer.py
@@ -0,0 +1,192 @@
+ """File analysis utilities for detecting file types and estimating token usage."""
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Optional
+ from enum import Enum
+
+ from .token_counter import estimate_tokens, check_token_limit
+
+
+ class FileType(Enum):
+     """Supported file types for intelligent processing."""
+     CSV = "csv"
+     JSON = "json"
+     LOG = "log"
+     PYTHON = "python"
+     JAVASCRIPT = "javascript"
+     JAVA = "java"
+     GO = "go"
+     TEXT = "text"
+     MARKDOWN = "markdown"
+     UNKNOWN = "unknown"
+
+
+ @dataclass
+ class FileAnalysis:
+     """Results of file analysis."""
+     file_path: Path
+     file_type: FileType
+     file_size_bytes: int
+     file_size_mb: float
+     content_length: int
+     estimated_tokens: int
+     exceeds_threshold: bool
+     context_window: int
+     percentage_used: float
+     recommendation: str
+
+
+ def detect_file_type(file_path: Path) -> FileType:
+     """
+     Detect the type of file based on extension and content.
+
+     Args:
+         file_path: Path to the file
+
+     Returns:
+         FileType enum value
+     """
+     suffix = file_path.suffix.lower()
+
+     # Map file extensions to types
+     type_mapping = {
+         ".csv": FileType.CSV,
+         ".json": FileType.JSON,
+         ".log": FileType.LOG,
+         ".py": FileType.PYTHON,
+         ".js": FileType.JAVASCRIPT,
+         ".ts": FileType.JAVASCRIPT,
+         ".jsx": FileType.JAVASCRIPT,
+         ".tsx": FileType.JAVASCRIPT,
+         ".java": FileType.JAVA,
+         ".go": FileType.GO,
+         ".txt": FileType.TEXT,
+         ".md": FileType.MARKDOWN,
+         ".markdown": FileType.MARKDOWN,
+     }
+
+     return type_mapping.get(suffix, FileType.UNKNOWN)
+
+
+ def get_recommendation(
+     file_type: FileType,
+     estimated_tokens: int,
+     context_window: int,
+     percentage_used: float
+ ) -> str:
+     """
+     Get processing recommendation based on file analysis.
+
+     Args:
+         file_type: Detected file type
+         estimated_tokens: Estimated token count
+         context_window: Model's context window
+         percentage_used: Percentage of context window used
+
+     Returns:
+         Recommendation string
+     """
+     # File fits comfortably in context
+     if percentage_used < 0.5:
+         return "✓ File fits well in model context - direct upload recommended"
+
+     # File approaching context limit
+     elif percentage_used < 0.8:
+         return "⚠ File uses >50% of context - consider chunking for better performance"
+
+     # File exceeds safe threshold
+     else:
+         if file_type == FileType.CSV:
+             return "⚠ Large CSV detected - use schema extraction (--extract-mode schema) for 99% token reduction"
+         elif file_type == FileType.JSON:
+             return "⚠ Large JSON detected - use schema extraction (--extract-mode schema) for 95% token reduction"
+         elif file_type == FileType.LOG:
+             return "⚠ Large log file detected - use error extraction (--extract errors) for 90% token reduction"
+         elif file_type in [FileType.PYTHON, FileType.JAVASCRIPT, FileType.JAVA, FileType.GO]:
+             return "⚠ Large code file detected - use code extraction (--extract summary) for 80% token reduction"
+         elif estimated_tokens > context_window:
+             return "✗ File exceeds model context - chunking required (--chunked)"
+         else:
+             return "⚠ File near context limit - chunking recommended (--chunked) for 90% token reduction"
+
+
+ def analyze_file(
+     file_path: Path,
+     provider: str = "openai",
+     model: str = "gpt-4o",
+     threshold: float = 0.8
+ ) -> FileAnalysis:
+     """
+     Analyze a file and provide recommendations for processing.
+
+     Args:
+         file_path: Path to the file to analyze
+         provider: LLM provider for token estimation
+         model: LLM model for context window limits
+         threshold: Warning threshold (default 0.8 = 80%)
+
+     Returns:
+         FileAnalysis object with complete analysis
+
+     Raises:
+         FileNotFoundError: If file doesn't exist
+
+     Examples:
+         >>> analysis = analyze_file(Path("data.csv"), "openai", "gpt-4o")
+         >>> print(f"Tokens: {analysis.estimated_tokens}")
+         >>> print(analysis.recommendation)
+     """
+     if not file_path.exists():
+         raise FileNotFoundError(f"File not found: {file_path}")
+
+     # Get file info
+     file_size_bytes = file_path.stat().st_size
+     file_size_mb = file_size_bytes / (1024 * 1024)
+
+     # Detect file type
+     file_type = detect_file_type(file_path)
+
+     # Read file content (with size limit for very large files)
+     MAX_READ_SIZE = 10 * 1024 * 1024  # 10MB max for token estimation
+     try:
+         if file_size_bytes > MAX_READ_SIZE:
+             # For very large files, estimate based on sample
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 sample = f.read(MAX_READ_SIZE)
+             # Extrapolate token count
+             sample_tokens = estimate_tokens(sample, provider, model)
+             estimated_tokens = int(sample_tokens * (file_size_bytes / len(sample)))
+             content_length = file_size_bytes  # Approximate
+         else:
+             with open(file_path, 'r', encoding='utf-8') as f:
+                 content = f.read()
+             content_length = len(content)
+             estimated_tokens = estimate_tokens(content, provider, model)
+     except UnicodeDecodeError:
+         # Binary file - rough estimate based on size
+         content_length = file_size_bytes
+         estimated_tokens = int(file_size_bytes / 4)  # Very rough estimate
+
+     # Check token limits
+     exceeds_threshold, context_window, percentage_used = check_token_limit(
+         estimated_tokens, provider, model, threshold
+     )
+
+     # Get recommendation
+     recommendation = get_recommendation(
+         file_type, estimated_tokens, context_window, percentage_used
+     )
+
+     return FileAnalysis(
+         file_path=file_path,
+         file_type=file_type,
+         file_size_bytes=file_size_bytes,
+         file_size_mb=file_size_mb,
+         content_length=content_length,
+         estimated_tokens=estimated_tokens,
+         exceeds_threshold=exceeds_threshold,
+         context_window=context_window,
+         percentage_used=percentage_used,
+         recommendation=recommendation
+     )
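
For orientation, a minimal usage sketch mirroring the docstring example above (not part of the released files); app.log is a placeholder path, and token estimates rely on the sibling token_counter module:

    from pathlib import Path

    from stratifyai.utils.file_analyzer import analyze_file

    # Defaults are provider="openai", model="gpt-4o"; shown explicitly here.
    analysis = analyze_file(Path("app.log"), provider="openai", model="gpt-4o")

    print(analysis.file_type)           # FileType.LOG, inferred from the .log extension
    print(analysis.estimated_tokens)    # extrapolated from a 10 MB sample for very large files
    print(analysis.recommendation)      # chunking / extraction hint from get_recommendation()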
stratifyai/utils/json_extractor.py
@@ -0,0 +1,219 @@
+ """JSON schema extraction for intelligent file analysis.
+
+ This module extracts compact schema information from JSON files to reduce
+ token usage by 95%+ while preserving essential structure information.
+ """
+
+ import json
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, List, Optional, Any, Union
+
+
+ @dataclass
+ class JSONSchema:
+     """Schema information for a JSON structure."""
+     type: str  # object, array, string, number, boolean, null
+     keys: Optional[List[str]] = None  # For objects
+     value_schema: Optional['JSONSchema'] = None  # For arrays
+     nested_schemas: Optional[Dict[str, 'JSONSchema']] = None  # For objects
+     sample_values: Optional[List[Any]] = None  # For primitives and arrays
+     depth: int = 0
+
+     def to_text(self, indent: int = 0) -> str:
+         """Convert schema to human-readable text representation.
+
+         Args:
+             indent: Current indentation level
+
+         Returns:
+             Formatted schema description
+         """
+         prefix = " " * indent
+         lines = []
+
+         if self.type == "object":
+             lines.append(f"{prefix}Object with {len(self.keys or [])} keys:")
+             if self.nested_schemas:
+                 for key, schema in self.nested_schemas.items():
+                     lines.append(f"{prefix} {key}:")
+                     lines.append(schema.to_text(indent + 2))
+
+         elif self.type == "array":
+             lines.append(f"{prefix}Array:")
+             if self.value_schema:
+                 lines.append(f"{prefix} Elements:")
+                 lines.append(self.value_schema.to_text(indent + 2))
+             if self.sample_values:
+                 sample_str = ", ".join(str(v)[:50] for v in self.sample_values[:3])
+                 lines.append(f"{prefix} Sample: [{sample_str}]")
+
+         else:
+             # Primitive type
+             lines.append(f"{prefix}{self.type}")
+             if self.sample_values:
+                 sample_str = ", ".join(str(v)[:50] for v in self.sample_values[:5])
+                 lines.append(f"{prefix} Samples: {sample_str}")
+
+         return "\n".join(lines)
+
+
+ def infer_json_schema(
+     data: Any,
+     max_depth: int = 10,
+     current_depth: int = 0,
+     sample_size: int = 3
+ ) -> JSONSchema:
+     """Infer schema from JSON data structure.
+
+     Args:
+         data: JSON data (dict, list, or primitive)
+         max_depth: Maximum nesting depth to analyze
+         current_depth: Current depth in recursion
+         sample_size: Number of sample values to collect
+
+     Returns:
+         JSONSchema object describing the structure
+     """
+     if current_depth >= max_depth:
+         return JSONSchema(type="...", depth=current_depth)
+
+     if data is None:
+         return JSONSchema(type="null", depth=current_depth)
+
+     elif isinstance(data, bool):
+         return JSONSchema(
+             type="boolean",
+             sample_values=[data],
+             depth=current_depth
+         )
+
+     elif isinstance(data, (int, float)):
+         return JSONSchema(
+             type="number",
+             sample_values=[data],
+             depth=current_depth
+         )
+
+     elif isinstance(data, str):
+         return JSONSchema(
+             type="string",
+             sample_values=[data[:100]],  # Truncate long strings
+             depth=current_depth
+         )
+
+     elif isinstance(data, list):
+         # Analyze array elements
+         sample_values = data[:sample_size] if data else []
+
+         # Infer schema from first element (assuming homogeneous array)
+         value_schema = None
+         if data:
+             value_schema = infer_json_schema(
+                 data[0],
+                 max_depth,
+                 current_depth + 1,
+                 sample_size
+             )
+
+         return JSONSchema(
+             type="array",
+             value_schema=value_schema,
+             sample_values=sample_values,
+             depth=current_depth
+         )
+
+     elif isinstance(data, dict):
+         # Analyze object keys and values
+         keys = list(data.keys())
+         nested_schemas = {}
+
+         for key in keys:
+             nested_schemas[key] = infer_json_schema(
+                 data[key],
+                 max_depth,
+                 current_depth + 1,
+                 sample_size
+             )
+
+         return JSONSchema(
+             type="object",
+             keys=keys,
+             nested_schemas=nested_schemas,
+             depth=current_depth
+         )
+
+     else:
+         return JSONSchema(type="unknown", depth=current_depth)
+
+
+ def extract_json_schema(file_path: Path) -> Dict[str, Any]:
+     """Extract schema information from a JSON file.
+
+     Args:
+         file_path: Path to JSON file
+
+     Returns:
+         Dictionary with schema and metadata
+
+     Raises:
+         FileNotFoundError: If file doesn't exist
+         json.JSONDecodeError: If JSON is malformed
+     """
+     if not file_path.exists():
+         raise FileNotFoundError(f"JSON file not found: {file_path}")
+
+     # Read and parse JSON
+     with open(file_path, 'r', encoding='utf-8') as f:
+         data = json.load(f)
+
+     # Infer schema
+     schema = infer_json_schema(data)
+     schema_text = schema.to_text()
+
+     # Calculate sizes
+     original_size = file_path.stat().st_size
+     schema_size = len(schema_text)
+
+     # Determine structure type
+     if isinstance(data, dict):
+         structure = f"Object with {len(data)} keys"
+     elif isinstance(data, list):
+         structure = f"Array with {len(data)} elements"
+     else:
+         structure = f"Primitive: {type(data).__name__}"
+
+     return {
+         'schema': schema,
+         'schema_text': schema_text,
+         'structure': structure,
+         'original_size_bytes': original_size,
+         'schema_size_bytes': schema_size,
+         'token_reduction_pct': ((original_size - schema_size) / original_size * 100) if original_size > 0 else 0.0,
+         'recommended_action': 'Use schema for LLM analysis instead of full JSON'
+     }
+
+
+ def analyze_json_file(file_path: Path) -> str:
+     """Analyze a JSON file and return schema description.
+
+     Args:
+         file_path: Path to JSON file
+
+     Returns:
+         Human-readable schema description
+     """
+     result = extract_json_schema(file_path)
+
+     lines = [
+         f"JSON File: {file_path}",
+         f"Structure: {result['structure']}",
+         f"Original size: {result['original_size_bytes']:,} bytes",
+         f"Schema size: {result['schema_size_bytes']:,} bytes",
+         f"Token reduction: {result['token_reduction_pct']:.1f}%",
+         "",
+         "Schema:",
+         result['schema_text']
+     ]
+
+     return "\n".join(lines)
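
For orientation, a minimal usage sketch (not part of the released files): infer_json_schema also accepts in-memory data, which makes the schema format easy to inspect; the payload below is made up for illustration.

    from stratifyai.utils.json_extractor import infer_json_schema

    # Hypothetical payload: an object holding an array of homogeneous objects.
    payload = {"users": [{"id": 1, "name": "Ada", "active": True}], "total": 1}

    schema = infer_json_schema(payload)
    print(schema.to_text())   # indented outline: object -> array -> element object -> primitives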