stratifyai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. cli/__init__.py +5 -0
  2. cli/stratifyai_cli.py +1753 -0
  3. stratifyai/__init__.py +113 -0
  4. stratifyai/api_key_helper.py +372 -0
  5. stratifyai/caching.py +279 -0
  6. stratifyai/chat/__init__.py +54 -0
  7. stratifyai/chat/builder.py +366 -0
  8. stratifyai/chat/stratifyai_anthropic.py +194 -0
  9. stratifyai/chat/stratifyai_bedrock.py +200 -0
  10. stratifyai/chat/stratifyai_deepseek.py +194 -0
  11. stratifyai/chat/stratifyai_google.py +194 -0
  12. stratifyai/chat/stratifyai_grok.py +194 -0
  13. stratifyai/chat/stratifyai_groq.py +195 -0
  14. stratifyai/chat/stratifyai_ollama.py +201 -0
  15. stratifyai/chat/stratifyai_openai.py +209 -0
  16. stratifyai/chat/stratifyai_openrouter.py +201 -0
  17. stratifyai/chunking.py +158 -0
  18. stratifyai/client.py +292 -0
  19. stratifyai/config.py +1273 -0
  20. stratifyai/cost_tracker.py +257 -0
  21. stratifyai/embeddings.py +245 -0
  22. stratifyai/exceptions.py +91 -0
  23. stratifyai/models.py +59 -0
  24. stratifyai/providers/__init__.py +5 -0
  25. stratifyai/providers/anthropic.py +330 -0
  26. stratifyai/providers/base.py +183 -0
  27. stratifyai/providers/bedrock.py +634 -0
  28. stratifyai/providers/deepseek.py +39 -0
  29. stratifyai/providers/google.py +39 -0
  30. stratifyai/providers/grok.py +39 -0
  31. stratifyai/providers/groq.py +39 -0
  32. stratifyai/providers/ollama.py +43 -0
  33. stratifyai/providers/openai.py +344 -0
  34. stratifyai/providers/openai_compatible.py +372 -0
  35. stratifyai/providers/openrouter.py +39 -0
  36. stratifyai/py.typed +2 -0
  37. stratifyai/rag.py +381 -0
  38. stratifyai/retry.py +185 -0
  39. stratifyai/router.py +643 -0
  40. stratifyai/summarization.py +179 -0
  41. stratifyai/utils/__init__.py +11 -0
  42. stratifyai/utils/bedrock_validator.py +136 -0
  43. stratifyai/utils/code_extractor.py +327 -0
  44. stratifyai/utils/csv_extractor.py +197 -0
  45. stratifyai/utils/file_analyzer.py +192 -0
  46. stratifyai/utils/json_extractor.py +219 -0
  47. stratifyai/utils/log_extractor.py +267 -0
  48. stratifyai/utils/model_selector.py +324 -0
  49. stratifyai/utils/provider_validator.py +442 -0
  50. stratifyai/utils/token_counter.py +186 -0
  51. stratifyai/vectordb.py +344 -0
  52. stratifyai-0.1.0.dist-info/METADATA +263 -0
  53. stratifyai-0.1.0.dist-info/RECORD +57 -0
  54. stratifyai-0.1.0.dist-info/WHEEL +5 -0
  55. stratifyai-0.1.0.dist-info/entry_points.txt +2 -0
  56. stratifyai-0.1.0.dist-info/licenses/LICENSE +21 -0
  57. stratifyai-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,179 @@
1
+ """Progressive summarization utilities for large files."""
2
+
3
+ from typing import List, Optional
4
+ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
5
+
6
+ from .client import LLMClient
7
+ from .models import ChatRequest, Message
8
+ from .chunking import chunk_content, get_chunk_metadata
9
+
10
+
11
def summarize_chunk(
    chunk: str,
    client: LLMClient,
    model: str = "gpt-4o-mini",
    max_tokens: int = 1000,
    context: Optional[str] = None
) -> str:
    """Summarize one chunk of content via the configured LLM client.

    A cheaper model is used by default for cost efficiency.

    Args:
        chunk: The content chunk to summarize.
        client: LLMClient instance (already configured with provider).
        model: Model used for summarization (default: gpt-4o-mini for cost).
        max_tokens: Maximum tokens allowed for the summary.
        context: Optional context describing the overall document.

    Returns:
        Summary text for the chunk.
    """
    # Prompt wording depends on whether document-level context is known.
    if context:
        prompt = f"""Summarize the following section from a larger document.

Context: {context}

Section to summarize:
{chunk}

Provide a concise summary that preserves key information."""
    else:
        prompt = f"""Summarize the following text concisely, preserving key information:

{chunk}"""

    # Single-turn user message; the shared client handles provider routing.
    chat_request = ChatRequest(
        model=model,
        messages=[Message(role="user", content=prompt)],
        max_tokens=max_tokens
    )
    return client.chat_completion(chat_request).content
58
+
59
+
60
def summarize_chunks_progressive(
    chunks: List[str],
    client: LLMClient,
    model: str = "gpt-4o-mini",
    context: Optional[str] = None,
    show_progress: bool = True
) -> str:
    """
    Progressively summarize multiple chunks.

    Each chunk is summarized individually, then all summaries are combined.
    If the combined text is still very long, a final summary-of-summaries
    pass is added on top.

    Args:
        chunks: List of content chunks
        client: LLMClient instance
        model: Model to use for summarization
        context: Optional context about the overall document
        show_progress: Whether to show progress bar

    Returns:
        Combined summary of all chunks
    """
    if not chunks:
        return ""

    if len(chunks) == 1:
        return summarize_chunk(chunks[0], client, model, context=context)

    total = len(chunks)
    summaries: List[str] = []

    def _summarize_all(on_chunk_done=None) -> None:
        # Shared loop body for both the progress-bar and quiet paths
        # (previously duplicated verbatim in each branch).
        for i, chunk in enumerate(chunks, 1):
            part_context = (
                f"{context} (Part {i}/{total})" if context else f"Part {i}/{total}"
            )
            summary = summarize_chunk(chunk, client, model, context=part_context)
            summaries.append(f"**Part {i}/{total}:**\n{summary}")
            if on_chunk_done is not None:
                on_chunk_done()

    if show_progress:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            BarColumn(),
            TaskProgressColumn(),
        ) as progress:
            task = progress.add_task(
                f"[cyan]Summarizing {total} chunks...",
                total=total
            )
            _summarize_all(lambda: progress.update(task, advance=1))
    else:
        _summarize_all()

    # Combine summaries
    combined = "\n\n".join(summaries)

    # If combined summaries are still very long, summarize the summaries
    if len(combined) > 10000:  # Arbitrary threshold
        final_summary = summarize_chunk(
            combined,
            client,
            model,
            context="Combined summaries of document sections"
        )
        return f"**Overall Summary:**\n{final_summary}\n\n**Detailed Summaries:**\n{combined}"

    return combined
135
+
136
+
137
def summarize_file(
    content: str,
    client: LLMClient,
    chunk_size: int = 50000,
    model: str = "gpt-4o-mini",
    context: Optional[str] = None,
    show_progress: bool = True
) -> dict:
    """
    Summarize a large file using progressive chunking.

    Args:
        content: Full file content
        client: LLMClient instance
        chunk_size: Size of chunks in characters
        model: Model to use for summarization
        context: Optional context about the document
        show_progress: Whether to show progress

    Returns:
        Dictionary with summary and metadata
    """
    # Chunk the content
    chunks = chunk_content(content, chunk_size=chunk_size)
    metadata = get_chunk_metadata(chunks)

    # Summarize chunks
    summary = summarize_chunks_progressive(
        chunks,
        client,
        model=model,
        context=context,
        show_progress=show_progress
    )

    # Guard against empty input: dividing by len(content) == 0 previously
    # raised ZeroDivisionError.
    if content:
        reduction_pct = round((1 - len(summary) / len(content)) * 100, 1)
    else:
        reduction_pct = 0.0

    return {
        "summary": summary,
        "original_length": len(content),
        "summary_length": len(summary),
        "reduction_percentage": reduction_pct,
        "num_chunks": metadata["num_chunks"],
        "chunk_metadata": metadata
    }
@@ -0,0 +1,11 @@
1
+ """Utility modules for StratifyAI."""
2
+
3
+ from .token_counter import estimate_tokens, count_tokens_for_messages
4
+ from .file_analyzer import analyze_file, FileAnalysis
5
+
6
+ __all__ = [
7
+ "estimate_tokens",
8
+ "count_tokens_for_messages",
9
+ "analyze_file",
10
+ "FileAnalysis",
11
+ ]
@@ -0,0 +1,136 @@
1
+ """Bedrock model validation utility.
2
+
3
+ Validates AWS Bedrock model availability using boto3.
4
+ """
5
+
6
# Timing support for the validation_time_ms field reported by validators.
import time
from typing import Dict, List, Any, Optional

# boto3 is an optional dependency: when it is missing, validation degrades
# gracefully (callers check BOTO3_AVAILABLE) instead of failing at import.
try:
    import boto3
    from botocore.exceptions import ClientError, NoCredentialsError, BotoCoreError
    BOTO3_AVAILABLE = True
except ImportError:
    BOTO3_AVAILABLE = False
15
+
16
+
17
def validate_bedrock_models(
    model_ids: List[str],
    region_name: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Validate which Bedrock models are available in the user's AWS account/region.

    Args:
        model_ids: List of model IDs to validate
        region_name: AWS region (defaults to AWS_DEFAULT_REGION or us-east-1)

    Returns:
        Dict containing:
        - valid_models: List of model IDs that are available
        - invalid_models: List of model IDs that are NOT available
        - validation_time_ms: Time taken to validate in milliseconds
        - error: Error message if validation failed (None if successful)
    """
    import os

    result: Dict[str, Any] = {
        "valid_models": [],
        "invalid_models": [],
        "invalid_models": [],
    }
    result = {
        "valid_models": [],
        "invalid_models": [],
        "validation_time_ms": 0,
        "error": None,
    }

    if not BOTO3_AVAILABLE:
        # Cannot query AWS without boto3; optimistically report every model.
        result["error"] = "boto3 not installed"
        result["valid_models"] = model_ids  # Assume all valid if can't check
        return result

    start_time = time.time()

    try:
        # Region precedence: explicit argument > environment > us-east-1.
        region = region_name or os.getenv("AWS_DEFAULT_REGION", "us-east-1")

        # "bedrock" (control plane) exposes list_foundation_models;
        # "bedrock-runtime" does not.
        bedrock_client = boto3.client(
            service_name="bedrock",
            region_name=region
        )

        listing = bedrock_client.list_foundation_models()
        available = {entry["modelId"] for entry in listing.get("modelSummaries", [])}

        # Partition the requested IDs by availability.
        for model_id in model_ids:
            bucket = "valid_models" if model_id in available else "invalid_models"
            result[bucket].append(model_id)

    except NoCredentialsError:
        result["error"] = "AWS credentials not configured"
        result["valid_models"] = model_ids  # Show all, let runtime handle auth

    except ClientError as e:
        error_info = e.response.get("Error", {})
        error_code = error_info.get("Code", "Unknown")
        error_msg = error_info.get("Message", str(e))
        result["error"] = f"AWS API error ({error_code}): {error_msg}"
        result["valid_models"] = model_ids  # Show all on error

    except BotoCoreError as e:
        result["error"] = f"AWS connection error: {str(e)}"
        result["valid_models"] = model_ids  # Show all on error

    except Exception as e:
        result["error"] = f"Validation failed: {str(e)}"
        result["valid_models"] = model_ids  # Show all on error

    finally:
        result["validation_time_ms"] = int((time.time() - start_time) * 1000)

    return result
94
+
95
+
96
def get_validated_interactive_models(
    region_name: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Get validated interactive Bedrock models with metadata.

    Convenience wrapper: validates the curated interactive models and
    returns them together with their display metadata.

    Args:
        region_name: AWS region (defaults to AWS_DEFAULT_REGION or us-east-1)

    Returns:
        Dict containing:
        - models: Dict mapping model_id to metadata (display_name, description, category)
        - validation_result: Full validation result dict
    """
    from ..config import INTERACTIVE_BEDROCK_MODELS, BEDROCK_MODELS

    # Validate the curated set of interactive model IDs.
    interactive_ids = list(INTERACTIVE_BEDROCK_MODELS.keys())
    validation_result = validate_bedrock_models(interactive_ids, region_name)

    # For each available model, merge the full config with the interactive
    # display metadata; interactive keys win on clash.
    models = {
        model_id: {
            **BEDROCK_MODELS.get(model_id, {}),
            **INTERACTIVE_BEDROCK_MODELS.get(model_id, {}),
        }
        for model_id in validation_result["valid_models"]
    }

    return {
        "models": models,
        "validation_result": validation_result,
    }
@@ -0,0 +1,327 @@
1
+ """Code structure extraction for intelligent file analysis.
2
+
3
+ This module extracts structural information from code files using AST to reduce
4
+ token usage by 80%+ while preserving essential code structure.
5
+ """
6
+
7
+ import ast
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Dict, List, Optional, Any
11
+
12
+
13
@dataclass
class FunctionInfo:
    """Information about a function."""
    name: str  # Function name as written in source
    line_number: int  # 1-based line of the `def` statement
    params: List[str]  # Parameter names, optionally with ": <annotation>" suffixes
    returns: Optional[str] = None  # Unparsed return annotation, if present
    docstring: Optional[str] = None  # First line of the docstring, if present
    decorators: List[str] = field(default_factory=list)  # Unparsed decorator expressions
    is_async: bool = False  # True for `async def`
23
+
24
+
25
@dataclass
class ClassInfo:
    """Information about a class."""
    name: str  # Class name as written in source
    line_number: int  # 1-based line of the `class` statement
    bases: List[str]  # Unparsed base-class expressions
    methods: List[FunctionInfo]  # Methods collected while visiting the class body
    docstring: Optional[str] = None  # First line of the class docstring, if present
    decorators: List[str] = field(default_factory=list)  # Unparsed decorator expressions
34
+
35
+
36
@dataclass
class CodeStructure:
    """Complete code structure information."""
    file_path: str  # Path of the analyzed file
    language: str  # e.g. "Python"
    imports: List[str]  # Rendered import statements
    functions: List[FunctionInfo]  # Module-level (non-method) functions
    classes: List[ClassInfo]  # Classes with their methods
    total_lines: int  # Line count of the source file
    docstring: Optional[str] = None  # First line of the module docstring

    def to_text(self) -> str:
        """Render the extracted structure as human-readable text.

        Returns:
            Formatted code structure
        """
        out = [
            f"Code File: {self.file_path}",
            f"Language: {self.language}",
            f"Total Lines: {self.total_lines:,}",
        ]

        if self.docstring:
            out.append(f"\nModule Docstring:\n {self.docstring[:200]}")

        # Imports: first 20 listed, remainder elided with a count.
        if self.imports:
            out.append(f"\nImports ({len(self.imports)}):")
            out.extend(f" - {imp}" for imp in self.imports[:20])
            hidden_imports = len(self.imports) - 20
            if hidden_imports > 0:
                out.append(f" ... and {hidden_imports} more")

        # Module-level functions with signatures and first docstring lines.
        if self.functions:
            out.append(f"\nFunctions ({len(self.functions)}):")
            for fn in self.functions:
                deco = f"@{', @'.join(fn.decorators)} " if fn.decorators else ""
                prefix = "async " if fn.is_async else ""
                arglist = ", ".join(fn.params)
                ret = f" -> {fn.returns}" if fn.returns else ""
                out.append(f" [Line {fn.line_number}] {deco}{prefix}def {fn.name}({arglist}){ret}")
                if fn.docstring:
                    out.append(f" \"{fn.docstring[:100]}\"")

        # Classes with bases and up to 10 methods each.
        if self.classes:
            out.append(f"\nClasses ({len(self.classes)}):")
            for klass in self.classes:
                deco = f"@{', @'.join(klass.decorators)} " if klass.decorators else ""
                base_list = f"({', '.join(klass.bases)})" if klass.bases else ""
                out.append(f" [Line {klass.line_number}] {deco}class {klass.name}{base_list}:")
                if klass.docstring:
                    out.append(f" \"{klass.docstring[:100]}\"")
                if klass.methods:
                    out.append(f" Methods ({len(klass.methods)}):")
                    out.extend(
                        f" - {m.name}({', '.join(m.params)})" for m in klass.methods[:10]
                    )
                    hidden_methods = len(klass.methods) - 10
                    if hidden_methods > 0:
                        out.append(f" ... and {hidden_methods} more")

        return "\n".join(out)
100
+
101
+
102
class PythonASTVisitor(ast.NodeVisitor):
    """AST visitor to extract code structure from Python files.

    Collects rendered import statements, module-level functions, and classes
    (with their methods) while walking the tree.
    """

    def __init__(self):
        self.imports: List[str] = []  # "import x [as y]" / "from m import x [as y]"
        self.functions: List[FunctionInfo] = []  # module-level (non-method) functions
        self.classes: List[ClassInfo] = []
        self.current_class: Optional[str] = None  # class whose body is being visited

    def visit_Import(self, node: ast.Import):
        """Record a plain ``import`` statement."""
        for alias in node.names:
            import_str = alias.name
            if alias.asname:
                import_str += f" as {alias.asname}"
            self.imports.append(f"import {import_str}")
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom):
        """Record a ``from ... import ...`` statement."""
        # Relative imports ("from . import x") have module=None.
        module = node.module or ""
        for alias in node.names:
            import_str = alias.name
            if alias.asname:
                import_str += f" as {alias.asname}"
            self.imports.append(f"from {module} import {import_str}")
        self.generic_visit(node)

    def visit_FunctionDef(self, node: ast.FunctionDef):
        """Visit function definition."""
        self._process_function(node, is_async=False)
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef):
        """Visit async function definition."""
        self._process_function(node, is_async=True)
        self.generic_visit(node)

    @staticmethod
    def _unparse_decorators(node) -> List[str]:
        """Best-effort unparse of a node's decorator list.

        Previously duplicated in two visitors with bare ``except:`` clauses;
        narrowed to ``except Exception`` so KeyboardInterrupt/SystemExit
        propagate.
        """
        decorators = []
        for decorator in node.decorator_list:
            try:
                decorators.append(ast.unparse(decorator))
            except Exception:
                decorators.append("@decorator")
        return decorators

    def _process_function(self, node, is_async: bool):
        """Build a FunctionInfo and attach it to the current class or module."""
        # Extract parameters, keeping annotations when they can be unparsed.
        params = []
        for arg in node.args.args:
            param_name = arg.arg
            if arg.annotation:
                try:
                    param_name += f": {ast.unparse(arg.annotation)}"
                except Exception:
                    # Unparse failed: keep the bare parameter name.
                    pass
            params.append(param_name)

        # Extract return type annotation, if unparseable leave it None.
        returns = None
        if node.returns:
            try:
                returns = ast.unparse(node.returns)
            except Exception:
                pass

        # Extract docstring (first line only, to keep output compact).
        docstring = ast.get_docstring(node)
        if docstring:
            docstring = docstring.split('\n')[0]

        func_info = FunctionInfo(
            name=node.name,
            line_number=node.lineno,
            params=params,
            returns=returns,
            docstring=docstring,
            decorators=self._unparse_decorators(node),
            is_async=is_async
        )

        if self.current_class:
            # Attach as a method of the class currently being visited.
            for cls in self.classes:
                if cls.name == self.current_class:
                    cls.methods.append(func_info)
                    break
        else:
            self.functions.append(func_info)

    def visit_ClassDef(self, node: ast.ClassDef):
        """Visit class definition."""
        # Extract base classes (best-effort unparse).
        bases = []
        for base in node.bases:
            try:
                bases.append(ast.unparse(base))
            except Exception:
                bases.append("BaseClass")

        # Extract docstring (first line only).
        docstring = ast.get_docstring(node)
        if docstring:
            docstring = docstring.split('\n')[0]

        self.classes.append(ClassInfo(
            name=node.name,
            line_number=node.lineno,
            bases=bases,
            methods=[],
            docstring=docstring,
            decorators=self._unparse_decorators(node)
        ))

        # Recurse with this class as current so its methods attach to it,
        # then restore the previous class (handles nested classes).
        old_class = self.current_class
        self.current_class = node.name
        self.generic_visit(node)
        self.current_class = old_class
233
+
234
+
235
def extract_python_structure(file_path: Path) -> CodeStructure:
    """Extract structure from Python file using AST.

    Args:
        file_path: Path to Python file

    Returns:
        CodeStructure object

    Raises:
        FileNotFoundError: If file doesn't exist
        SyntaxError: If Python code is malformed
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Python file not found: {file_path}")

    # Read file
    source_code = file_path.read_text(encoding='utf-8')

    # Parse AST; chain the original error so line/offset details survive.
    try:
        tree = ast.parse(source_code, filename=str(file_path))
    except SyntaxError as e:
        raise SyntaxError(f"Failed to parse {file_path}: {e}") from e

    # Extract module docstring (first line only, for compact output)
    docstring = ast.get_docstring(tree)
    if docstring:
        docstring = docstring.split('\n')[0]

    # Walk the tree collecting imports, functions, and classes
    visitor = PythonASTVisitor()
    visitor.visit(tree)

    # Editor-style line count: newlines + 1
    total_lines = source_code.count('\n') + 1

    return CodeStructure(
        file_path=str(file_path),
        language="Python",
        imports=visitor.imports,
        functions=visitor.functions,
        classes=visitor.classes,
        total_lines=total_lines,
        docstring=docstring
    )
282
+
283
+
284
def analyze_code_file(file_path: Path) -> Dict[str, Any]:
    """Analyze a code file and return structure information.

    Args:
        file_path: Path to code file

    Returns:
        Dictionary with structure and metadata
    """
    # Detect language from extension.
    suffix = file_path.suffix.lower()

    if suffix == '.py':
        structure = extract_python_structure(file_path)
        rendered = structure.to_text()

        # Compare structure size against the raw file to quantify savings.
        original_size = file_path.stat().st_size
        rendered_size = len(rendered)
        reduction = (
            (original_size - rendered_size) / original_size * 100
            if original_size > 0
            else 0.0
        )

        return {
            'structure': structure,
            'structure_text': rendered,
            'original_size_bytes': original_size,
            'structure_size_bytes': rendered_size,
            'token_reduction_pct': reduction,
            'recommended_action': 'Use structure for LLM analysis instead of full code'
        }

    # Non-Python files: no AST support, report basic stats only.
    original_size = file_path.stat().st_size
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
        line_count = sum(1 for _ in f)

    language = suffix[1:] if suffix else 'unknown'
    rendered = f"Code File: {file_path}\nLanguage: {language}\nTotal Lines: {line_count}\n\nNote: AST extraction only available for Python files."

    return {
        'structure': None,
        'structure_text': rendered,
        'original_size_bytes': original_size,
        'structure_size_bytes': len(rendered),
        'token_reduction_pct': 0.0,
        'recommended_action': 'Full file analysis required (non-Python)'
    }