tree-sitter-analyzer 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (78)
  1. tree_sitter_analyzer/__init__.py +121 -0
  2. tree_sitter_analyzer/__main__.py +12 -0
  3. tree_sitter_analyzer/api.py +539 -0
  4. tree_sitter_analyzer/cli/__init__.py +39 -0
  5. tree_sitter_analyzer/cli/__main__.py +13 -0
  6. tree_sitter_analyzer/cli/commands/__init__.py +27 -0
  7. tree_sitter_analyzer/cli/commands/advanced_command.py +88 -0
  8. tree_sitter_analyzer/cli/commands/base_command.py +155 -0
  9. tree_sitter_analyzer/cli/commands/default_command.py +19 -0
  10. tree_sitter_analyzer/cli/commands/partial_read_command.py +133 -0
  11. tree_sitter_analyzer/cli/commands/query_command.py +82 -0
  12. tree_sitter_analyzer/cli/commands/structure_command.py +121 -0
  13. tree_sitter_analyzer/cli/commands/summary_command.py +93 -0
  14. tree_sitter_analyzer/cli/commands/table_command.py +233 -0
  15. tree_sitter_analyzer/cli/info_commands.py +121 -0
  16. tree_sitter_analyzer/cli_main.py +276 -0
  17. tree_sitter_analyzer/core/__init__.py +20 -0
  18. tree_sitter_analyzer/core/analysis_engine.py +574 -0
  19. tree_sitter_analyzer/core/cache_service.py +330 -0
  20. tree_sitter_analyzer/core/engine.py +560 -0
  21. tree_sitter_analyzer/core/parser.py +288 -0
  22. tree_sitter_analyzer/core/query.py +502 -0
  23. tree_sitter_analyzer/encoding_utils.py +460 -0
  24. tree_sitter_analyzer/exceptions.py +340 -0
  25. tree_sitter_analyzer/file_handler.py +222 -0
  26. tree_sitter_analyzer/formatters/__init__.py +1 -0
  27. tree_sitter_analyzer/formatters/base_formatter.py +168 -0
  28. tree_sitter_analyzer/formatters/formatter_factory.py +74 -0
  29. tree_sitter_analyzer/formatters/java_formatter.py +270 -0
  30. tree_sitter_analyzer/formatters/python_formatter.py +235 -0
  31. tree_sitter_analyzer/interfaces/__init__.py +10 -0
  32. tree_sitter_analyzer/interfaces/cli.py +557 -0
  33. tree_sitter_analyzer/interfaces/cli_adapter.py +319 -0
  34. tree_sitter_analyzer/interfaces/mcp_adapter.py +170 -0
  35. tree_sitter_analyzer/interfaces/mcp_server.py +416 -0
  36. tree_sitter_analyzer/java_analyzer.py +219 -0
  37. tree_sitter_analyzer/language_detector.py +400 -0
  38. tree_sitter_analyzer/language_loader.py +228 -0
  39. tree_sitter_analyzer/languages/__init__.py +11 -0
  40. tree_sitter_analyzer/languages/java_plugin.py +1113 -0
  41. tree_sitter_analyzer/languages/python_plugin.py +712 -0
  42. tree_sitter_analyzer/mcp/__init__.py +32 -0
  43. tree_sitter_analyzer/mcp/resources/__init__.py +47 -0
  44. tree_sitter_analyzer/mcp/resources/code_file_resource.py +213 -0
  45. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +550 -0
  46. tree_sitter_analyzer/mcp/server.py +319 -0
  47. tree_sitter_analyzer/mcp/tools/__init__.py +36 -0
  48. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +558 -0
  49. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +245 -0
  50. tree_sitter_analyzer/mcp/tools/base_tool.py +55 -0
  51. tree_sitter_analyzer/mcp/tools/get_positions_tool.py +448 -0
  52. tree_sitter_analyzer/mcp/tools/read_partial_tool.py +302 -0
  53. tree_sitter_analyzer/mcp/tools/table_format_tool.py +359 -0
  54. tree_sitter_analyzer/mcp/tools/universal_analyze_tool.py +476 -0
  55. tree_sitter_analyzer/mcp/utils/__init__.py +106 -0
  56. tree_sitter_analyzer/mcp/utils/error_handler.py +549 -0
  57. tree_sitter_analyzer/models.py +481 -0
  58. tree_sitter_analyzer/output_manager.py +264 -0
  59. tree_sitter_analyzer/plugins/__init__.py +334 -0
  60. tree_sitter_analyzer/plugins/base.py +446 -0
  61. tree_sitter_analyzer/plugins/java_plugin.py +625 -0
  62. tree_sitter_analyzer/plugins/javascript_plugin.py +439 -0
  63. tree_sitter_analyzer/plugins/manager.py +355 -0
  64. tree_sitter_analyzer/plugins/plugin_loader.py +83 -0
  65. tree_sitter_analyzer/plugins/python_plugin.py +598 -0
  66. tree_sitter_analyzer/plugins/registry.py +366 -0
  67. tree_sitter_analyzer/queries/__init__.py +27 -0
  68. tree_sitter_analyzer/queries/java.py +394 -0
  69. tree_sitter_analyzer/queries/javascript.py +149 -0
  70. tree_sitter_analyzer/queries/python.py +286 -0
  71. tree_sitter_analyzer/queries/typescript.py +230 -0
  72. tree_sitter_analyzer/query_loader.py +260 -0
  73. tree_sitter_analyzer/table_formatter.py +448 -0
  74. tree_sitter_analyzer/utils.py +201 -0
  75. tree_sitter_analyzer-0.1.0.dist-info/METADATA +581 -0
  76. tree_sitter_analyzer-0.1.0.dist-info/RECORD +78 -0
  77. tree_sitter_analyzer-0.1.0.dist-info/WHEEL +4 -0
  78. tree_sitter_analyzer-0.1.0.dist-info/entry_points.txt +8 -0
@@ -0,0 +1,460 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Optimized Encoding Utilities Module
5
+
6
+ This module provides unified encoding/decoding functionality with performance
7
+ optimizations including file-based encoding caching to reduce redundant
8
+ chardet.detect() calls.
9
+ """
10
+
11
+ import locale
12
+ import os
13
+ import sys
14
+ import threading
15
+ import time
16
+ from pathlib import Path
17
+ from typing import Any, Dict, Optional, Tuple, Union
18
+
19
+
20
# Set up encoding environment early
def _setup_encoding_environment() -> None:
    """Best-effort switch of the interpreter's I/O defaults to UTF-8.

    Sets PYTHONIOENCODING/PYTHONUTF8 in the environment and, where the
    streams support it, reconfigures stdout/stderr to UTF-8 with
    replacement of unencodable characters.  Any failure is ignored.
    """
    try:
        os.environ["PYTHONIOENCODING"] = "utf-8"
        os.environ["PYTHONUTF8"] = "1"

        # Reconfigure both standard streams when the method exists
        # (it does on TextIOWrapper in Python 3.7+).
        for stream in (sys.stdout, sys.stderr):
            reconfigure = getattr(stream, "reconfigure", None)
            if reconfigure is not None:
                reconfigure(encoding="utf-8", errors="replace")
    except Exception:
        # Deliberate best-effort: fall back to interpreter defaults.
        pass
34
+
35
+
36
# Set up environment when module is imported (side effect: mutates
# os.environ and may reconfigure stdout/stderr to UTF-8).
_setup_encoding_environment()

# Try to import chardet with fallback; CHARDET_AVAILABLE gates the
# chardet-based vs. heuristic path in encoding detection below.
try:
    import chardet

    CHARDET_AVAILABLE = True
except ImportError:
    CHARDET_AVAILABLE = False

# Import utilities with fallback so the module still works when imported
# outside the package (relative import unavailable).
try:
    from .utils import log_debug, log_warning
except ImportError:
    # Fallback logging functions with compatible signatures
    # (extra *args/**kwargs are accepted but ignored by the fallbacks).
    def log_debug(message: str, *args: Any, **kwargs: Any) -> None:
        print(f"DEBUG: {message}")

    def log_warning(message: str, *args: Any, **kwargs: Any) -> None:
        print(f"WARNING: {message}")
57
+
58
+
59
class EncodingCache:
    """Thread-safe, size-bounded TTL cache of detected file encodings.

    Maps file paths to ``(encoding, timestamp)`` pairs so repeated
    encoding detection on the same file can be skipped.  Entries expire
    after ``ttl_seconds``; the cache never holds more than ``max_size``
    entries (oldest entry is evicted when full).
    """

    def __init__(self, max_size: int = 1000, ttl_seconds: int = 3600):
        """
        Initialize encoding cache

        Args:
            max_size: Maximum number of cached entries
            ttl_seconds: Time-to-live for cache entries in seconds
        """
        # file_path -> (encoding, insertion timestamp)
        self._cache: Dict[str, Tuple[str, float]] = {}
        self._lock = threading.RLock()
        self._max_size = max_size
        self._ttl_seconds = ttl_seconds

    def get(self, file_path: str) -> Optional[str]:
        """Return the cached encoding for *file_path*, or None if missing/expired."""
        with self._lock:
            entry = self._cache.get(file_path)
            if entry is None:
                return None

            encoding, stored_at = entry
            if time.time() - stored_at > self._ttl_seconds:
                # Expired entries are dropped eagerly on access.
                del self._cache[file_path]
                return None

            return encoding

    def set(self, file_path: str, encoding: str) -> None:
        """Cache *encoding* for *file_path*, evicting as needed to stay bounded."""
        with self._lock:
            if len(self._cache) >= self._max_size:
                # Drop expired entries first; if the cache is still full,
                # evict the entry with the oldest insertion timestamp.
                self._cleanup_expired()
                if len(self._cache) >= self._max_size:
                    oldest = min(self._cache, key=lambda path: self._cache[path][1])
                    del self._cache[oldest]

            self._cache[file_path] = (encoding, time.time())

    def _cleanup_expired(self) -> None:
        """Drop every entry older than the TTL (caller must hold the lock)."""
        now = time.time()
        stale = [
            path
            for path, (_, stored_at) in self._cache.items()
            if now - stored_at > self._ttl_seconds
        ]
        for path in stale:
            del self._cache[path]

    def clear(self) -> None:
        """Remove every cached entry."""
        with self._lock:
            self._cache.clear()

    def size(self) -> int:
        """Return the number of entries currently cached."""
        with self._lock:
            return len(self._cache)
143
+
144
+
145
# Global encoding cache instance, shared by EncodingManager.detect_encoding
# and the module-level helpers clear_encoding_cache / get_encoding_cache_size.
_encoding_cache = EncodingCache()
147
+
148
+
149
class EncodingManager:
    """Centralized encoding management for consistent text processing.

    All helpers are classmethods.  ``detect_encoding`` optionally caches its
    result per file path in the module-level ``_encoding_cache`` so repeated
    reads of the same file skip redundant chardet.detect() calls.
    """

    # Used whenever no encoding is supplied and detection is inconclusive.
    DEFAULT_ENCODING = "utf-8"
    # Candidates tried (strictly) when the requested/detected encoding fails.
    FALLBACK_ENCODINGS = ["utf-8", "cp1252", "iso-8859-1", "shift_jis", "gbk"]

    @classmethod
    def safe_encode(cls, text: str, encoding: Optional[str] = None) -> bytes:
        """
        Safely encode text to bytes with fallback handling

        Args:
            text: Text to encode (None yields b"")
            encoding: Target encoding (defaults to UTF-8)

        Returns:
            Encoded bytes
        """
        if text is None:
            return b""

        target_encoding = encoding or cls.DEFAULT_ENCODING

        try:
            return text.encode(target_encoding)
        except UnicodeEncodeError as e:
            log_debug(f"Failed to encode with {target_encoding}, trying fallbacks: {e}")

            # Try fallback encodings *strictly*.  Bug fix: the original passed
            # errors="replace" here, which never raises, so the first fallback
            # always "won" and the remaining candidates were dead code.
            for fallback in cls.FALLBACK_ENCODINGS:
                if fallback != target_encoding:
                    try:
                        return text.encode(fallback)
                    except UnicodeEncodeError:
                        continue

            # Last resort: encode with error replacement
            log_warning(f"Using error replacement for encoding: {text[:50]}...")
            return text.encode(cls.DEFAULT_ENCODING, errors="replace")

    @classmethod
    def safe_decode(cls, data: bytes, encoding: Optional[str] = None) -> str:
        """
        Safely decode bytes to text with fallback handling

        Args:
            data: Bytes to decode (None/empty yields "")
            encoding: Source encoding (auto-detected if None)

        Returns:
            Decoded text
        """
        if data is None or len(data) == 0:
            return ""

        # Use provided encoding or detect
        target_encoding = encoding
        if not target_encoding:
            target_encoding = cls.detect_encoding(data)

        try:
            return data.decode(target_encoding)
        except UnicodeDecodeError as e:
            log_debug(f"Failed to decode with {target_encoding}, trying fallbacks: {e}")

            # Try fallback encodings *strictly*.  Bug fix: errors="replace"
            # here can never raise, which short-circuited the candidate list
            # on its first entry.
            for fallback in cls.FALLBACK_ENCODINGS:
                if fallback != target_encoding:
                    try:
                        return data.decode(fallback)
                    except UnicodeDecodeError:
                        continue

            # Last resort: decode with error replacement
            log_warning(
                f"Using error replacement for decoding data (length: {len(data)})"
            )
            return data.decode(cls.DEFAULT_ENCODING, errors="replace")

    @classmethod
    def detect_encoding(cls, data: bytes, file_path: Optional[str] = None) -> str:
        """
        Detect encoding of byte data with optional file-based caching

        Args:
            data: Bytes to analyze
            file_path: Optional file path for caching (improves performance)

        Returns:
            Detected encoding name
        """
        if not data:
            return cls.DEFAULT_ENCODING

        # Check cache first if file_path is provided
        if file_path:
            cached_encoding = _encoding_cache.get(file_path)
            if cached_encoding:
                log_debug(f"Using cached encoding for {file_path}: {cached_encoding}")
                return cached_encoding

        detected_encoding = cls.DEFAULT_ENCODING

        # If chardet is not available, use simple heuristics
        if not CHARDET_AVAILABLE:
            # Bug fix: check BOMs *before* attempting a UTF-8 decode.  A
            # UTF-8 BOM decodes "successfully" as plain UTF-8, so the old
            # order never reported utf-8-sig and left a stray U+FEFF in
            # the decoded text.
            if data.startswith(b"\xef\xbb\xbf"):
                detected_encoding = "utf-8-sig"
            elif data.startswith(b"\xff\xfe"):
                detected_encoding = "utf-16-le"
            elif data.startswith(b"\xfe\xff"):
                detected_encoding = "utf-16-be"
            else:
                try:
                    data.decode("utf-8")
                    detected_encoding = "utf-8"
                except UnicodeDecodeError:
                    detected_encoding = cls.DEFAULT_ENCODING
        else:
            try:
                # Use chardet for detection
                detection = chardet.detect(data)
                if detection and detection["encoding"]:
                    confidence = detection.get("confidence", 0)
                    detected_encoding = detection["encoding"].lower()

                    # Only trust high-confidence detections
                    if confidence > 0.7:
                        log_debug(
                            f"Detected encoding: {detected_encoding} (confidence: {confidence:.2f})"
                        )
                    else:
                        log_debug(
                            f"Low confidence encoding detection: {detected_encoding} (confidence: {confidence:.2f}), using default"
                        )
                        detected_encoding = cls.DEFAULT_ENCODING

            except Exception as e:
                log_debug(f"Encoding detection failed: {e}")
                detected_encoding = cls.DEFAULT_ENCODING

        # Cache the result if file_path is provided
        if file_path and detected_encoding:
            _encoding_cache.set(file_path, detected_encoding)
            log_debug(f"Cached encoding for {file_path}: {detected_encoding}")

        return detected_encoding

    @classmethod
    def read_file_safe(cls, file_path: Union[str, Path]) -> Tuple[str, str]:
        """
        Safely read a file with automatic encoding detection and caching

        Args:
            file_path: Path to the file

        Returns:
            Tuple of (content, detected_encoding)

        Raises:
            IOError: If the file cannot be read.
        """
        file_path = Path(file_path)

        try:
            # Read raw bytes first
            with open(file_path, "rb") as f:
                raw_data = f.read()

            if not raw_data:
                return "", cls.DEFAULT_ENCODING

            # Detect and decode with file path for caching
            detected_encoding = cls.detect_encoding(raw_data, str(file_path))
            content = cls.safe_decode(raw_data, detected_encoding)

            # Normalize line endings for consistency
            content = cls.normalize_line_endings(content)

            return content, detected_encoding

        except IOError as e:
            log_warning(f"Failed to read file {file_path}: {e}")
            # Bare raise preserves the original traceback ("raise e" did not).
            raise

    @classmethod
    def write_file_safe(
        cls, file_path: Union[str, Path], content: str, encoding: Optional[str] = None
    ) -> bool:
        """
        Safely write content to a file

        Args:
            file_path: Path to the file
            content: Content to write
            encoding: Target encoding (defaults to UTF-8)

        Returns:
            True if successful, False otherwise
        """
        file_path = Path(file_path)
        target_encoding = encoding or cls.DEFAULT_ENCODING

        try:
            encoded_content = cls.safe_encode(content, target_encoding)

            # Write bytes directly so the exact encoding chosen above is used.
            with open(file_path, "wb") as f:
                f.write(encoded_content)

            return True

        except IOError as e:
            log_warning(f"Failed to write file {file_path}: {e}")
            return False

    @classmethod
    def normalize_line_endings(cls, text: str) -> str:
        """
        Normalize line endings to Unix style (LF)

        Args:
            text: Text to normalize

        Returns:
            Text with normalized line endings
        """
        if not text:
            return text

        # Replace Windows (CRLF) first, then lone Mac (CR), with Unix (LF)
        return text.replace("\r\n", "\n").replace("\r", "\n")

    @classmethod
    def extract_text_slice(
        cls,
        content_bytes: bytes,
        start_byte: int,
        end_byte: int,
        encoding: Optional[str] = None,
    ) -> str:
        """
        Extract a slice of text from bytes with proper encoding handling

        Args:
            content_bytes: Source bytes
            start_byte: Start position
            end_byte: End position
            encoding: Encoding to use (auto-detected if None)

        Returns:
            Extracted text slice ("" for empty/out-of-range requests)
        """
        if not content_bytes or start_byte >= len(content_bytes):
            return ""

        # Ensure bounds are valid
        start_byte = max(0, start_byte)
        end_byte = min(len(content_bytes), end_byte)

        if start_byte >= end_byte:
            return ""

        # Extract byte slice
        byte_slice = content_bytes[start_byte:end_byte]

        # Decode the slice
        return cls.safe_decode(byte_slice, encoding)
414
+
415
+
416
# Convenience functions for backward compatibility
def safe_encode(text: str, encoding: Optional[str] = None) -> bytes:
    """Module-level shortcut delegating to EncodingManager.safe_encode."""
    return EncodingManager.safe_encode(text, encoding)
420
+
421
+
422
def safe_decode(data: bytes, encoding: Optional[str] = None) -> str:
    """Module-level shortcut delegating to EncodingManager.safe_decode."""
    return EncodingManager.safe_decode(data, encoding)
425
+
426
+
427
def detect_encoding(data: bytes, file_path: Optional[str] = None) -> str:
    """Module-level shortcut delegating to EncodingManager.detect_encoding.

    Passing *file_path* enables the module-level encoding cache.
    """
    return EncodingManager.detect_encoding(data, file_path)
430
+
431
+
432
def read_file_safe(file_path: Union[str, Path]) -> Tuple[str, str]:
    """Module-level shortcut delegating to EncodingManager.read_file_safe."""
    return EncodingManager.read_file_safe(file_path)
435
+
436
+
437
def write_file_safe(
    file_path: Union[str, Path], content: str, encoding: Optional[str] = None
) -> bool:
    """Module-level shortcut delegating to EncodingManager.write_file_safe."""
    return EncodingManager.write_file_safe(file_path, content, encoding)
442
+
443
+
444
def extract_text_slice(
    content_bytes: bytes, start_byte: int, end_byte: int, encoding: Optional[str] = None
) -> str:
    """Module-level shortcut delegating to EncodingManager.extract_text_slice."""
    return EncodingManager.extract_text_slice(
        content_bytes, start_byte, end_byte, encoding
    )
451
+
452
+
453
def clear_encoding_cache() -> None:
    """Drop every entry from the shared module-level encoding cache."""
    _encoding_cache.clear()
456
+
457
+
458
def get_encoding_cache_size() -> int:
    """Return the number of entries in the shared module-level encoding cache."""
    return _encoding_cache.size()