tree_sitter_analyzer-1.9.2-py3-none-any.whl → tree_sitter_analyzer-1.9.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (64)
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +216 -8
  3. tree_sitter_analyzer/cli/argument_validator.py +1 -1
  4. tree_sitter_analyzer/cli/commands/advanced_command.py +3 -6
  5. tree_sitter_analyzer/cli/commands/query_command.py +3 -1
  6. tree_sitter_analyzer/cli/commands/table_command.py +3 -3
  7. tree_sitter_analyzer/constants.py +5 -3
  8. tree_sitter_analyzer/core/analysis_engine.py +1 -1
  9. tree_sitter_analyzer/core/cache_service.py +1 -1
  10. tree_sitter_analyzer/core/engine.py +34 -10
  11. tree_sitter_analyzer/core/query.py +82 -2
  12. tree_sitter_analyzer/encoding_utils.py +64 -0
  13. tree_sitter_analyzer/exceptions.py +1 -1
  14. tree_sitter_analyzer/file_handler.py +49 -33
  15. tree_sitter_analyzer/formatters/base_formatter.py +1 -1
  16. tree_sitter_analyzer/formatters/html_formatter.py +24 -14
  17. tree_sitter_analyzer/formatters/javascript_formatter.py +28 -21
  18. tree_sitter_analyzer/formatters/language_formatter_factory.py +7 -4
  19. tree_sitter_analyzer/formatters/markdown_formatter.py +4 -4
  20. tree_sitter_analyzer/formatters/python_formatter.py +4 -4
  21. tree_sitter_analyzer/formatters/typescript_formatter.py +1 -1
  22. tree_sitter_analyzer/interfaces/mcp_adapter.py +4 -2
  23. tree_sitter_analyzer/interfaces/mcp_server.py +10 -10
  24. tree_sitter_analyzer/language_detector.py +30 -5
  25. tree_sitter_analyzer/language_loader.py +46 -26
  26. tree_sitter_analyzer/languages/css_plugin.py +6 -6
  27. tree_sitter_analyzer/languages/html_plugin.py +12 -8
  28. tree_sitter_analyzer/languages/java_plugin.py +330 -520
  29. tree_sitter_analyzer/languages/javascript_plugin.py +22 -78
  30. tree_sitter_analyzer/languages/markdown_plugin.py +277 -297
  31. tree_sitter_analyzer/languages/python_plugin.py +47 -85
  32. tree_sitter_analyzer/languages/typescript_plugin.py +48 -123
  33. tree_sitter_analyzer/mcp/resources/project_stats_resource.py +14 -8
  34. tree_sitter_analyzer/mcp/server.py +38 -23
  35. tree_sitter_analyzer/mcp/tools/analyze_scale_tool.py +10 -7
  36. tree_sitter_analyzer/mcp/tools/analyze_scale_tool_cli_compatible.py +51 -7
  37. tree_sitter_analyzer/mcp/tools/fd_rg_utils.py +11 -7
  38. tree_sitter_analyzer/mcp/tools/find_and_grep_tool.py +8 -6
  39. tree_sitter_analyzer/mcp/tools/list_files_tool.py +6 -6
  40. tree_sitter_analyzer/mcp/tools/output_format_validator.py +148 -0
  41. tree_sitter_analyzer/mcp/tools/search_content_tool.py +48 -15
  42. tree_sitter_analyzer/mcp/tools/table_format_tool.py +13 -8
  43. tree_sitter_analyzer/mcp/utils/file_output_manager.py +8 -3
  44. tree_sitter_analyzer/mcp/utils/gitignore_detector.py +24 -12
  45. tree_sitter_analyzer/mcp/utils/path_resolver.py +2 -2
  46. tree_sitter_analyzer/models.py +16 -0
  47. tree_sitter_analyzer/mypy_current_errors.txt +2 -0
  48. tree_sitter_analyzer/plugins/base.py +66 -0
  49. tree_sitter_analyzer/queries/java.py +9 -3
  50. tree_sitter_analyzer/queries/javascript.py +3 -8
  51. tree_sitter_analyzer/queries/markdown.py +1 -1
  52. tree_sitter_analyzer/queries/python.py +2 -2
  53. tree_sitter_analyzer/security/boundary_manager.py +2 -5
  54. tree_sitter_analyzer/security/regex_checker.py +2 -2
  55. tree_sitter_analyzer/security/validator.py +5 -1
  56. tree_sitter_analyzer/table_formatter.py +4 -4
  57. tree_sitter_analyzer/utils/__init__.py +27 -116
  58. tree_sitter_analyzer/{utils.py → utils/logging.py} +2 -2
  59. tree_sitter_analyzer/utils/tree_sitter_compat.py +2 -2
  60. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/METADATA +87 -45
  61. tree_sitter_analyzer-1.9.4.dist-info/RECORD +111 -0
  62. tree_sitter_analyzer-1.9.2.dist-info/RECORD +0 -109
  63. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/WHEEL +0 -0
  64. {tree_sitter_analyzer-1.9.2.dist-info → tree_sitter_analyzer-1.9.4.dist-info}/entry_points.txt +0 -0
@@ -21,9 +21,14 @@ except ImportError:
21
21
 
22
22
  from ..core.analysis_engine import AnalysisRequest
23
23
  from ..encoding_utils import extract_text_slice, safe_encode
24
- from ..models import AnalysisResult, CodeElement
24
+ from ..models import AnalysisResult
25
+ from ..models import Class as ModelClass
26
+ from ..models import CodeElement
27
+ from ..models import Function as ModelFunction
28
+ from ..models import Import as ModelImport
29
+ from ..models import Variable as ModelVariable
25
30
  from ..plugins.base import ElementExtractor, LanguagePlugin
26
- from ..utils import log_debug, log_error, log_warning
31
+ from ..utils import log_debug, log_error
27
32
  from ..utils.tree_sitter_compat import TreeSitterQueryCompat
28
33
 
29
34
 
@@ -44,8 +49,8 @@ class MarkdownElement(CodeElement):
44
49
  title: str | None = None,
45
50
  language_info: str | None = None,
46
51
  is_checked: bool | None = None,
47
- **kwargs,
48
- ):
52
+ **kwargs: Any,
53
+ ) -> None:
49
54
  super().__init__(
50
55
  name=name,
51
56
  start_line=start_line,
@@ -62,6 +67,16 @@ class MarkdownElement(CodeElement):
62
67
  self.language_info = language_info # For code blocks
63
68
  self.is_checked = is_checked # For task list items
64
69
 
70
+ # Additional attributes used by formatters
71
+ self.text: str | None = None # Text content
72
+ self.type: str | None = None # Element type for formatters
73
+ self.line_count: int | None = None # For code blocks
74
+ self.alt: str | None = None # Alternative text for images
75
+ self.list_type: str | None = None # For lists (ordered/unordered/task)
76
+ self.item_count: int | None = None # For lists
77
+ self.row_count: int | None = None # For tables
78
+ self.column_count: int | None = None # For tables
79
+
65
80
 
66
81
  class MarkdownElementExtractor(ElementExtractor):
67
82
  """Markdown-specific element extractor with comprehensive feature support"""
@@ -80,30 +95,74 @@ class MarkdownElementExtractor(ElementExtractor):
80
95
 
81
96
  def extract_functions(
82
97
  self, tree: "tree_sitter.Tree", source_code: str
83
- ) -> list[CodeElement]:
98
+ ) -> list[ModelFunction]:
84
99
  """Extract Markdown elements (headers act as 'functions')"""
85
- return self.extract_headers(tree, source_code)
100
+ headers = self.extract_headers(tree, source_code)
101
+ functions = []
102
+ for header in headers:
103
+ func = ModelFunction(
104
+ name=header.name,
105
+ start_line=header.start_line,
106
+ end_line=header.end_line,
107
+ raw_text=header.raw_text,
108
+ language=header.language,
109
+ )
110
+ functions.append(func)
111
+ return functions
86
112
 
87
113
  def extract_classes(
88
114
  self, tree: "tree_sitter.Tree", source_code: str
89
- ) -> list[CodeElement]:
115
+ ) -> list[ModelClass]:
90
116
  """Extract Markdown sections (code blocks act as 'classes')"""
91
- return self.extract_code_blocks(tree, source_code)
117
+ code_blocks = self.extract_code_blocks(tree, source_code)
118
+ classes = []
119
+ for block in code_blocks:
120
+ cls = ModelClass(
121
+ name=block.name,
122
+ start_line=block.start_line,
123
+ end_line=block.end_line,
124
+ raw_text=block.raw_text,
125
+ language=block.language,
126
+ )
127
+ classes.append(cls)
128
+ return classes
92
129
 
93
130
  def extract_variables(
94
131
  self, tree: "tree_sitter.Tree", source_code: str
95
- ) -> list[CodeElement]:
132
+ ) -> list[ModelVariable]:
96
133
  """Extract Markdown links and images (act as 'variables')"""
97
134
  elements = []
98
135
  elements.extend(self.extract_links(tree, source_code))
99
136
  elements.extend(self.extract_images(tree, source_code))
100
- return elements
137
+
138
+ variables = []
139
+ for element in elements:
140
+ var = ModelVariable(
141
+ name=element.name,
142
+ start_line=element.start_line,
143
+ end_line=element.end_line,
144
+ raw_text=element.raw_text,
145
+ language=element.language,
146
+ )
147
+ variables.append(var)
148
+ return variables
101
149
 
102
150
  def extract_imports(
103
151
  self, tree: "tree_sitter.Tree", source_code: str
104
- ) -> list[CodeElement]:
152
+ ) -> list[ModelImport]:
105
153
  """Extract Markdown references and definitions"""
106
- return self.extract_references(tree, source_code)
154
+ references = self.extract_references(tree, source_code)
155
+ imports = []
156
+ for ref in references:
157
+ imp = ModelImport(
158
+ name=ref.name,
159
+ start_line=ref.start_line,
160
+ end_line=ref.end_line,
161
+ raw_text=ref.raw_text,
162
+ language=ref.language,
163
+ )
164
+ imports.append(imp)
165
+ return imports
107
166
 
108
167
  def extract_headers(
109
168
  self, tree: "tree_sitter.Tree", source_code: str
@@ -115,18 +174,14 @@ class MarkdownElementExtractor(ElementExtractor):
115
174
 
116
175
  headers: list[MarkdownElement] = []
117
176
 
118
- if tree is None or tree.root_node is None:
119
- log_debug("Tree or root_node is None, returning empty headers list")
120
- return headers
121
-
122
- try:
123
- # Extract ATX headers (# ## ### etc.)
124
- self._extract_atx_headers(tree.root_node, headers)
125
- # Extract Setext headers (underlined)
126
- self._extract_setext_headers(tree.root_node, headers)
127
- except Exception as e:
128
- log_debug(f"Error during header extraction: {e}")
129
- return []
177
+ if tree is not None and tree.root_node is not None:
178
+ try:
179
+ # Extract ATX headers (# ## ### etc.)
180
+ self._extract_atx_headers(tree.root_node, headers)
181
+ # Extract Setext headers (underlined)
182
+ self._extract_setext_headers(tree.root_node, headers)
183
+ except Exception as e:
184
+ log_debug(f"Error during header extraction: {e}")
130
185
 
131
186
  log_debug(f"Extracted {len(headers)} Markdown headers")
132
187
  return headers
@@ -141,16 +196,12 @@ class MarkdownElementExtractor(ElementExtractor):
141
196
 
142
197
  code_blocks: list[MarkdownElement] = []
143
198
 
144
- if tree is None or tree.root_node is None:
145
- log_debug("Tree or root_node is None, returning empty code blocks list")
146
- return code_blocks
147
-
148
- try:
149
- self._extract_fenced_code_blocks(tree.root_node, code_blocks)
150
- self._extract_indented_code_blocks(tree.root_node, code_blocks)
151
- except Exception as e:
152
- log_debug(f"Error during code block extraction: {e}")
153
- return []
199
+ if tree is not None and tree.root_node is not None:
200
+ try:
201
+ self._extract_fenced_code_blocks(tree.root_node, code_blocks)
202
+ self._extract_indented_code_blocks(tree.root_node, code_blocks)
203
+ except Exception as e:
204
+ log_debug(f"Error during code block extraction: {e}")
154
205
 
155
206
  log_debug(f"Extracted {len(code_blocks)} Markdown code blocks")
156
207
  return code_blocks
@@ -165,25 +216,21 @@ class MarkdownElementExtractor(ElementExtractor):
165
216
 
166
217
  links: list[MarkdownElement] = []
167
218
 
168
- if tree is None or tree.root_node is None:
169
- log_debug("Tree or root_node is None, returning empty links list")
170
- return links
171
-
172
- try:
173
- # Track extracted links to prevent global duplicates (ensure reset)
174
- self._extracted_links = set()
219
+ if tree is not None and tree.root_node is not None:
220
+ try:
221
+ # Track extracted links to prevent global duplicates (ensure reset)
222
+ self._extracted_links = set()
175
223
 
176
- self._extract_inline_links(tree.root_node, links)
177
- self._extract_reference_links(tree.root_node, links)
178
- self._extract_autolinks(tree.root_node, links)
224
+ self._extract_inline_links(tree.root_node, links)
225
+ self._extract_reference_links(tree.root_node, links)
226
+ self._extract_autolinks(tree.root_node, links)
179
227
 
180
- # Clean up after extraction is complete
181
- if hasattr(self, "_extracted_links"):
182
- delattr(self, "_extracted_links")
228
+ # Clean up after extraction is complete
229
+ if hasattr(self, "_extracted_links"):
230
+ delattr(self, "_extracted_links")
183
231
 
184
- except Exception as e:
185
- log_debug(f"Error during link extraction: {e}")
186
- return []
232
+ except Exception as e:
233
+ log_debug(f"Error during link extraction: {e}")
187
234
 
188
235
  # 重複除去: 同じtextとurlを持つ要素を除去
189
236
  seen = set()
@@ -209,17 +256,13 @@ class MarkdownElementExtractor(ElementExtractor):
209
256
 
210
257
  images: list[MarkdownElement] = []
211
258
 
212
- if tree is None or tree.root_node is None:
213
- log_debug("Tree or root_node is None, returning empty images list")
214
- return images
215
-
216
- try:
217
- self._extract_inline_images(tree.root_node, images)
218
- self._extract_reference_images(tree.root_node, images)
219
- self._extract_image_reference_definitions(tree.root_node, images)
220
- except Exception as e:
221
- log_debug(f"Error during image extraction: {e}")
222
- return []
259
+ if tree is not None and tree.root_node is not None:
260
+ try:
261
+ self._extract_inline_images(tree.root_node, images)
262
+ self._extract_reference_images(tree.root_node, images)
263
+ self._extract_image_reference_definitions(tree.root_node, images)
264
+ except Exception as e:
265
+ log_debug(f"Error during image extraction: {e}")
223
266
 
224
267
  # 重複除去: 同じalt_textとurlを持つ要素を除去
225
268
  seen = set()
@@ -245,15 +288,11 @@ class MarkdownElementExtractor(ElementExtractor):
245
288
 
246
289
  references: list[MarkdownElement] = []
247
290
 
248
- if tree is None or tree.root_node is None:
249
- log_debug("Tree or root_node is None, returning empty references list")
250
- return references
251
-
252
- try:
253
- self._extract_link_reference_definitions(tree.root_node, references)
254
- except Exception as e:
255
- log_debug(f"Error during reference extraction: {e}")
256
- return []
291
+ if tree is not None and tree.root_node is not None:
292
+ try:
293
+ self._extract_link_reference_definitions(tree.root_node, references)
294
+ except Exception as e:
295
+ log_debug(f"Error during reference extraction: {e}")
257
296
 
258
297
  log_debug(f"Extracted {len(references)} Markdown references")
259
298
  return references
@@ -268,15 +307,11 @@ class MarkdownElementExtractor(ElementExtractor):
268
307
 
269
308
  blockquotes: list[MarkdownElement] = []
270
309
 
271
- if tree is None or tree.root_node is None:
272
- log_debug("Tree or root_node is None, returning empty blockquotes list")
273
- return blockquotes
274
-
275
- try:
276
- self._extract_block_quotes(tree.root_node, blockquotes)
277
- except Exception as e:
278
- log_debug(f"Error during blockquote extraction: {e}")
279
- return []
310
+ if tree is not None and tree.root_node is not None:
311
+ try:
312
+ self._extract_block_quotes(tree.root_node, blockquotes)
313
+ except Exception as e:
314
+ log_debug(f"Error during blockquote extraction: {e}")
280
315
 
281
316
  log_debug(f"Extracted {len(blockquotes)} Markdown blockquotes")
282
317
  return blockquotes
@@ -291,17 +326,11 @@ class MarkdownElementExtractor(ElementExtractor):
291
326
 
292
327
  horizontal_rules: list[MarkdownElement] = []
293
328
 
294
- if tree is None or tree.root_node is None:
295
- log_debug(
296
- "Tree or root_node is None, returning empty horizontal rules list"
297
- )
298
- return horizontal_rules
299
-
300
- try:
301
- self._extract_thematic_breaks(tree.root_node, horizontal_rules)
302
- except Exception as e:
303
- log_debug(f"Error during horizontal rule extraction: {e}")
304
- return []
329
+ if tree is not None and tree.root_node is not None:
330
+ try:
331
+ self._extract_thematic_breaks(tree.root_node, horizontal_rules)
332
+ except Exception as e:
333
+ log_debug(f"Error during horizontal rule extraction: {e}")
305
334
 
306
335
  log_debug(f"Extracted {len(horizontal_rules)} Markdown horizontal rules")
307
336
  return horizontal_rules
@@ -316,16 +345,12 @@ class MarkdownElementExtractor(ElementExtractor):
316
345
 
317
346
  html_elements: list[MarkdownElement] = []
318
347
 
319
- if tree is None or tree.root_node is None:
320
- log_debug("Tree or root_node is None, returning empty HTML elements list")
321
- return html_elements
322
-
323
- try:
324
- self._extract_html_blocks(tree.root_node, html_elements)
325
- self._extract_inline_html(tree.root_node, html_elements)
326
- except Exception as e:
327
- log_debug(f"Error during HTML element extraction: {e}")
328
- return []
348
+ if tree is not None and tree.root_node is not None:
349
+ try:
350
+ self._extract_html_blocks(tree.root_node, html_elements)
351
+ self._extract_inline_html(tree.root_node, html_elements)
352
+ except Exception as e:
353
+ log_debug(f"Error during HTML element extraction: {e}")
329
354
 
330
355
  log_debug(f"Extracted {len(html_elements)} HTML elements")
331
356
  return html_elements
@@ -340,19 +365,15 @@ class MarkdownElementExtractor(ElementExtractor):
340
365
 
341
366
  formatting_elements: list[MarkdownElement] = []
342
367
 
343
- if tree is None or tree.root_node is None:
344
- log_debug(
345
- "Tree or root_node is None, returning empty formatting elements list"
346
- )
347
- return formatting_elements
348
-
349
- try:
350
- self._extract_emphasis_elements(tree.root_node, formatting_elements)
351
- self._extract_inline_code_spans(tree.root_node, formatting_elements)
352
- self._extract_strikethrough_elements(tree.root_node, formatting_elements)
353
- except Exception as e:
354
- log_debug(f"Error during text formatting extraction: {e}")
355
- return []
368
+ if tree is not None and tree.root_node is not None:
369
+ try:
370
+ self._extract_emphasis_elements(tree.root_node, formatting_elements)
371
+ self._extract_inline_code_spans(tree.root_node, formatting_elements)
372
+ self._extract_strikethrough_elements(
373
+ tree.root_node, formatting_elements
374
+ )
375
+ except Exception as e:
376
+ log_debug(f"Error during text formatting extraction: {e}")
356
377
 
357
378
  log_debug(f"Extracted {len(formatting_elements)} text formatting elements")
358
379
  return formatting_elements
@@ -367,15 +388,11 @@ class MarkdownElementExtractor(ElementExtractor):
367
388
 
368
389
  footnotes: list[MarkdownElement] = []
369
390
 
370
- if tree is None or tree.root_node is None:
371
- log_debug("Tree or root_node is None, returning empty footnotes list")
372
- return footnotes
373
-
374
- try:
375
- self._extract_footnote_elements(tree.root_node, footnotes)
376
- except Exception as e:
377
- log_debug(f"Error during footnote extraction: {e}")
378
- return []
391
+ if tree is not None and tree.root_node is not None:
392
+ try:
393
+ self._extract_footnote_elements(tree.root_node, footnotes)
394
+ except Exception as e:
395
+ log_debug(f"Error during footnote extraction: {e}")
379
396
 
380
397
  log_debug(f"Extracted {len(footnotes)} footnotes")
381
398
  return footnotes
@@ -390,15 +407,11 @@ class MarkdownElementExtractor(ElementExtractor):
390
407
 
391
408
  lists: list[MarkdownElement] = []
392
409
 
393
- if tree is None or tree.root_node is None:
394
- log_debug("Tree or root_node is None, returning empty lists list")
395
- return lists
396
-
397
- try:
398
- self._extract_list_items(tree.root_node, lists)
399
- except Exception as e:
400
- log_debug(f"Error during list extraction: {e}")
401
- return []
410
+ if tree is not None and tree.root_node is not None:
411
+ try:
412
+ self._extract_list_items(tree.root_node, lists)
413
+ except Exception as e:
414
+ log_debug(f"Error during list extraction: {e}")
402
415
 
403
416
  log_debug(f"Extracted {len(lists)} Markdown list items")
404
417
  return lists
@@ -413,15 +426,11 @@ class MarkdownElementExtractor(ElementExtractor):
413
426
 
414
427
  tables: list[MarkdownElement] = []
415
428
 
416
- if tree is None or tree.root_node is None:
417
- log_debug("Tree or root_node is None, returning empty tables list")
418
- return tables
419
-
420
- try:
421
- self._extract_pipe_tables(tree.root_node, tables)
422
- except Exception as e:
423
- log_debug(f"Error during table extraction: {e}")
424
- return []
429
+ if tree is not None and tree.root_node is not None:
430
+ try:
431
+ self._extract_pipe_tables(tree.root_node, tables)
432
+ except Exception as e:
433
+ log_debug(f"Error during table extraction: {e}")
425
434
 
426
435
  log_debug(f"Extracted {len(tables)} Markdown tables")
427
436
  return tables
@@ -468,7 +477,7 @@ class MarkdownElementExtractor(ElementExtractor):
468
477
  line = self.content_lines[start_point[0]]
469
478
  start_col = max(0, min(start_point[1], len(line)))
470
479
  end_col = max(start_col, min(end_point[1], len(line)))
471
- result = line[start_col:end_col]
480
+ result: str = line[start_col:end_col]
472
481
  self._node_text_cache[node_id] = result
473
482
  return result
474
483
  else:
@@ -900,7 +909,6 @@ class MarkdownElementExtractor(ElementExtractor):
900
909
 
901
910
  # Extract all reference definitions that could be used for images
902
911
  # We check if the URL points to an image file or if it's used by an image reference
903
-
904
912
  # First, collect all image references used in the document
905
913
  image_refs_used = set()
906
914
  for node in self._traverse_nodes(root_node):
@@ -932,12 +940,14 @@ class MarkdownElementExtractor(ElementExtractor):
932
940
 
933
941
  # Pattern: [label]: url "title"
934
942
  ref_pattern = r'^\[([^\]]+)\]:\s*([^\s]+)(?:\s+"([^"]*)")?'
935
- match = re.match(ref_pattern, raw_text.strip())
943
+ ref_match: re.Match[str] | None = re.match(
944
+ ref_pattern, raw_text.strip()
945
+ )
936
946
 
937
- if match:
938
- label = match.group(1) or ""
939
- url = match.group(2) or ""
940
- title = match.group(3) or ""
947
+ if ref_match:
948
+ label = ref_match.group(1) or ""
949
+ url = ref_match.group(2) or ""
950
+ title = ref_match.group(3) or ""
941
951
 
942
952
  # Include if this reference is used by an image OR if URL looks like an image
943
953
  is_used_by_image = label.lower() in image_refs_used
@@ -1124,9 +1134,11 @@ class MarkdownElementExtractor(ElementExtractor):
1124
1134
  content = "\n".join(content_lines).strip()
1125
1135
 
1126
1136
  blockquote = MarkdownElement(
1127
- name=f"Blockquote: {content[:50]}..."
1128
- if len(content) > 50
1129
- else f"Blockquote: {content}",
1137
+ name=(
1138
+ f"Blockquote: {content[:50]}..."
1139
+ if len(content) > 50
1140
+ else f"Blockquote: {content}"
1141
+ ),
1130
1142
  start_line=start_line,
1131
1143
  end_line=end_line,
1132
1144
  raw_text=raw_text,
@@ -1406,13 +1418,13 @@ class MarkdownElementExtractor(ElementExtractor):
1406
1418
 
1407
1419
  # Pattern for footnote definitions: [^1]: content
1408
1420
  footnote_def_pattern = r"^\[\^([^\]]+)\]:\s*(.+)$"
1409
- match = re.match(
1421
+ footnote_match: re.Match[str] | None = re.match(
1410
1422
  footnote_def_pattern, raw_text.strip(), re.MULTILINE
1411
1423
  )
1412
1424
 
1413
- if match:
1414
- ref_id = match.group(1) or ""
1415
- content = match.group(2) or ""
1425
+ if footnote_match:
1426
+ ref_id = footnote_match.group(1) or ""
1427
+ content = footnote_match.group(2) or ""
1416
1428
  start_line = node.start_point[0] + 1
1417
1429
  end_line = node.end_point[0] + 1
1418
1430
 
@@ -1430,7 +1442,7 @@ class MarkdownElementExtractor(ElementExtractor):
1430
1442
  except Exception as e:
1431
1443
  log_debug(f"Failed to extract footnote definition: {e}")
1432
1444
 
1433
- def _traverse_nodes(self, node: "tree_sitter.Node"):
1445
+ def _traverse_nodes(self, node: "tree_sitter.Node") -> Any:
1434
1446
  """Traverse all nodes in the tree"""
1435
1447
  yield node
1436
1448
  for child in node.children:
@@ -1507,28 +1519,68 @@ class MarkdownPlugin(LanguagePlugin):
1507
1519
  ) -> list[CodeElement]:
1508
1520
  """Extract functions from the tree (legacy compatibility)"""
1509
1521
  extractor = self.get_extractor()
1510
- return extractor.extract_functions(tree, source_code)
1522
+ functions = extractor.extract_functions(tree, source_code)
1523
+ return [
1524
+ CodeElement(
1525
+ name=f.name,
1526
+ start_line=f.start_line,
1527
+ end_line=f.end_line,
1528
+ raw_text=f.raw_text,
1529
+ language=f.language,
1530
+ )
1531
+ for f in functions
1532
+ ]
1511
1533
 
1512
1534
  def extract_classes(
1513
1535
  self, tree: "tree_sitter.Tree", source_code: str
1514
1536
  ) -> list[CodeElement]:
1515
1537
  """Extract classes from the tree (legacy compatibility)"""
1516
1538
  extractor = self.get_extractor()
1517
- return extractor.extract_classes(tree, source_code)
1539
+ classes = extractor.extract_classes(tree, source_code)
1540
+ return [
1541
+ CodeElement(
1542
+ name=c.name,
1543
+ start_line=c.start_line,
1544
+ end_line=c.end_line,
1545
+ raw_text=c.raw_text,
1546
+ language=c.language,
1547
+ )
1548
+ for c in classes
1549
+ ]
1518
1550
 
1519
1551
  def extract_variables(
1520
1552
  self, tree: "tree_sitter.Tree", source_code: str
1521
1553
  ) -> list[CodeElement]:
1522
1554
  """Extract variables from the tree (legacy compatibility)"""
1523
1555
  extractor = self.get_extractor()
1524
- return extractor.extract_variables(tree, source_code)
1556
+ variables = extractor.extract_variables(tree, source_code)
1557
+ return [
1558
+ CodeElement(
1559
+ name=v.name,
1560
+ start_line=v.start_line,
1561
+ end_line=v.end_line,
1562
+ raw_text=v.raw_text,
1563
+ language=v.language,
1564
+ )
1565
+ for v in variables
1566
+ ]
1525
1567
 
1526
1568
  def extract_imports(
1527
1569
  self, tree: "tree_sitter.Tree", source_code: str
1528
1570
  ) -> list[CodeElement]:
1529
1571
  """Extract imports from the tree (legacy compatibility)"""
1530
1572
  extractor = self.get_extractor()
1531
- return extractor.extract_imports(tree, source_code)
1573
+ imports = extractor.extract_imports(tree, source_code)
1574
+ return [
1575
+ CodeElement(
1576
+ name=i.name,
1577
+ start_line=i.start_line,
1578
+ end_line=i.end_line,
1579
+ raw_text=i.raw_text,
1580
+ language=i.language,
1581
+ )
1582
+ for i in imports
1583
+ ]
1532
1584
 
1533
1585
  def get_tree_sitter_language(self) -> Optional["tree_sitter.Language"]:
1534
1586
  """Get the Tree-sitter language object for Markdown"""
@@ -1633,33 +1685,49 @@ class MarkdownPlugin(LanguagePlugin):
1633
1685
  )
1634
1686
 
1635
1687
  try:
1636
- with open(file_path, encoding="utf-8") as f:
1637
- source_code = f.read()
1688
+ from ..encoding_utils import read_file_safe
1689
+
1690
+ source_code, _ = read_file_safe(file_path)
1638
1691
 
1639
1692
  parser = tree_sitter.Parser()
1640
1693
  parser.language = language
1641
- tree = parser.parse(bytes(source_code, "utf8"))
1694
+ tree = parser.parse(source_code.encode("utf-8"))
1642
1695
 
1643
1696
  extractor = self.create_extractor()
1644
1697
  extractor.current_file = file_path # Set current file for context
1645
1698
 
1646
1699
  elements: list[CodeElement] = []
1647
1700
 
1648
- # Extract all element types
1649
- headers = extractor.extract_headers(tree, source_code)
1650
- code_blocks = extractor.extract_code_blocks(tree, source_code)
1651
- links = extractor.extract_links(tree, source_code)
1652
- images = extractor.extract_images(tree, source_code)
1653
- references = extractor.extract_references(tree, source_code)
1654
- lists = extractor.extract_lists(tree, source_code)
1655
- tables = extractor.extract_tables(tree, source_code)
1656
-
1657
- # Extract new element types
1658
- blockquotes = extractor.extract_blockquotes(tree, source_code)
1659
- horizontal_rules = extractor.extract_horizontal_rules(tree, source_code)
1660
- html_elements = extractor.extract_html_elements(tree, source_code)
1661
- text_formatting = extractor.extract_text_formatting(tree, source_code)
1662
- footnotes = extractor.extract_footnotes(tree, source_code)
1701
+ # Extract all element types using the markdown-specific extractor
1702
+ if isinstance(extractor, MarkdownElementExtractor):
1703
+ headers = extractor.extract_headers(tree, source_code)
1704
+ code_blocks = extractor.extract_code_blocks(tree, source_code)
1705
+ links = extractor.extract_links(tree, source_code)
1706
+ images = extractor.extract_images(tree, source_code)
1707
+ references = extractor.extract_references(tree, source_code)
1708
+ lists = extractor.extract_lists(tree, source_code)
1709
+ tables = extractor.extract_tables(tree, source_code)
1710
+
1711
+ # Extract new element types
1712
+ blockquotes = extractor.extract_blockquotes(tree, source_code)
1713
+ horizontal_rules = extractor.extract_horizontal_rules(tree, source_code)
1714
+ html_elements = extractor.extract_html_elements(tree, source_code)
1715
+ text_formatting = extractor.extract_text_formatting(tree, source_code)
1716
+ footnotes = extractor.extract_footnotes(tree, source_code)
1717
+ else:
1718
+ # Fallback for base ElementExtractor
1719
+ headers = []
1720
+ code_blocks = []
1721
+ links = []
1722
+ images = []
1723
+ references = []
1724
+ lists = []
1725
+ tables = []
1726
+ blockquotes = []
1727
+ horizontal_rules = []
1728
+ html_elements = []
1729
+ text_formatting = []
1730
+ footnotes = []
1663
1731
 
1664
1732
  elements.extend(headers)
1665
1733
  elements.extend(code_blocks)
@@ -1732,129 +1800,43 @@ class MarkdownPlugin(LanguagePlugin):
1732
1800
  elements = []
1733
1801
 
1734
1802
  try:
1735
- elements.extend(extractor.extract_headers(tree, source_code))
1736
- elements.extend(extractor.extract_code_blocks(tree, source_code))
1737
- elements.extend(extractor.extract_links(tree, source_code))
1738
- elements.extend(extractor.extract_images(tree, source_code))
1739
- elements.extend(extractor.extract_references(tree, source_code))
1740
- elements.extend(extractor.extract_lists(tree, source_code))
1741
- elements.extend(extractor.extract_tables(tree, source_code))
1742
- elements.extend(extractor.extract_blockquotes(tree, source_code))
1743
- elements.extend(extractor.extract_horizontal_rules(tree, source_code))
1744
- elements.extend(extractor.extract_html_elements(tree, source_code))
1745
- elements.extend(extractor.extract_text_formatting(tree, source_code))
1746
- elements.extend(extractor.extract_footnotes(tree, source_code))
1803
+ if isinstance(extractor, MarkdownElementExtractor):
1804
+ elements.extend(extractor.extract_headers(tree, source_code))
1805
+ elements.extend(extractor.extract_code_blocks(tree, source_code))
1806
+ elements.extend(extractor.extract_links(tree, source_code))
1807
+ elements.extend(extractor.extract_images(tree, source_code))
1808
+ elements.extend(extractor.extract_references(tree, source_code))
1809
+ elements.extend(extractor.extract_lists(tree, source_code))
1810
+ elements.extend(extractor.extract_tables(tree, source_code))
1811
+ elements.extend(extractor.extract_blockquotes(tree, source_code))
1812
+ elements.extend(extractor.extract_horizontal_rules(tree, source_code))
1813
+ elements.extend(extractor.extract_html_elements(tree, source_code))
1814
+ elements.extend(extractor.extract_text_formatting(tree, source_code))
1815
+ elements.extend(extractor.extract_footnotes(tree, source_code))
1747
1816
  except Exception as e:
1748
1817
  log_error(f"Failed to extract elements: {e}")
1749
1818
 
1750
1819
  return elements
1751
1820
 
1752
1821
  def execute_query_strategy(
1753
- self, tree: "tree_sitter.Tree", source_code: str, query_key: str
1754
- ) -> list[CodeElement]:
1755
- """Execute Markdown-specific query strategy based on query_key"""
1756
- if not tree or not source_code:
1757
- return []
1758
-
1759
- # Initialize extractor with source code
1760
- self._extractor.source_code = source_code
1761
- self._extractor.content_lines = source_code.split("\n")
1762
- self._extractor._reset_caches()
1763
-
1764
- # Map query_key to appropriate extraction method
1765
- query_mapping = {
1766
- # Header-related queries (mapped to functions)
1767
- "function": lambda: self._extractor.extract_headers(tree, source_code),
1768
- "headers": lambda: self._extractor.extract_headers(tree, source_code),
1769
- "heading": lambda: self._extractor.extract_headers(tree, source_code),
1770
- # Code block-related queries (mapped to classes)
1771
- "class": lambda: self._extractor.extract_code_blocks(tree, source_code),
1772
- "code_blocks": lambda: self._extractor.extract_code_blocks(
1773
- tree, source_code
1774
- ),
1775
- "code_block": lambda: self._extractor.extract_code_blocks(
1776
- tree, source_code
1777
- ),
1778
- # Link and image queries (mapped to variables)
1779
- "variable": lambda: self._extractor.extract_links(tree, source_code)
1780
- + self._extractor.extract_images(tree, source_code),
1781
- "links": lambda: self._extractor.extract_links(tree, source_code),
1782
- "link": lambda: self._extractor.extract_links(tree, source_code),
1783
- "images": lambda: self._extractor.extract_images(tree, source_code),
1784
- "image": lambda: self._extractor.extract_images(tree, source_code),
1785
- # Reference queries (mapped to imports)
1786
- "import": lambda: self._extractor.extract_references(tree, source_code),
1787
- "references": lambda: self._extractor.extract_references(tree, source_code),
1788
- "reference": lambda: self._extractor.extract_references(tree, source_code),
1789
- # List and table queries
1790
- "lists": lambda: self._extractor.extract_lists(tree, source_code),
1791
- "list": lambda: self._extractor.extract_lists(tree, source_code),
1792
- "task_lists": lambda: [
1793
- lst
1794
- for lst in self._extractor.extract_lists(tree, source_code)
1795
- if getattr(lst, "element_type", "") == "task_list"
1796
- ],
1797
- "tables": lambda: self._extractor.extract_tables(tree, source_code),
1798
- "table": lambda: self._extractor.extract_tables(tree, source_code),
1799
- # Content structure queries
1800
- "blockquotes": lambda: self._extractor.extract_blockquotes(
1801
- tree, source_code
1802
- ),
1803
- "blockquote": lambda: self._extractor.extract_blockquotes(
1804
- tree, source_code
1805
- ),
1806
- "horizontal_rules": lambda: self._extractor.extract_horizontal_rules(
1807
- tree, source_code
1808
- ),
1809
- "horizontal_rule": lambda: self._extractor.extract_horizontal_rules(
1810
- tree, source_code
1811
- ),
1812
- # HTML and formatting queries
1813
- "html_blocks": lambda: self._extractor.extract_html_elements(
1814
- tree, source_code
1815
- ),
1816
- "html_block": lambda: self._extractor.extract_html_elements(
1817
- tree, source_code
1818
- ),
1819
- "html": lambda: self._extractor.extract_html_elements(tree, source_code),
1820
- "emphasis": lambda: self._extractor.extract_text_formatting(
1821
- tree, source_code
1822
- ),
1823
- "formatting": lambda: self._extractor.extract_text_formatting(
1824
- tree, source_code
1825
- ),
1826
- "text_formatting": lambda: self._extractor.extract_text_formatting(
1827
- tree, source_code
1828
- ),
1829
- "inline_code": lambda: [
1830
- f
1831
- for f in self._extractor.extract_text_formatting(tree, source_code)
1832
- if getattr(f, "element_type", "") == "inline_code"
1833
- ],
1834
- "strikethrough": lambda: [
1835
- f
1836
- for f in self._extractor.extract_text_formatting(tree, source_code)
1837
- if getattr(f, "element_type", "") == "strikethrough"
1838
- ],
1839
- # Footnote queries
1840
- "footnotes": lambda: self._extractor.extract_footnotes(tree, source_code),
1841
- "footnote": lambda: self._extractor.extract_footnotes(tree, source_code),
1842
- # Comprehensive queries
1843
- "all_elements": lambda: self.extract_elements(tree, source_code),
1844
- "text_content": lambda: self._extractor.extract_headers(tree, source_code)
1845
- + self._extractor.extract_text_formatting(tree, source_code),
1846
- }
1847
-
1848
- # Execute the appropriate extraction method
1849
- if query_key in query_mapping:
1850
- try:
1851
- return query_mapping[query_key]()
1852
- except Exception as e:
1853
- log_error(f"Error executing Markdown query '{query_key}': {e}")
1854
- return []
1855
- else:
1856
- log_warning(f"Unsupported Markdown query key: {query_key}")
1857
- return []
1822
+ self, query_key: str | None, language: str
1823
+ ) -> str | None:
1824
+ """Execute query strategy for Markdown language"""
1825
+ if not query_key:
1826
+ return None
1827
+
1828
+ # Use markdown-specific element categories instead of base queries
1829
+ element_categories = self.get_element_categories()
1830
+ if query_key in element_categories:
1831
+ # Return a simple query string for the category
1832
+ node_types = element_categories[query_key]
1833
+ if node_types:
1834
+ # Create a basic query for the first node type
1835
+ return f"({node_types[0]}) @{query_key}"
1836
+
1837
+ # Fallback to base implementation
1838
+ queries = self.get_queries()
1839
+ return queries.get(query_key) if queries else None
1858
1840
 
1859
1841
  def get_element_categories(self) -> dict[str, list[str]]:
1860
1842
  """Get Markdown element categories mapping query_key to node_types"""
@@ -1911,9 +1893,7 @@ class MarkdownPlugin(LanguagePlugin):
1911
1893
  "html_block": ["html_block", "inline"],
1912
1894
  "html": ["html_block", "inline"],
1913
1895
  # Text formatting categories
1914
- "emphasis": [
1915
- "inline" # Contains emphasis elements
1916
- ],
1896
+ "emphasis": ["inline"], # Contains emphasis elements
1917
1897
  "formatting": ["inline"],
1918
1898
  "text_formatting": ["inline"],
1919
1899
  "inline_code": ["inline"],