tree-sitter-analyzer 1.7.7__py3-none-any.whl → 1.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tree-sitter-analyzer might be problematic. Click here for more details.

Files changed (38) hide show
  1. tree_sitter_analyzer/__init__.py +1 -1
  2. tree_sitter_analyzer/api.py +23 -30
  3. tree_sitter_analyzer/cli/argument_validator.py +77 -0
  4. tree_sitter_analyzer/cli/commands/table_command.py +7 -2
  5. tree_sitter_analyzer/cli_main.py +17 -3
  6. tree_sitter_analyzer/core/cache_service.py +15 -5
  7. tree_sitter_analyzer/core/query.py +33 -22
  8. tree_sitter_analyzer/core/query_service.py +179 -154
  9. tree_sitter_analyzer/formatters/formatter_registry.py +355 -0
  10. tree_sitter_analyzer/formatters/html_formatter.py +462 -0
  11. tree_sitter_analyzer/formatters/language_formatter_factory.py +3 -0
  12. tree_sitter_analyzer/formatters/markdown_formatter.py +1 -1
  13. tree_sitter_analyzer/language_detector.py +80 -7
  14. tree_sitter_analyzer/languages/css_plugin.py +390 -0
  15. tree_sitter_analyzer/languages/html_plugin.py +395 -0
  16. tree_sitter_analyzer/languages/java_plugin.py +116 -0
  17. tree_sitter_analyzer/languages/javascript_plugin.py +113 -0
  18. tree_sitter_analyzer/languages/markdown_plugin.py +266 -46
  19. tree_sitter_analyzer/languages/python_plugin.py +176 -33
  20. tree_sitter_analyzer/languages/typescript_plugin.py +130 -1
  21. tree_sitter_analyzer/mcp/tools/query_tool.py +99 -58
  22. tree_sitter_analyzer/mcp/tools/table_format_tool.py +24 -10
  23. tree_sitter_analyzer/models.py +53 -0
  24. tree_sitter_analyzer/output_manager.py +1 -1
  25. tree_sitter_analyzer/plugins/base.py +50 -0
  26. tree_sitter_analyzer/plugins/manager.py +5 -1
  27. tree_sitter_analyzer/queries/css.py +634 -0
  28. tree_sitter_analyzer/queries/html.py +556 -0
  29. tree_sitter_analyzer/queries/markdown.py +54 -164
  30. tree_sitter_analyzer/query_loader.py +16 -3
  31. tree_sitter_analyzer/security/validator.py +182 -44
  32. tree_sitter_analyzer/utils/__init__.py +113 -0
  33. tree_sitter_analyzer/utils/tree_sitter_compat.py +282 -0
  34. tree_sitter_analyzer/utils.py +62 -24
  35. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/METADATA +120 -14
  36. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/RECORD +38 -29
  37. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/entry_points.txt +2 -0
  38. {tree_sitter_analyzer-1.7.7.dist-info → tree_sitter_analyzer-1.8.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,556 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ HTML Language Queries
4
+
5
+ Comprehensive Tree-sitter queries for HTML language constructs.
6
+ Covers elements, attributes, text content, and document structure.
7
+ """
8
+
9
# HTML-specific query library
#
# The tree-sitter-html grammar declares no named fields, so child nodes are
# matched positionally.  Field syntax such as ``name: (tag_name)`` is rejected
# when the query is compiled and must not be used in these patterns.


def _element_by_tag(capture: str, tag_pattern: str) -> str:
    """Build a query capturing ``element`` nodes whose start tag name matches.

    Args:
        capture: Capture name applied to the whole ``element`` node.
        tag_pattern: Regex body for the tag name; it is anchored with
            ``^`` and ``$`` here.

    Returns:
        Tree-sitter query string.
    """
    return f"""
    (element
        (start_tag
            (tag_name) @tag_name
            (#match? @tag_name "^{tag_pattern}$"))) @{capture}
    """


def _attribute_by_name(capture: str, attr_name: str, value_capture: str) -> str:
    """Build a query capturing quoted-value attributes with a given name.

    Args:
        capture: Capture name applied to the whole ``attribute`` node.
        attr_name: Exact attribute name to match (anchored regex).
        value_capture: Capture name applied to the quoted value node.

    Returns:
        Tree-sitter query string.
    """
    return f"""
    (attribute
        (attribute_name) @attr_name
        (#match? @attr_name "^{attr_name}$")
        (quoted_attribute_value) @{value_capture}) @{capture}
    """


HTML_QUERIES: dict[str, str] = {
    # --- Basic Elements ---
    "element": """
    (element) @element
    """,
    "start_tag": """
    (start_tag
        (tag_name) @tag_name) @start_tag
    """,
    "end_tag": """
    (end_tag
        (tag_name) @tag_name) @end_tag
    """,
    "self_closing_tag": """
    (self_closing_tag
        (tag_name) @tag_name) @self_closing_tag
    """,
    "void_element": _element_by_tag(
        "void_element",
        "(area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)",
    ),
    # --- Attributes ---
    "attribute": """
    (attribute
        (attribute_name) @attribute_name
        (quoted_attribute_value)? @attribute_value) @attribute
    """,
    "attribute_name": """
    (attribute_name) @attribute_name
    """,
    "attribute_value": """
    (quoted_attribute_value) @attribute_value
    """,
    "class_attribute": _attribute_by_name("class_attribute", "class", "class_value"),
    "id_attribute": _attribute_by_name("id_attribute", "id", "id_value"),
    "src_attribute": _attribute_by_name("src_attribute", "src", "src_value"),
    "href_attribute": _attribute_by_name("href_attribute", "href", "href_value"),
    # --- Text Content ---
    "text": """
    (text) @text
    """,
    "raw_text": """
    (raw_text) @raw_text
    """,
    # --- Comments ---
    "comment": """
    (comment) @comment
    """,
    # --- Document Structure ---
    "doctype": """
    (doctype) @doctype
    """,
    "document": """
    (document) @document
    """,
    # --- Semantic Elements ---
    "heading": _element_by_tag("heading", "h[1-6]"),
    "paragraph": _element_by_tag("paragraph", "p"),
    "link": _element_by_tag("link", "a"),
    "image": _element_by_tag("image", "img"),
    "list": _element_by_tag("list", "(ul|ol|dl)"),
    "list_item": _element_by_tag("list_item", "(li|dt|dd)"),
    "table": _element_by_tag("table", "table"),
    "table_row": _element_by_tag("table_row", "tr"),
    "table_cell": _element_by_tag("table_cell", "(td|th)"),
    # --- Structure Elements ---
    "html": _element_by_tag("html", "html"),
    "head": _element_by_tag("head", "head"),
    "body": _element_by_tag("body", "body"),
    "header": _element_by_tag("header", "header"),
    "footer": _element_by_tag("footer", "footer"),
    "main": _element_by_tag("main", "main"),
    "section": _element_by_tag("section", "section"),
    "article": _element_by_tag("article", "article"),
    "aside": _element_by_tag("aside", "aside"),
    "nav": _element_by_tag("nav", "nav"),
    "div": _element_by_tag("div", "div"),
    "span": _element_by_tag("span", "span"),
    # --- Form Elements ---
    "form": _element_by_tag("form", "form"),
    "input": _element_by_tag("input", "input"),
    "button": _element_by_tag("button", "button"),
    "textarea": _element_by_tag("textarea", "textarea"),
    "select": _element_by_tag("select", "select"),
    "option": _element_by_tag("option", "option"),
    "label": _element_by_tag("label", "label"),
    "fieldset": _element_by_tag("fieldset", "fieldset"),
    "legend": _element_by_tag("legend", "legend"),
    # --- Media Elements ---
    "video": _element_by_tag("video", "video"),
    "audio": _element_by_tag("audio", "audio"),
    "source": _element_by_tag("source", "source"),
    "track": _element_by_tag("track", "track"),
    "canvas": _element_by_tag("canvas", "canvas"),
    "svg": _element_by_tag("svg", "svg"),
    # --- Meta Elements ---
    "meta": _element_by_tag("meta", "meta"),
    "title": _element_by_tag("title", "title"),
    "link_tag": _element_by_tag("link_tag", "link"),
    # NOTE(review): tree-sitter-html appears to parse <style> and <script> as
    # `style_element` / `script_element` rather than `element`, so the two
    # entries below may never match -- confirm against the grammar; the
    # dedicated "script_element" / "style_element" queries cover those nodes.
    "style": _element_by_tag("style", "style"),
    "script": _element_by_tag("script", "script"),
    "noscript": _element_by_tag("noscript", "noscript"),
    "base": _element_by_tag("base", "base"),
    # --- Script and Style Elements ---
    "script_element": """
    (script_element) @script_element
    """,
    "style_element": """
    (style_element) @style_element
    """,
    # --- Name-only Extraction ---
    "tag_name": """
    (tag_name) @tag_name
    """,
    "element_name": """
    (element
        (start_tag
            (tag_name) @element_name))
    """,
}
378
+
379
# Query descriptions
#
# Human-readable description for each key of HTML_QUERIES.  Lookups that miss
# this table fall back to "No description" (see the ALL_QUERIES construction
# and get_html_query_description in this module).
HTML_QUERY_DESCRIPTIONS: dict[str, str] = {
    # Basic elements
    "element": "Search all HTML elements",
    "start_tag": "Search start tags",
    "end_tag": "Search end tags",
    "self_closing_tag": "Search self-closing tags",
    "void_element": "Search void elements (br, img, input, etc.)",
    # Attributes
    "attribute": "Search all attributes",
    "attribute_name": "Search attribute names",
    "attribute_value": "Search attribute values",
    "class_attribute": "Search class attributes",
    "id_attribute": "Search id attributes",
    "src_attribute": "Search src attributes",
    "href_attribute": "Search href attributes",
    # Text, comments and document structure
    "text": "Search text content",
    "raw_text": "Search raw text content",
    "comment": "Search HTML comments",
    "doctype": "Search DOCTYPE declarations",
    "document": "Search document root",
    # Semantic elements
    "heading": "Search heading elements (h1-h6)",
    "paragraph": "Search paragraph elements",
    "link": "Search anchor elements",
    "image": "Search image elements",
    "list": "Search list elements (ul, ol, dl)",
    "list_item": "Search list item elements (li, dt, dd)",
    "table": "Search table elements",
    "table_row": "Search table row elements",
    "table_cell": "Search table cell elements (td, th)",
    # Structure elements
    "html": "Search html elements",
    "head": "Search head elements",
    "body": "Search body elements",
    "header": "Search header elements",
    "footer": "Search footer elements",
    "main": "Search main elements",
    "section": "Search section elements",
    "article": "Search article elements",
    "aside": "Search aside elements",
    "nav": "Search nav elements",
    "div": "Search div elements",
    "span": "Search span elements",
    # Form elements
    "form": "Search form elements",
    "input": "Search input elements",
    "button": "Search button elements",
    "textarea": "Search textarea elements",
    "select": "Search select elements",
    "option": "Search option elements",
    "label": "Search label elements",
    "fieldset": "Search fieldset elements",
    "legend": "Search legend elements",
    # Media elements
    "video": "Search video elements",
    "audio": "Search audio elements",
    "source": "Search source elements",
    "track": "Search track elements",
    "canvas": "Search canvas elements",
    "svg": "Search svg elements",
    # Meta elements
    "meta": "Search meta elements",
    "title": "Search title elements",
    "link_tag": "Search link elements",
    "style": "Search style elements",
    "script": "Search script elements",
    "noscript": "Search noscript elements",
    "base": "Search base elements",
    # Script and style elements
    "script_element": "Search script elements with content",
    "style_element": "Search style elements with content",
    # Name-only extraction
    "tag_name": "Search tag names only",
    "element_name": "Search element names only",
}
446
+
447
# Legacy query definitions for backward compatibility.
#
# The tree-sitter-html grammar declares no named fields, so children are
# matched positionally; field syntax (``name:`` / ``value:``) is rejected when
# the query is compiled.

# Elements with their tag name and, when present, text content and end tag.
ELEMENTS = """
(element
    (start_tag
        (tag_name) @element.name)
    (text)? @element.text
    (end_tag)?) @element.full
"""

# Attributes with their name and optional quoted value.
ATTRIBUTES = """
(attribute
    (attribute_name) @attribute.name
    (quoted_attribute_value)? @attribute.value) @attribute.full
"""

# All comment nodes.
COMMENTS = """
(comment) @comment
"""

# All text nodes.
TEXT_CONTENT = """
(text) @text
"""
469
+
470
# Convert to ALL_QUERIES format for dynamic loader compatibility
#
# Each entry maps a query name to {"query": <pattern>, "description": <text>}.
# Entries are inserted in HTML_QUERIES order, followed by the legacy aliases.
ALL_QUERIES: dict[str, dict[str, str]] = {}
# NOTE: the loop variables below stay bound at module level after the loop
# finishes, so they are visible as attributes of this module.
for query_name, query_string in HTML_QUERIES.items():
    # Queries without a registered description get a placeholder.
    description = HTML_QUERY_DESCRIPTIONS.get(query_name, "No description")
    ALL_QUERIES[query_name] = {"query": query_string, "description": description}

# Add legacy queries for backward compatibility
# (plural names, so they do not collide with the HTML_QUERIES keys above).
ALL_QUERIES["elements"] = {
    "query": ELEMENTS,
    "description": "Search all HTML elements with names and text",
}
ALL_QUERIES["attributes"] = {
    "query": ATTRIBUTES,
    "description": "Search all HTML attributes",
}
ALL_QUERIES["comments"] = {
    "query": COMMENTS,
    "description": "Search all HTML comments",
}
ALL_QUERIES["text_content"] = {
    "query": TEXT_CONTENT,
    "description": "Search all text content",
}
493
+
494
+
495
def get_html_query(name: str) -> str:
    """
    Look up an HTML query pattern by name.

    Only the keys of HTML_QUERIES are considered; the legacy aliases that
    exist solely in ALL_QUERIES are not reachable through this function.

    Args:
        name: Query name

    Returns:
        Query string

    Raises:
        ValueError: When query is not found
    """
    if name in HTML_QUERIES:
        return HTML_QUERIES[name]

    # Unknown name: report every valid choice to the caller.
    known_names = list(HTML_QUERIES.keys())
    raise ValueError(
        f"HTML query '{name}' does not exist. Available: {known_names}"
    )
515
+
516
+
517
def get_html_query_description(name: str) -> str:
    """
    Look up the human-readable description of an HTML query.

    Args:
        name: Query name

    Returns:
        Query description, or "No description" when the name has no entry
        in HTML_QUERY_DESCRIPTIONS
    """
    fallback = "No description"
    return HTML_QUERY_DESCRIPTIONS.get(name, fallback)
528
+
529
+
530
def get_query(name: str) -> str:
    """
    Return the query string registered under ``name`` in ALL_QUERIES.

    Raises:
        ValueError: When ``name`` is not a key of ALL_QUERIES
    """
    if name not in ALL_QUERIES:
        raise ValueError(
            f"Query '{name}' not found. Available queries: {list(ALL_QUERIES.keys())}"
        )
    entry = ALL_QUERIES[name]
    return entry["query"]
537
+
538
+
539
def get_all_queries() -> dict:
    """
    Return the ALL_QUERIES mapping.

    The module-level dict itself is returned, not a copy.
    """
    return ALL_QUERIES
542
+
543
+
544
def list_queries() -> list:
    """Return the names of all entries in ALL_QUERIES as a new list."""
    return list(ALL_QUERIES)
547
+
548
+
549
def get_available_html_queries() -> list[str]:
    """
    Enumerate the names defined in HTML_QUERIES.

    Returns:
        New list of query names, in definition order
    """
    return list(HTML_QUERIES)