webtap-tool 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webtap-tool might be problematic. Click here for more details.
- webtap/api.py +318 -9
- webtap/app.py +15 -9
- webtap/cdp/session.py +101 -1
- webtap/commands/DEVELOPER_GUIDE.md +108 -22
- webtap/commands/TIPS.md +24 -1
- webtap/commands/_builders.py +139 -1
- webtap/commands/body.py +1 -2
- webtap/commands/connection.py +1 -2
- webtap/commands/console.py +1 -2
- webtap/commands/events.py +1 -2
- webtap/commands/fetch.py +1 -2
- webtap/commands/filters.py +95 -62
- webtap/commands/inspect.py +1 -2
- webtap/commands/javascript.py +41 -26
- webtap/commands/navigation.py +1 -2
- webtap/commands/network.py +11 -7
- webtap/commands/selections.py +129 -0
- webtap/commands/server.py +19 -0
- webtap/filters.py +116 -56
- webtap/services/dom.py +512 -0
- webtap/services/main.py +14 -0
- {webtap_tool-0.3.0.dist-info → webtap_tool-0.5.0.dist-info}/METADATA +2 -2
- {webtap_tool-0.3.0.dist-info → webtap_tool-0.5.0.dist-info}/RECORD +25 -24
- webtap/commands/_errors.py +0 -108
- {webtap_tool-0.3.0.dist-info → webtap_tool-0.5.0.dist-info}/WHEEL +0 -0
- {webtap_tool-0.3.0.dist-info → webtap_tool-0.5.0.dist-info}/entry_points.txt +0 -0
webtap/commands/javascript.py
CHANGED
|
@@ -1,9 +1,7 @@
|
|
|
1
1
|
"""JavaScript code execution in browser context."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
3
|
from webtap.app import app
|
|
5
|
-
from webtap.commands._errors import check_connection
|
|
6
|
-
from webtap.commands._builders import info_response, error_response
|
|
4
|
+
from webtap.commands._builders import check_connection, info_response, error_response, code_result_response
|
|
7
5
|
from webtap.commands._tips import get_mcp_description
|
|
8
6
|
|
|
9
7
|
|
|
@@ -17,11 +15,12 @@ mcp_desc = get_mcp_description("js")
|
|
|
17
15
|
},
|
|
18
16
|
fastmcp={"type": "tool", "description": mcp_desc} if mcp_desc else {"type": "tool"},
|
|
19
17
|
)
|
|
20
|
-
def js(state, code: str, wait_return: bool = True, await_promise: bool = False) -> dict:
|
|
21
|
-
"""Execute JavaScript in the browser.
|
|
18
|
+
def js(state, code: str, selection: int = None, wait_return: bool = True, await_promise: bool = False) -> dict: # pyright: ignore[reportArgumentType]
|
|
19
|
+
"""Execute JavaScript in the browser with optional element selection.
|
|
22
20
|
|
|
23
21
|
Args:
|
|
24
|
-
code: JavaScript code to execute
|
|
22
|
+
code: JavaScript code to execute (use 'element' variable if selection provided)
|
|
23
|
+
selection: Browser element selection number (e.g., 1 for #1) - makes element available
|
|
25
24
|
wait_return: Wait for and return result (default: True)
|
|
26
25
|
await_promise: Await promises before returning (default: False)
|
|
27
26
|
|
|
@@ -31,21 +30,53 @@ def js(state, code: str, wait_return: bool = True, await_promise: bool = False)
|
|
|
31
30
|
js("console.log('test')", wait_return=False) # Fire and forget
|
|
32
31
|
js("[...document.links].map(a => a.href)") # Get all links
|
|
33
32
|
|
|
33
|
+
# With browser element selection
|
|
34
|
+
js("element.offsetWidth", selection=1) # Use element #1 from browser()
|
|
35
|
+
js("element.classList", selection=2) # Use element #2
|
|
36
|
+
js("element.getBoundingClientRect()", selection=1)
|
|
37
|
+
|
|
34
38
|
# Async operations
|
|
35
39
|
js("fetch('/api').then(r => r.json())", await_promise=True)
|
|
36
40
|
|
|
37
41
|
# DOM manipulation (no return needed)
|
|
38
42
|
js("document.querySelectorAll('.ad').forEach(e => e.remove())", wait_return=False)
|
|
39
43
|
|
|
40
|
-
# Install interceptors
|
|
41
|
-
js("window.fetch = new Proxy(window.fetch, {get: (t, p) => console.log(p)})", wait_return=False)
|
|
42
|
-
|
|
43
44
|
Returns:
|
|
44
45
|
The evaluated result if wait_return=True, otherwise execution status
|
|
45
46
|
"""
|
|
46
47
|
if error := check_connection(state):
|
|
47
48
|
return error
|
|
48
49
|
|
|
50
|
+
# Handle browser element selection
|
|
51
|
+
if selection is not None:
|
|
52
|
+
# Check if browser data exists
|
|
53
|
+
if not hasattr(state, "browser_data") or not state.browser_data:
|
|
54
|
+
return error_response(
|
|
55
|
+
"No browser selections available",
|
|
56
|
+
suggestions=[
|
|
57
|
+
"Use browser() to select elements first",
|
|
58
|
+
"Or omit the selection parameter to run code directly",
|
|
59
|
+
],
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
# Get the jsPath for the selected element
|
|
63
|
+
selections = state.browser_data.get("selections", {})
|
|
64
|
+
sel_key = str(selection)
|
|
65
|
+
|
|
66
|
+
if sel_key not in selections:
|
|
67
|
+
available = ", ".join(selections.keys()) if selections else "none"
|
|
68
|
+
return error_response(
|
|
69
|
+
f"Selection #{selection} not found",
|
|
70
|
+
suggestions=[f"Available selections: {available}", "Use browser() to see all selections"],
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
js_path = selections[sel_key].get("jsPath")
|
|
74
|
+
if not js_path:
|
|
75
|
+
return error_response(f"Selection #{selection} has no jsPath")
|
|
76
|
+
|
|
77
|
+
# Wrap code with element variable
|
|
78
|
+
code = f"const element = {js_path}; {code}"
|
|
79
|
+
|
|
49
80
|
result = state.cdp.execute(
|
|
50
81
|
"Runtime.evaluate", {"expression": code, "returnByValue": wait_return, "awaitPromise": await_promise}
|
|
51
82
|
)
|
|
@@ -60,23 +91,7 @@ def js(state, code: str, wait_return: bool = True, await_promise: bool = False)
|
|
|
60
91
|
# Return based on wait_return flag
|
|
61
92
|
if wait_return:
|
|
62
93
|
value = result.get("result", {}).get("value")
|
|
63
|
-
|
|
64
|
-
# Format the result in markdown
|
|
65
|
-
elements = [
|
|
66
|
-
{"type": "heading", "content": "JavaScript Result", "level": 2},
|
|
67
|
-
{"type": "code_block", "content": code, "language": "javascript"}, # Full code
|
|
68
|
-
]
|
|
69
|
-
|
|
70
|
-
# Add the result
|
|
71
|
-
if value is not None:
|
|
72
|
-
if isinstance(value, (dict, list)):
|
|
73
|
-
elements.append({"type": "code_block", "content": json.dumps(value, indent=2), "language": "json"})
|
|
74
|
-
else:
|
|
75
|
-
elements.append({"type": "text", "content": f"**Result:** `{value}`"})
|
|
76
|
-
else:
|
|
77
|
-
elements.append({"type": "text", "content": "**Result:** _(no return value)_"})
|
|
78
|
-
|
|
79
|
-
return {"elements": elements}
|
|
94
|
+
return code_result_response("JavaScript Result", code, "javascript", result=value)
|
|
80
95
|
else:
|
|
81
96
|
return info_response(
|
|
82
97
|
title="JavaScript Execution",
|
webtap/commands/navigation.py
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
"""Browser page navigation and history commands."""
|
|
2
2
|
|
|
3
3
|
from webtap.app import app
|
|
4
|
-
from webtap.commands._errors import check_connection
|
|
5
|
-
from webtap.commands._builders import info_response, table_response, error_response
|
|
4
|
+
from webtap.commands._builders import check_connection, info_response, table_response, error_response
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
@app.command(display="markdown", fastmcp={"type": "tool"})
|
webtap/commands/network.py
CHANGED
|
@@ -3,8 +3,7 @@
|
|
|
3
3
|
from typing import List
|
|
4
4
|
|
|
5
5
|
from webtap.app import app
|
|
6
|
-
from webtap.commands._builders import table_response
|
|
7
|
-
from webtap.commands._errors import check_connection
|
|
6
|
+
from webtap.commands._builders import check_connection, table_response
|
|
8
7
|
from webtap.commands._tips import get_tips
|
|
9
8
|
|
|
10
9
|
|
|
@@ -69,17 +68,22 @@ def network(state, limit: int = 20, filters: List[str] = None, no_filters: bool
|
|
|
69
68
|
if limit and len(results) == limit:
|
|
70
69
|
warnings.append(f"Showing first {limit} results (use limit parameter to see more)")
|
|
71
70
|
|
|
72
|
-
# Get tips from TIPS.md with context
|
|
73
|
-
|
|
71
|
+
# Get tips from TIPS.md with context, and add filter guidance
|
|
72
|
+
combined_tips = [
|
|
73
|
+
"Reduce noise with `filters()` - filter by type (XHR, Fetch) or domain (*/api/*)",
|
|
74
|
+
]
|
|
75
|
+
|
|
74
76
|
if rows:
|
|
75
77
|
example_id = rows[0]["ID"]
|
|
76
|
-
|
|
78
|
+
context_tips = get_tips("network", context={"id": example_id})
|
|
79
|
+
if context_tips:
|
|
80
|
+
combined_tips.extend(context_tips)
|
|
77
81
|
|
|
78
82
|
return table_response(
|
|
79
83
|
title="Network Requests",
|
|
80
84
|
headers=["ID", "ReqID", "Method", "Status", "URL", "Type", "Size"],
|
|
81
85
|
rows=rows,
|
|
82
|
-
summary=f"{len(rows)} requests",
|
|
86
|
+
summary=f"{len(rows)} requests" if rows else None,
|
|
83
87
|
warnings=warnings,
|
|
84
|
-
tips=
|
|
88
|
+
tips=combined_tips,
|
|
85
89
|
)
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Browser element selection and prompt analysis commands.
|
|
2
|
+
|
|
3
|
+
PUBLIC API:
|
|
4
|
+
- browser: Analyze browser element selections with prompt
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from webtap.app import app
|
|
8
|
+
from webtap.commands._utils import evaluate_expression, format_expression_result
|
|
9
|
+
from webtap.commands._builders import error_response
|
|
10
|
+
from webtap.commands._tips import get_tips
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@app.command(
|
|
14
|
+
display="markdown",
|
|
15
|
+
fastmcp=[{"type": "resource", "mime_type": "application/json"}, {"type": "tool"}],
|
|
16
|
+
)
|
|
17
|
+
def selections(state, expr: str = None) -> dict: # pyright: ignore[reportArgumentType]
|
|
18
|
+
"""Browser element selections with prompt and analysis.
|
|
19
|
+
|
|
20
|
+
As Resource (no parameters):
|
|
21
|
+
browser # Returns current prompt and all selections
|
|
22
|
+
|
|
23
|
+
As Tool (with parameters):
|
|
24
|
+
browser(expr="data['prompt']") # Get prompt text
|
|
25
|
+
browser(expr="data['selections']['1']['styles']") # Get styles for #1
|
|
26
|
+
browser(expr="len(data['selections'])") # Count selections
|
|
27
|
+
browser(expr="{k: v['selector'] for k, v in data['selections'].items()}") # All selectors
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
expr: Python expression with 'data' variable containing prompt and selections
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
Formatted browser data or expression result
|
|
34
|
+
"""
|
|
35
|
+
# Check if browser data exists
|
|
36
|
+
if not hasattr(state, "browser_data") or not state.browser_data:
|
|
37
|
+
return error_response(
|
|
38
|
+
"No browser selections available",
|
|
39
|
+
suggestions=[
|
|
40
|
+
"Use the Chrome extension to select elements",
|
|
41
|
+
"Click 'Start Selection Mode' in the extension popup",
|
|
42
|
+
"Select elements on the page and submit a prompt",
|
|
43
|
+
],
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
data = state.browser_data
|
|
47
|
+
|
|
48
|
+
# No expression - RESOURCE MODE: Return formatted view
|
|
49
|
+
if not expr:
|
|
50
|
+
return _format_browser_data(data)
|
|
51
|
+
|
|
52
|
+
# TOOL MODE: Evaluate expression
|
|
53
|
+
try:
|
|
54
|
+
namespace = {"data": data}
|
|
55
|
+
result, output = evaluate_expression(expr, namespace)
|
|
56
|
+
formatted_result = format_expression_result(result, output)
|
|
57
|
+
|
|
58
|
+
# Build markdown response
|
|
59
|
+
return {
|
|
60
|
+
"elements": [
|
|
61
|
+
{"type": "heading", "content": "Expression Result", "level": 2},
|
|
62
|
+
{"type": "code_block", "content": expr, "language": "python"},
|
|
63
|
+
{"type": "text", "content": "**Result:**"},
|
|
64
|
+
{"type": "code_block", "content": formatted_result, "language": ""},
|
|
65
|
+
]
|
|
66
|
+
}
|
|
67
|
+
except Exception as e:
|
|
68
|
+
# Provide helpful suggestions
|
|
69
|
+
suggestions = [
|
|
70
|
+
"The data is available as 'data' variable",
|
|
71
|
+
"Access prompt: data['prompt']",
|
|
72
|
+
"Access selections: data['selections']",
|
|
73
|
+
"Access specific element: data['selections']['1']",
|
|
74
|
+
"Available fields: outerHTML, selector, jsPath, styles, xpath, fullXpath, preview",
|
|
75
|
+
]
|
|
76
|
+
|
|
77
|
+
if "KeyError" in str(type(e).__name__):
|
|
78
|
+
suggestions.extend(
|
|
79
|
+
[
|
|
80
|
+
"Check available selection IDs: list(data['selections'].keys())",
|
|
81
|
+
"Check available fields: data['selections']['1'].keys()",
|
|
82
|
+
]
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
return error_response(f"{type(e).__name__}: {e}", suggestions=suggestions)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _format_browser_data(data: dict) -> dict:
|
|
89
|
+
"""Format browser data as markdown for resource view."""
|
|
90
|
+
elements = []
|
|
91
|
+
|
|
92
|
+
# Show prompt
|
|
93
|
+
elements.append({"type": "heading", "content": "Browser Prompt", "level": 2})
|
|
94
|
+
elements.append({"type": "text", "content": data.get("prompt", "")})
|
|
95
|
+
|
|
96
|
+
# Show selection count
|
|
97
|
+
selection_count = len(data.get("selections", {}))
|
|
98
|
+
elements.append({"type": "text", "content": f"\n**Selected Elements:** {selection_count}"})
|
|
99
|
+
|
|
100
|
+
# Show each selection with preview
|
|
101
|
+
if selection_count > 0:
|
|
102
|
+
elements.append({"type": "heading", "content": "Element Selections", "level": 3})
|
|
103
|
+
|
|
104
|
+
for sel_id in sorted(data["selections"].keys(), key=lambda x: int(x)):
|
|
105
|
+
sel = data["selections"][sel_id]
|
|
106
|
+
preview = sel.get("preview", {})
|
|
107
|
+
|
|
108
|
+
# Build preview line
|
|
109
|
+
preview_parts = [f"**#{sel_id}:**", preview.get("tag", "unknown")]
|
|
110
|
+
if preview.get("id"):
|
|
111
|
+
preview_parts.append(f"#{preview['id']}")
|
|
112
|
+
if preview.get("classes"):
|
|
113
|
+
preview_parts.append(f".{preview['classes'][0]}")
|
|
114
|
+
|
|
115
|
+
elements.append({"type": "text", "content": " ".join(preview_parts)})
|
|
116
|
+
|
|
117
|
+
# Show selector
|
|
118
|
+
elements.append({"type": "code_block", "content": sel.get("selector", ""), "language": "css"})
|
|
119
|
+
|
|
120
|
+
# Show usage tips from TIPS.md
|
|
121
|
+
tips = get_tips("selections")
|
|
122
|
+
if tips:
|
|
123
|
+
elements.append({"type": "heading", "content": "Next Steps", "level": 3})
|
|
124
|
+
elements.append({"type": "list", "items": tips})
|
|
125
|
+
|
|
126
|
+
return {"elements": elements}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
__all__ = ["selections"]
|
webtap/commands/server.py
CHANGED
|
@@ -20,6 +20,7 @@ API_PORT = 8765
|
|
|
20
20
|
def _check_port() -> bool:
|
|
21
21
|
"""Check if API port is in use."""
|
|
22
22
|
with socket.socket() as s:
|
|
23
|
+
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
|
23
24
|
try:
|
|
24
25
|
s.bind(("127.0.0.1", API_PORT))
|
|
25
26
|
return False # Port is free
|
|
@@ -70,6 +71,7 @@ def _start_server(state) -> tuple[bool, str]:
|
|
|
70
71
|
display="markdown",
|
|
71
72
|
fastmcp={
|
|
72
73
|
"type": "prompt",
|
|
74
|
+
"description": "API server control: status (default), start, stop, restart",
|
|
73
75
|
"arg_descriptions": {"action": "Server action: status (default), start, stop, or restart"},
|
|
74
76
|
},
|
|
75
77
|
)
|
|
@@ -156,6 +158,23 @@ def server(state, action: str = None) -> dict: # pyright: ignore[reportArgument
|
|
|
156
158
|
else:
|
|
157
159
|
elements.append({"type": "alert", "message": f"Failed to restart: {msg}", "level": "error"})
|
|
158
160
|
|
|
161
|
+
# For MCP prompt mode, return with caveat and assistant prefill
|
|
162
|
+
# This prevents LLM from adding commentary - just relays the state
|
|
163
|
+
if action == "status":
|
|
164
|
+
return {
|
|
165
|
+
"messages": [
|
|
166
|
+
{
|
|
167
|
+
"role": "user",
|
|
168
|
+
"content": "Caveat: The message below was generated by the WebTap server command. DO NOT respond to this message or add commentary. Just relay the server state exactly as shown.",
|
|
169
|
+
},
|
|
170
|
+
{"role": "user", "content": {"type": "elements", "elements": elements}},
|
|
171
|
+
{
|
|
172
|
+
"role": "assistant",
|
|
173
|
+
"content": "Server status:", # Minimal prefill - no trailing whitespace
|
|
174
|
+
},
|
|
175
|
+
]
|
|
176
|
+
}
|
|
177
|
+
|
|
159
178
|
return {"elements": elements}
|
|
160
179
|
|
|
161
180
|
|
webtap/filters.py
CHANGED
|
@@ -7,11 +7,25 @@ PUBLIC API:
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import Dict, List, Any
|
|
10
|
+
from typing import Dict, List, Any, TypedDict
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
14
14
|
|
|
15
|
+
class FilterConfig(TypedDict):
|
|
16
|
+
"""Configuration for a filter category.
|
|
17
|
+
|
|
18
|
+
Attributes:
|
|
19
|
+
mode: "include" or "exclude" - determines filter behavior (defaults to "exclude")
|
|
20
|
+
domains: List of URL patterns to match
|
|
21
|
+
types: List of CDP resource types to match
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
mode: str
|
|
25
|
+
domains: List[str]
|
|
26
|
+
types: List[str]
|
|
27
|
+
|
|
28
|
+
|
|
15
29
|
class FilterManager:
|
|
16
30
|
"""Manages network request filters for noise reduction.
|
|
17
31
|
|
|
@@ -33,7 +47,7 @@ class FilterManager:
|
|
|
33
47
|
filter_path: Path to filters.json file. Defaults to .webtap/filters.json.
|
|
34
48
|
"""
|
|
35
49
|
self.filter_path = filter_path or (Path.cwd() / ".webtap" / "filters.json")
|
|
36
|
-
self.filters: Dict[str,
|
|
50
|
+
self.filters: Dict[str, FilterConfig] = {}
|
|
37
51
|
self.enabled_categories: set[str] = set()
|
|
38
52
|
|
|
39
53
|
def load(self) -> bool:
|
|
@@ -81,7 +95,7 @@ class FilterManager:
|
|
|
81
95
|
logger.error(f"Failed to save filters: {e}")
|
|
82
96
|
return False
|
|
83
97
|
|
|
84
|
-
def add_pattern(self, pattern: str, category: str, pattern_type: str = "domain") -> bool:
|
|
98
|
+
def add_pattern(self, pattern: str, category: str, pattern_type: str = "domain", mode: str | None = None) -> bool:
|
|
85
99
|
"""Add a filter pattern to a category.
|
|
86
100
|
|
|
87
101
|
Creates the category if it doesn't exist and enables it. Supports wildcard
|
|
@@ -91,13 +105,17 @@ class FilterManager:
|
|
|
91
105
|
pattern: Pattern to add (e.g., "*ads*", "googletagmanager.com").
|
|
92
106
|
category: Category name (e.g., "ads", "tracking").
|
|
93
107
|
pattern_type: "domain" or "type". Defaults to "domain".
|
|
108
|
+
mode: "include" or "exclude". Required for new categories.
|
|
94
109
|
|
|
95
110
|
Returns:
|
|
96
111
|
True if pattern was added, False if it already existed.
|
|
97
112
|
"""
|
|
98
113
|
if category not in self.filters:
|
|
99
|
-
|
|
114
|
+
if mode is None:
|
|
115
|
+
raise ValueError(f"Mode required when creating new category '{category}'")
|
|
116
|
+
self.filters[category] = {"mode": mode, "domains": [], "types": []}
|
|
100
117
|
self.enabled_categories.add(category)
|
|
118
|
+
# Existing category keeps its mode
|
|
101
119
|
|
|
102
120
|
key = "domains" if pattern_type == "domain" else "types"
|
|
103
121
|
if pattern not in self.filters[category][key]:
|
|
@@ -125,7 +143,9 @@ class FilterManager:
|
|
|
125
143
|
return category
|
|
126
144
|
return ""
|
|
127
145
|
|
|
128
|
-
def update_category(
|
|
146
|
+
def update_category(
|
|
147
|
+
self, category: str, domains: List[str] | None = None, types: List[str] | None = None, mode: str | None = None
|
|
148
|
+
):
|
|
129
149
|
"""Update or create a category with new patterns.
|
|
130
150
|
|
|
131
151
|
Creates the category if it doesn't exist and enables it. If patterns are
|
|
@@ -135,10 +155,15 @@ class FilterManager:
|
|
|
135
155
|
category: Category name.
|
|
136
156
|
domains: List of domain patterns. None leaves existing unchanged.
|
|
137
157
|
types: List of type patterns. None leaves existing unchanged.
|
|
158
|
+
mode: "include" or "exclude". None leaves existing unchanged.
|
|
138
159
|
"""
|
|
139
160
|
if category not in self.filters:
|
|
140
|
-
|
|
161
|
+
if mode is None:
|
|
162
|
+
raise ValueError(f"Mode required when creating new category '{category}'")
|
|
163
|
+
self.filters[category] = {"mode": mode, "domains": [], "types": []}
|
|
141
164
|
|
|
165
|
+
if mode is not None:
|
|
166
|
+
self.filters[category]["mode"] = mode
|
|
142
167
|
if domains is not None:
|
|
143
168
|
self.filters[category]["domains"] = domains
|
|
144
169
|
if types is not None:
|
|
@@ -181,8 +206,8 @@ class FilterManager:
|
|
|
181
206
|
def get_filter_sql(self, use_all: bool = True, categories: List[str] | None = None) -> str:
|
|
182
207
|
"""Generate SQL WHERE clause for filtering CDP events.
|
|
183
208
|
|
|
184
|
-
Creates SQL conditions
|
|
185
|
-
|
|
209
|
+
Creates SQL conditions based on filter mode (include/exclude) for network requests.
|
|
210
|
+
Handles wildcard patterns by converting them to SQL LIKE patterns
|
|
186
211
|
and properly escapes SQL strings.
|
|
187
212
|
|
|
188
213
|
Args:
|
|
@@ -206,41 +231,77 @@ class FilterManager:
|
|
|
206
231
|
if not active_categories:
|
|
207
232
|
return ""
|
|
208
233
|
|
|
209
|
-
|
|
210
|
-
all_domains = []
|
|
211
|
-
all_types = []
|
|
212
|
-
|
|
213
|
-
for category in active_categories:
|
|
214
|
-
all_domains.extend(self.filters[category].get("domains", []))
|
|
215
|
-
all_types.extend(self.filters[category].get("types", []))
|
|
216
|
-
|
|
217
|
-
# Build filter conditions - exclude matching items
|
|
234
|
+
include_conditions = []
|
|
218
235
|
exclude_conditions = []
|
|
219
236
|
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
#
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
237
|
+
for category in active_categories:
|
|
238
|
+
config = self.filters[category]
|
|
239
|
+
mode = config.get("mode")
|
|
240
|
+
if mode is None:
|
|
241
|
+
logger.error(f"Filter category '{category}' missing required 'mode' field. Skipping.")
|
|
242
|
+
continue # Skip this category entirely
|
|
243
|
+
domains = config.get("domains", [])
|
|
244
|
+
types = config.get("types", [])
|
|
245
|
+
|
|
246
|
+
category_conditions = []
|
|
247
|
+
|
|
248
|
+
# Domain filtering
|
|
249
|
+
if domains:
|
|
250
|
+
domain_conditions = []
|
|
251
|
+
for pattern in domains:
|
|
252
|
+
sql_pattern = pattern.replace("'", "''").replace("*", "%")
|
|
253
|
+
if mode == "include":
|
|
254
|
+
domain_conditions.append(
|
|
255
|
+
f"json_extract_string(event, '$.params.response.url') LIKE '{sql_pattern}'"
|
|
256
|
+
)
|
|
257
|
+
else: # exclude
|
|
258
|
+
domain_conditions.append(
|
|
259
|
+
f"json_extract_string(event, '$.params.response.url') NOT LIKE '{sql_pattern}'"
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
# For include: OR (match any pattern), for exclude: AND (match none)
|
|
263
|
+
if mode == "include":
|
|
264
|
+
if domain_conditions:
|
|
265
|
+
category_conditions.append(f"({' OR '.join(domain_conditions)})")
|
|
266
|
+
else:
|
|
267
|
+
if domain_conditions:
|
|
268
|
+
category_conditions.append(f"({' AND '.join(domain_conditions)})")
|
|
269
|
+
|
|
270
|
+
# Type filtering
|
|
271
|
+
if types:
|
|
272
|
+
escaped_types = [t.replace("'", "''") for t in types]
|
|
273
|
+
type_list = ", ".join(f"'{t}'" for t in escaped_types)
|
|
274
|
+
|
|
275
|
+
if mode == "include":
|
|
276
|
+
category_conditions.append(f"json_extract_string(event, '$.params.type') IN ({type_list})")
|
|
277
|
+
else: # exclude
|
|
278
|
+
category_conditions.append(
|
|
279
|
+
f"(COALESCE(json_extract_string(event, '$.params.type'), '') NOT IN ({type_list}) OR "
|
|
280
|
+
f"json_extract_string(event, '$.params.type') IS NULL)"
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Combine domain and type conditions for this category
|
|
284
|
+
if category_conditions:
|
|
285
|
+
category_sql = f"({' AND '.join(category_conditions)})"
|
|
286
|
+
if mode == "include":
|
|
287
|
+
include_conditions.append(category_sql)
|
|
288
|
+
else:
|
|
289
|
+
exclude_conditions.append(category_sql)
|
|
290
|
+
|
|
291
|
+
# Combine all conditions: (include1 OR include2) AND exclude1 AND exclude2
|
|
292
|
+
final_parts = []
|
|
293
|
+
|
|
294
|
+
if include_conditions:
|
|
295
|
+
if len(include_conditions) > 1:
|
|
296
|
+
final_parts.append(f"({' OR '.join(include_conditions)})")
|
|
297
|
+
else:
|
|
298
|
+
final_parts.append(include_conditions[0])
|
|
240
299
|
|
|
241
300
|
if exclude_conditions:
|
|
242
|
-
|
|
243
|
-
|
|
301
|
+
final_parts.extend(exclude_conditions)
|
|
302
|
+
|
|
303
|
+
if final_parts:
|
|
304
|
+
return f"({' AND '.join(final_parts)})"
|
|
244
305
|
|
|
245
306
|
return ""
|
|
246
307
|
|
|
@@ -263,27 +324,26 @@ class FilterManager:
|
|
|
263
324
|
"path": str(self.filter_path),
|
|
264
325
|
}
|
|
265
326
|
|
|
266
|
-
def
|
|
267
|
-
"""Get
|
|
268
|
-
|
|
269
|
-
Creates a human-readable summary of all filter categories with their
|
|
270
|
-
enabled status and pattern counts.
|
|
327
|
+
def get_categories_summary(self) -> List[Dict[str, Any]]:
|
|
328
|
+
"""Get summary data for all filter categories.
|
|
271
329
|
|
|
272
330
|
Returns:
|
|
273
|
-
|
|
331
|
+
List of dicts with category information including name, enabled status,
|
|
332
|
+
mode, and pattern counts.
|
|
274
333
|
"""
|
|
275
|
-
|
|
276
|
-
return f"No filters loaded (would load from {self.filter_path})"
|
|
277
|
-
|
|
278
|
-
lines = [f"Loaded filters from {self.filter_path}:"]
|
|
334
|
+
categories = []
|
|
279
335
|
for category in sorted(self.filters.keys()):
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
336
|
+
config = self.filters[category]
|
|
337
|
+
categories.append(
|
|
338
|
+
{
|
|
339
|
+
"name": category,
|
|
340
|
+
"enabled": category in self.enabled_categories,
|
|
341
|
+
"mode": config.get("mode"), # None if missing
|
|
342
|
+
"domain_count": len(config.get("domains", [])),
|
|
343
|
+
"type_count": len(config.get("types", [])),
|
|
344
|
+
}
|
|
345
|
+
)
|
|
346
|
+
return categories
|
|
287
347
|
|
|
288
348
|
|
|
289
349
|
__all__ = ["FilterManager"]
|