webtap-tool: webtap_tool-0.6.0-py3-none-any.whl → webtap_tool-0.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webtap-tool has been flagged as possibly problematic.
See the package's registry page for more details.
- webtap/cdp/session.py +92 -5
- webtap/commands/TIPS.md +6 -5
- webtap/commands/to_model.py +10 -3
- webtap/services/main.py +2 -2
- {webtap_tool-0.6.0.dist-info → webtap_tool-0.7.1.dist-info}/METADATA +1 -1
- {webtap_tool-0.6.0.dist-info → webtap_tool-0.7.1.dist-info}/RECORD +8 -8
- {webtap_tool-0.6.0.dist-info → webtap_tool-0.7.1.dist-info}/WHEEL +0 -0
- {webtap_tool-0.6.0.dist-info → webtap_tool-0.7.1.dist-info}/entry_points.txt +0 -0
webtap/cdp/session.py
CHANGED
|
@@ -6,6 +6,7 @@ PUBLIC API:
|
|
|
6
6
|
|
|
7
7
|
import json
|
|
8
8
|
import logging
|
|
9
|
+
import queue
|
|
9
10
|
import threading
|
|
10
11
|
from concurrent.futures import Future, TimeoutError
|
|
11
12
|
from typing import Any
|
|
@@ -54,9 +55,18 @@ class CDPSession:
|
|
|
54
55
|
self._lock = threading.Lock()
|
|
55
56
|
|
|
56
57
|
# DuckDB storage - store events AS-IS
|
|
58
|
+
# DuckDB connections are NOT thread-safe - use dedicated DB thread
|
|
57
59
|
self.db = duckdb.connect(":memory:")
|
|
60
|
+
self._db_work_queue: queue.Queue = queue.Queue()
|
|
61
|
+
self._db_result_queues: dict[int, queue.Queue] = {}
|
|
62
|
+
self._db_running = True
|
|
58
63
|
|
|
59
|
-
|
|
64
|
+
# Start dedicated database thread
|
|
65
|
+
self._db_thread = threading.Thread(target=self._db_worker, daemon=True)
|
|
66
|
+
self._db_thread.start()
|
|
67
|
+
|
|
68
|
+
# Initialize schema via queue
|
|
69
|
+
self._db_execute("CREATE TABLE events (event JSON)", wait_result=False)
|
|
60
70
|
|
|
61
71
|
# Live field path lookup for fast discovery
|
|
62
72
|
# Maps lowercase field names to their full paths with original case
|
|
@@ -71,6 +81,78 @@ class CDPSession:
|
|
|
71
81
|
self._last_broadcast_time = 0.0
|
|
72
82
|
self._broadcast_debounce = 1.0 # 1 second debounce
|
|
73
83
|
|
|
84
|
+
def _db_worker(self) -> None:
|
|
85
|
+
"""Dedicated thread for all database operations.
|
|
86
|
+
|
|
87
|
+
Ensures thread safety by serializing all DuckDB access through one thread.
|
|
88
|
+
DuckDB connections are not thread-safe - sharing them causes malloc corruption.
|
|
89
|
+
"""
|
|
90
|
+
while self._db_running:
|
|
91
|
+
try:
|
|
92
|
+
task = self._db_work_queue.get(timeout=1)
|
|
93
|
+
|
|
94
|
+
if task is None: # Shutdown signal
|
|
95
|
+
break
|
|
96
|
+
|
|
97
|
+
operation_type, sql, params, result_queue_id = task
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
if operation_type == "execute":
|
|
101
|
+
result = self.db.execute(sql, params or [])
|
|
102
|
+
data = result.fetchall() if result else []
|
|
103
|
+
elif operation_type == "delete":
|
|
104
|
+
self.db.execute(sql, params or [])
|
|
105
|
+
data = None
|
|
106
|
+
else:
|
|
107
|
+
data = None
|
|
108
|
+
|
|
109
|
+
# Send result back if requested
|
|
110
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
111
|
+
self._db_result_queues[result_queue_id].put(("success", data))
|
|
112
|
+
|
|
113
|
+
except Exception as e:
|
|
114
|
+
logger.error(f"Database error: {e}")
|
|
115
|
+
if result_queue_id and result_queue_id in self._db_result_queues:
|
|
116
|
+
self._db_result_queues[result_queue_id].put(("error", str(e)))
|
|
117
|
+
|
|
118
|
+
finally:
|
|
119
|
+
self._db_work_queue.task_done()
|
|
120
|
+
|
|
121
|
+
except queue.Empty:
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
def _db_execute(self, sql: str, params: list | None = None, wait_result: bool = True) -> Any:
|
|
125
|
+
"""Submit database operation to dedicated thread.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
sql: SQL query or command
|
|
129
|
+
params: Optional query parameters
|
|
130
|
+
wait_result: Block until operation completes and return result
|
|
131
|
+
|
|
132
|
+
Returns:
|
|
133
|
+
Query results if wait_result=True, None otherwise
|
|
134
|
+
"""
|
|
135
|
+
result_queue_id = None
|
|
136
|
+
result_queue = None
|
|
137
|
+
|
|
138
|
+
if wait_result:
|
|
139
|
+
result_queue_id = id(threading.current_thread())
|
|
140
|
+
result_queue = queue.Queue()
|
|
141
|
+
self._db_result_queues[result_queue_id] = result_queue
|
|
142
|
+
|
|
143
|
+
# Submit to work queue
|
|
144
|
+
self._db_work_queue.put(("execute", sql, params, result_queue_id))
|
|
145
|
+
|
|
146
|
+
if wait_result and result_queue and result_queue_id:
|
|
147
|
+
status, data = result_queue.get()
|
|
148
|
+
del self._db_result_queues[result_queue_id]
|
|
149
|
+
|
|
150
|
+
if status == "error":
|
|
151
|
+
raise RuntimeError(f"Database error: {data}")
|
|
152
|
+
return data
|
|
153
|
+
|
|
154
|
+
return None
|
|
155
|
+
|
|
74
156
|
def list_pages(self) -> list[dict]:
|
|
75
157
|
"""List available Chrome pages via HTTP API.
|
|
76
158
|
|
|
@@ -158,6 +240,12 @@ class CDPSession:
|
|
|
158
240
|
self.ws_thread.join(timeout=2)
|
|
159
241
|
self.ws_thread = None
|
|
160
242
|
|
|
243
|
+
# Shutdown database thread
|
|
244
|
+
self._db_running = False
|
|
245
|
+
self._db_work_queue.put(None) # Signal shutdown
|
|
246
|
+
if self._db_thread.is_alive():
|
|
247
|
+
self._db_thread.join(timeout=2)
|
|
248
|
+
|
|
161
249
|
self.connected.clear()
|
|
162
250
|
self.page_info = None
|
|
163
251
|
|
|
@@ -245,7 +333,7 @@ class CDPSession:
|
|
|
245
333
|
|
|
246
334
|
# CDP event - store AS-IS in DuckDB and update field lookup
|
|
247
335
|
elif "method" in data:
|
|
248
|
-
self.
|
|
336
|
+
self._db_execute("INSERT INTO events VALUES (?)", [json.dumps(data)], wait_result=False)
|
|
249
337
|
self._update_field_lookup(data)
|
|
250
338
|
|
|
251
339
|
# Call registered event callbacks
|
|
@@ -332,7 +420,7 @@ class CDPSession:
|
|
|
332
420
|
|
|
333
421
|
def clear_events(self) -> None:
|
|
334
422
|
"""Clear all stored events and reset field lookup."""
|
|
335
|
-
self.
|
|
423
|
+
self._db_execute("DELETE FROM events", wait_result=False)
|
|
336
424
|
self.field_paths.clear()
|
|
337
425
|
|
|
338
426
|
def query(self, sql: str, params: list | None = None) -> list:
|
|
@@ -352,8 +440,7 @@ class CDPSession:
|
|
|
352
440
|
query("SELECT * FROM events WHERE json_extract_string(event, '$.method') = 'Network.responseReceived'")
|
|
353
441
|
query("SELECT json_extract_string(event, '$.params.request.url') as url FROM events")
|
|
354
442
|
"""
|
|
355
|
-
|
|
356
|
-
return result.fetchall() if result else []
|
|
443
|
+
return self._db_execute(sql, params)
|
|
357
444
|
|
|
358
445
|
def fetch_body(self, request_id: str) -> dict | None:
|
|
359
446
|
"""Fetch response body via Network.getResponseBody CDP call.
|
webtap/commands/TIPS.md
CHANGED
|
@@ -26,7 +26,7 @@ body(123, "msgpack.unpackb(body)") # Binary formats
|
|
|
26
26
|
```
|
|
27
27
|
|
|
28
28
|
#### Tips
|
|
29
|
-
- **Generate models:** `to_model({id}, "models/model.py")` - create Pydantic models from JSON
|
|
29
|
+
- **Generate models:** `to_model({id}, "models/model.py", "Model")` - create Pydantic models from JSON
|
|
30
30
|
- **Chain requests:** `body({id}, "httpx.get(json.loads(body)['next_url']).text[:100]")`
|
|
31
31
|
- **Parse XML:** `body({id}, "ElementTree.fromstring(body).find('.//title').text")`
|
|
32
32
|
- **Extract forms:** `body({id}, "[f['action'] for f in bs4(body, 'html.parser').find_all('form')]")`
|
|
@@ -38,13 +38,14 @@ Generate Pydantic v2 models from JSON response bodies for reverse engineering AP
|
|
|
38
38
|
|
|
39
39
|
#### Examples
|
|
40
40
|
```python
|
|
41
|
-
to_model(123, "models/product.py")
|
|
42
|
-
to_model(123, "models/
|
|
43
|
-
to_model(123, "/tmp/
|
|
41
|
+
to_model(123, "models/product.py", "Product") # Generate from full response
|
|
42
|
+
to_model(123, "models/customers/group.py", "CustomerGroup", "Data[0]") # Extract nested + domain structure
|
|
43
|
+
to_model(123, "/tmp/item.py", "Item", "items[0]") # Extract array items
|
|
44
44
|
```
|
|
45
45
|
|
|
46
46
|
#### Tips
|
|
47
47
|
- **Check structure first:** `body({id})` - preview JSON before generating
|
|
48
|
+
- **Domain organization:** Use paths like `"models/customers/group.py"` for structure
|
|
48
49
|
- **Extract nested data:** Use `json_path="Data[0]"` to extract specific objects
|
|
49
50
|
- **Array items:** Extract first item with `json_path="items[0]"` for model generation
|
|
50
51
|
- **Auto-cleanup:** Generated models use snake_case fields and modern type hints (list, dict, | None)
|
|
@@ -80,7 +81,7 @@ Show network requests with full data.
|
|
|
80
81
|
|
|
81
82
|
#### Tips
|
|
82
83
|
- **Analyze responses:** `body({id})` - fetch response body
|
|
83
|
-
- **Generate models:** `to_model({id}, "models/model.py")` - create Pydantic models from JSON
|
|
84
|
+
- **Generate models:** `to_model({id}, "models/model.py", "Model")` - create Pydantic models from JSON
|
|
84
85
|
- **Parse HTML:** `body({id}, "bs4(body, 'html.parser').find('title').text")`
|
|
85
86
|
- **Extract JSON:** `body({id}, "json.loads(body)['data']")`
|
|
86
87
|
- **Find patterns:** `body({id}, "re.findall(r'/api/\\w+', body)")`
|
webtap/commands/to_model.py
CHANGED
|
@@ -12,12 +12,13 @@ mcp_desc = get_mcp_description("to_model")
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@app.command(display="markdown", fastmcp={"type": "tool", "description": mcp_desc} if mcp_desc else {"type": "tool"})
|
|
15
|
-
def to_model(state, response: int, output: str, json_path: str = None) -> dict: # pyright: ignore[reportArgumentType]
|
|
15
|
+
def to_model(state, response: int, output: str, model_name: str, json_path: str = None) -> dict: # pyright: ignore[reportArgumentType]
|
|
16
16
|
"""Generate Pydantic model from response body using datamodel-codegen.
|
|
17
17
|
|
|
18
18
|
Args:
|
|
19
19
|
response: Response row ID from network() table
|
|
20
|
-
output: Output file path for generated model (e.g., "models/
|
|
20
|
+
output: Output file path for generated model (e.g., "models/customers/group.py")
|
|
21
|
+
model_name: Class name for generated model (e.g., "CustomerGroup")
|
|
21
22
|
json_path: Optional JSON path to extract nested data (e.g., "Data[0]")
|
|
22
23
|
|
|
23
24
|
Returns:
|
|
@@ -102,6 +103,7 @@ def to_model(state, response: int, output: str, json_path: str = None) -> dict:
|
|
|
102
103
|
input_filename="response.json",
|
|
103
104
|
output=output_path,
|
|
104
105
|
output_model_type=DataModelType.PydanticV2BaseModel,
|
|
106
|
+
class_name=model_name, # Set generated class name
|
|
105
107
|
snake_case_field=True, # Convert to snake_case
|
|
106
108
|
use_standard_collections=True, # Use list instead of List
|
|
107
109
|
use_union_operator=True, # Use | instead of Union
|
|
@@ -125,5 +127,10 @@ def to_model(state, response: int, output: str, json_path: str = None) -> dict:
|
|
|
125
127
|
|
|
126
128
|
return success_response(
|
|
127
129
|
"Model generated successfully",
|
|
128
|
-
details={
|
|
130
|
+
details={
|
|
131
|
+
"Class": model_name,
|
|
132
|
+
"Output": str(output_path),
|
|
133
|
+
"Fields": field_count,
|
|
134
|
+
"Size": f"{output_path.stat().st_size} bytes",
|
|
135
|
+
},
|
|
129
136
|
)
|
webtap/services/main.py
CHANGED
|
@@ -81,8 +81,8 @@ class WebTapService:
|
|
|
81
81
|
if not self.cdp or not self.cdp.is_connected:
|
|
82
82
|
return 0
|
|
83
83
|
try:
|
|
84
|
-
result = self.cdp.
|
|
85
|
-
return result[0] if result else 0
|
|
84
|
+
result = self.cdp.query("SELECT COUNT(*) FROM events")
|
|
85
|
+
return result[0][0] if result else 0
|
|
86
86
|
except Exception:
|
|
87
87
|
return 0
|
|
88
88
|
|
|
@@ -6,12 +6,12 @@ webtap/filters.py,sha256=kRCicGMSV3R_zSvwzqZqksnry6jxJNdXRcgWvpoBLfc,13323
|
|
|
6
6
|
webtap/cdp/README.md,sha256=0TS0V_dRgRAzBqhddpXWD4S0YVi5wI4JgFJSll_KUBE,5660
|
|
7
7
|
webtap/cdp/__init__.py,sha256=c6NFG0XJnAa5GTe9MLr9mDZcLZqoTQN7A1cvvOfLcgY,453
|
|
8
8
|
webtap/cdp/query.py,sha256=x2Cy7KMolYkTelpROGezOfMFgYnbSlCvHkvvW1v_gLI,4229
|
|
9
|
-
webtap/cdp/session.py,sha256=
|
|
9
|
+
webtap/cdp/session.py,sha256=VAgzcysbRLjhbm9XLMyy1bEmfjQzNYIr-oNhvJosvps,19524
|
|
10
10
|
webtap/cdp/schema/README.md,sha256=hnWCzbXYcYtWaZb_SgjVaFBiG81S9b9Y3x-euQFwQDo,1222
|
|
11
11
|
webtap/cdp/schema/cdp_protocol.json,sha256=dp9_OLYLuVsQb1oV5r6MZfMzURscBLyAXUckdaPWyv4,1488452
|
|
12
12
|
webtap/cdp/schema/cdp_version.json,sha256=OhGy1qpfQjSe3Z7OqL6KynBFlDFBXxKGPZCY-ZN_lVU,399
|
|
13
13
|
webtap/commands/DEVELOPER_GUIDE.md,sha256=LYOhycZ3k5EHx5nREfkjvLz7vOs8pXCRLlcDm-keWao,11973
|
|
14
|
-
webtap/commands/TIPS.md,sha256=
|
|
14
|
+
webtap/commands/TIPS.md,sha256=XwnPKY5AgLsxNw3q0aF4Amr0P891Pt4QIkw0_3AF58g,9293
|
|
15
15
|
webtap/commands/__init__.py,sha256=rr3xM_bY0BgxkDOjsnsI8UBhjlz7nqiYlgJ8fjiJ1jQ,270
|
|
16
16
|
webtap/commands/_builders.py,sha256=SYacZmZTdkolQ7OOf3rFtFPCjkukY8z020WFA-i_O_A,7902
|
|
17
17
|
webtap/commands/_tips.py,sha256=SleMpwdghrHNqdzR60Cu8T0NZqJfWfcfrgIcyWI6GIQ,4793
|
|
@@ -30,14 +30,14 @@ webtap/commands/network.py,sha256=gEOg_u7VF9A5aKv5myzLCuvfAUkF1OPxsuj4UAgbS44,31
|
|
|
30
30
|
webtap/commands/selections.py,sha256=M001d_Gc51aSTuVeXGa19LDh2ZGR_qBJEjVGKpcGGFM,4895
|
|
31
31
|
webtap/commands/server.py,sha256=DOcIgYuKp0ydwrK9EA3hGwqOwfwM9DABhdPu3hk_jjo,6948
|
|
32
32
|
webtap/commands/setup.py,sha256=dov1LaN50nAEMNIuBLSK7mcnwhfn9rtqdTopBm1-PhA,9648
|
|
33
|
-
webtap/commands/to_model.py,sha256=
|
|
33
|
+
webtap/commands/to_model.py,sha256=jOb93t616m5weT75VyF506J6nydDXUWENF7cpscbe9Q,4962
|
|
34
34
|
webtap/services/README.md,sha256=rala_jtnNgSiQ1lFLM7x_UQ4SJZDceAm7dpkQMRTYaI,2346
|
|
35
35
|
webtap/services/__init__.py,sha256=IjFqu0Ak6D-r18aokcQMtenDV3fbelvfjTCejGv6CZ0,570
|
|
36
36
|
webtap/services/body.py,sha256=XQPa19y5eUc3XJ2TuwVK6kffO1VQoKqNs33MBBz7hzU,3913
|
|
37
37
|
webtap/services/console.py,sha256=XVfSKTvEHyyOdujsg85S3wtj1CdZhzKtWwlx25MvSv8,3768
|
|
38
38
|
webtap/services/dom.py,sha256=PC-mV56NMLvw37JNzI_jOnZrM7BiDPPn3kOQI9U81vI,19067
|
|
39
39
|
webtap/services/fetch.py,sha256=nl6bpU2Vnf40kau4-mqAnIkhC-7Lx2vbTJKUglz9KnE,13602
|
|
40
|
-
webtap/services/main.py,sha256=
|
|
40
|
+
webtap/services/main.py,sha256=5ZUxuIONf4qkKPpKnFi3Au-ZFvuqlpMeDi7fQT_yoQE,6274
|
|
41
41
|
webtap/services/network.py,sha256=0o_--F6YvmXqqFqrcjL1gc6Vr9V1Ytb_U7r_DSUWupA,3444
|
|
42
42
|
webtap/services/setup/__init__.py,sha256=lfoKCAroc-JoE_r7L-KZkF85ZWiB41MBIgrR7ZISSoE,7157
|
|
43
43
|
webtap/services/setup/chrome.py,sha256=zfPWeb6zm_xjIfiS2S_O9lR2BjGKaPXXo06pN_B9lAU,7187
|
|
@@ -45,7 +45,7 @@ webtap/services/setup/desktop.py,sha256=fXwQa201W-s2mengm_dJZ9BigJopVrO9YFUQcW_T
|
|
|
45
45
|
webtap/services/setup/extension.py,sha256=iJY43JlQO6Vicgd9Mz6Mw0LQfbBNUGhnwI8n-LnvHBY,3602
|
|
46
46
|
webtap/services/setup/filters.py,sha256=lAPSLMH_KZQO-7bRkmURwzforx7C3SDrKEw2ZogN-Lo,3220
|
|
47
47
|
webtap/services/setup/platform.py,sha256=7yn-7LQFffgerWzWRtOG-yNEsR36ICThYUAu_N2FAso,4532
|
|
48
|
-
webtap_tool-0.
|
|
49
|
-
webtap_tool-0.
|
|
50
|
-
webtap_tool-0.
|
|
51
|
-
webtap_tool-0.
|
|
48
|
+
webtap_tool-0.7.1.dist-info/METADATA,sha256=Of1VKvajnQxn9m3YdfBTXD2sdQEWCWnzzscaXHCthN4,17636
|
|
49
|
+
webtap_tool-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
50
|
+
webtap_tool-0.7.1.dist-info/entry_points.txt,sha256=iFe575I0CIb1MbfPt0oX2VYyY5gSU_dA551PKVR83TU,39
|
|
51
|
+
webtap_tool-0.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|