tapyr-cli 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tapyr_cli-0.1.0/CHANGELOG.md +80 -0
- tapyr_cli-0.1.0/CONTRIBUTING.md +53 -0
- tapyr_cli-0.1.0/LICENSE +21 -0
- tapyr_cli-0.1.0/MANIFEST.in +5 -0
- tapyr_cli-0.1.0/PKG-INFO +390 -0
- tapyr_cli-0.1.0/README.md +358 -0
- tapyr_cli-0.1.0/examples/README.md +159 -0
- tapyr_cli-0.1.0/examples/com.gojek.app.md +19 -0
- tapyr_cli-0.1.0/examples/gofood_order.yaml +199 -0
- tapyr_cli-0.1.0/examples/gojek_order_food.yaml +38 -0
- tapyr_cli-0.1.0/examples/instagram_reels_engagement.yaml +205 -0
- tapyr_cli-0.1.0/examples/spotify_play_playlist.yaml +177 -0
- tapyr_cli-0.1.0/examples/tiktok_natural_engagement.yaml +159 -0
- tapyr_cli-0.1.0/examples/whatsapp_send_message.yaml +89 -0
- tapyr_cli-0.1.0/pyproject.toml +47 -0
- tapyr_cli-0.1.0/setup.cfg +4 -0
- tapyr_cli-0.1.0/src/tapyr/__init__.py +5 -0
- tapyr_cli-0.1.0/src/tapyr/__main__.py +4 -0
- tapyr_cli-0.1.0/src/tapyr/actions.py +1033 -0
- tapyr_cli-0.1.0/src/tapyr/apps.py +202 -0
- tapyr_cli-0.1.0/src/tapyr/bench.py +165 -0
- tapyr_cli-0.1.0/src/tapyr/calibration.py +538 -0
- tapyr_cli-0.1.0/src/tapyr/cli.py +2677 -0
- tapyr_cli-0.1.0/src/tapyr/context.py +262 -0
- tapyr_cli-0.1.0/src/tapyr/coordinates.py +50 -0
- tapyr_cli-0.1.0/src/tapyr/demo.py +540 -0
- tapyr_cli-0.1.0/src/tapyr/doctor.py +338 -0
- tapyr_cli-0.1.0/src/tapyr/driver.py +454 -0
- tapyr_cli-0.1.0/src/tapyr/errors.py +103 -0
- tapyr_cli-0.1.0/src/tapyr/executor.py +2934 -0
- tapyr_cli-0.1.0/src/tapyr/installer.py +106 -0
- tapyr_cli-0.1.0/src/tapyr/intent.py +272 -0
- tapyr_cli-0.1.0/src/tapyr/knowledge.py +3099 -0
- tapyr_cli-0.1.0/src/tapyr/mcp_server.py +1857 -0
- tapyr_cli-0.1.0/src/tapyr/perception.py +927 -0
- tapyr_cli-0.1.0/src/tapyr/platform_tools.py +425 -0
- tapyr_cli-0.1.0/src/tapyr/py.typed +0 -0
- tapyr_cli-0.1.0/src/tapyr/safety.py +666 -0
- tapyr_cli-0.1.0/src/tapyr/shell.py +440 -0
- tapyr_cli-0.1.0/src/tapyr/vault.py +808 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/PKG-INFO +390 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/SOURCES.txt +67 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/dependency_links.txt +1 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/entry_points.txt +2 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/requires.txt +9 -0
- tapyr_cli-0.1.0/src/tapyr_cli.egg-info/top_level.txt +1 -0
- tapyr_cli-0.1.0/tests/test_actions.py +1166 -0
- tapyr_cli-0.1.0/tests/test_apps.py +276 -0
- tapyr_cli-0.1.0/tests/test_bench.py +124 -0
- tapyr_cli-0.1.0/tests/test_calibration.py +662 -0
- tapyr_cli-0.1.0/tests/test_cli.py +2262 -0
- tapyr_cli-0.1.0/tests/test_context.py +190 -0
- tapyr_cli-0.1.0/tests/test_coordinates.py +82 -0
- tapyr_cli-0.1.0/tests/test_demo.py +444 -0
- tapyr_cli-0.1.0/tests/test_doctor.py +539 -0
- tapyr_cli-0.1.0/tests/test_driver.py +527 -0
- tapyr_cli-0.1.0/tests/test_errors.py +62 -0
- tapyr_cli-0.1.0/tests/test_examples.py +246 -0
- tapyr_cli-0.1.0/tests/test_executor.py +5718 -0
- tapyr_cli-0.1.0/tests/test_grid.py +82 -0
- tapyr_cli-0.1.0/tests/test_installer.py +82 -0
- tapyr_cli-0.1.0/tests/test_intent.py +260 -0
- tapyr_cli-0.1.0/tests/test_knowledge.py +3202 -0
- tapyr_cli-0.1.0/tests/test_mcp.py +1875 -0
- tapyr_cli-0.1.0/tests/test_perception.py +1386 -0
- tapyr_cli-0.1.0/tests/test_platform_tools.py +378 -0
- tapyr_cli-0.1.0/tests/test_safety.py +369 -0
- tapyr_cli-0.1.0/tests/test_shell.py +315 -0
- tapyr_cli-0.1.0/tests/test_vault.py +938 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 (2026-04-10)
|
|
4
|
+
|
|
5
|
+
Initial public release. **977 tests, 56 MCP tools, 35 CLI subcommands.**
|
|
6
|
+
|
|
7
|
+
### Intent Resolver
|
|
8
|
+
- `tapyr do "intent"`: natural-language intent → tapyr action, three-tier pipeline
|
|
9
|
+
(14 builtin regex patterns → workflow suggestion → element-text matching),
|
|
10
|
+
no LLM calls, dry-run support
|
|
11
|
+
|
|
12
|
+
### Perception
|
|
13
|
+
- Screenshot + UI dump + Set-of-Mark annotated PNG with numbered elements
|
|
14
|
+
- Vision-first fallback v0: grid-based region synthesis when uiautomator returns nothing
|
|
15
|
+
- Delta mode (`--since-last`): return only changed elements between calls
|
|
16
|
+
- Compact mode (`--compact`): minimal id/text/clickable payload for token efficiency
|
|
17
|
+
- Clickable-only filter (`--clickable-only`)
|
|
18
|
+
- Continuous watch polling (`tapyr watch`) with JSON Lines output on change
|
|
19
|
+
|
|
20
|
+
### Actions
|
|
21
|
+
- Semantic tap (`--text`, `--id`, `--content-desc`, `--xy` with percent coords)
|
|
22
|
+
- Swipe, type, key, scroll-until, wait-for
|
|
23
|
+
- Unlock (idempotent, PIN support)
|
|
24
|
+
- Device status (one-shot composed query)
|
|
25
|
+
- Read notifications with OTP extraction (`--extract-otp`)
|
|
26
|
+
- Named screenshot snapshots (`tapyr screenshot --save`)
|
|
27
|
+
|
|
28
|
+
### Workflow Engine
|
|
29
|
+
- YAML workflows with template variables, weighted `choose_one`, `repeat`, `sequence`
|
|
30
|
+
- `if` / `skip_if` / `run_if` conditional gates
|
|
31
|
+
- `expect_screen` / `verify` assertions with soft-mismatch `needs_llm_fallback` markers
|
|
32
|
+
- Per-step `retries` with exponential backoff
|
|
33
|
+
- Checkpoints with `requires_confirmation` halt + `--resume-from`
|
|
34
|
+
- Patching protocol: detect drift → respond with patches → persist as new workflow
|
|
35
|
+
- Dry-run with synthetic screen state (`--dry-run-state`)
|
|
36
|
+
- Workflow pipeline: `tapyr kb run A B C` chains multiple workflows
|
|
37
|
+
- Ad-hoc step execution (`tapyr exec --steps-json`)
|
|
38
|
+
|
|
39
|
+
### Knowledge Base
|
|
40
|
+
- CRUD: save, read, list, delete, rename, clone, update metadata
|
|
41
|
+
- Tag management: add/remove/list with FTS reindex
|
|
42
|
+
- Search with FTS + tag filter composition
|
|
43
|
+
- Suggest workflows based on screen content
|
|
44
|
+
- Diff, find-refs, check-calibration for workflow maintenance
|
|
45
|
+
- History: JSONL audit log, stats aggregation, CSV export
|
|
46
|
+
- Record: materialize workflows from action history
|
|
47
|
+
- Export/import/install bundles (zip + manifest, 3 merge strategies, URL fetch with sha256)
|
|
48
|
+
|
|
49
|
+
### Calibration
|
|
50
|
+
- Rail scanner (vertical icon columns) and nav scanner (horizontal bars)
|
|
51
|
+
- Unified v2 multi-region files with namespaced labels (`right_rail.comment`)
|
|
52
|
+
- `calibrate-app` mega-command composing both scanners
|
|
53
|
+
- `tap_rail` and `tap_saved` actions consuming calibrations with cross-device percent coords
|
|
54
|
+
|
|
55
|
+
### Safety
|
|
56
|
+
- Action allowlist/denylist via `~/.tapyr/safety.yaml`
|
|
57
|
+
- Three-tier rules: deny / require_confirmation / allow
|
|
58
|
+
- Predicate matching: equals, contains, regex, bare-field AND-semantics
|
|
59
|
+
- CLI management: `tapyr safety list/set/delete`
|
|
60
|
+
- `--confirm-safety` runtime bypass for require_confirmation rules
|
|
61
|
+
- Per-device override support
|
|
62
|
+
|
|
63
|
+
### Credential Vault
|
|
64
|
+
- macOS (security CLI), Linux (secret-tool/libsecret), Windows (DPAPI/PowerShell)
|
|
65
|
+
- `fill_credential` pastes without returning the secret to the agent
|
|
66
|
+
- No `vault_get` MCP tool by design — secrets never cross the agent boundary
|
|
67
|
+
- In-memory backend (`TAPYR_VAULT_TEST_BACKEND=memory`) for CI
|
|
68
|
+
|
|
69
|
+
### Distribution
|
|
70
|
+
- `tapyr install-adb`: Playwright-style platform-tools bootstrap
|
|
71
|
+
- `tapyr doctor --fix`: auto-remediate adb-not-found
|
|
72
|
+
- `tapyr demo`: 7-step zero-side-effect scripted showcase
|
|
73
|
+
- `tapyr install claude-desktop/claude-code/cursor`: MCP client setup
|
|
74
|
+
- PyPI-ready: 210KB wheel, PEP 561 py.typed marker
|
|
75
|
+
- GitHub Actions CI: Python 3.10-3.13 × Ubuntu + macOS
|
|
76
|
+
|
|
77
|
+
### Security
|
|
78
|
+
- Subprocess injection audit: 4 vectors patched with 28 regression tests
|
|
79
|
+
- Input validation on package names, keyevent names, deep-link URIs
|
|
80
|
+
- Shell metacharacter escaping for `adb shell input text`
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Contributing to Tapyr
|
|
2
|
+
|
|
3
|
+
Thanks for your interest in contributing.
|
|
4
|
+
|
|
5
|
+
## Development setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/RafieAmandio/tapyr.git
|
|
9
|
+
cd tapyr
|
|
10
|
+
pip install -e ".[dev]"
|
|
11
|
+
python -m pytest -q # 977 tests, ~12s
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Running tests
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
python -m pytest -q # full suite
|
|
18
|
+
python -m pytest tests/test_foo.py -x # single file, stop on first failure
|
|
19
|
+
python -m pytest -k "keyword" # filter by name
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
All tests are hermetic — no real device, no network calls. Monkeypatching
|
|
23
|
+
is used for adb, urllib, and OS-level subprocess calls.
|
|
24
|
+
|
|
25
|
+
## Smoke-testing with a device
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
tapyr doctor --fix # ensure adb is available
|
|
29
|
+
tapyr demo # 7-step safe walkthrough
|
|
30
|
+
tapyr do "go home" # AI CLI mode — intent resolver
|
|
31
|
+
tapyr perceive --pretty # inspect the screen
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Adding a feature
|
|
35
|
+
|
|
36
|
+
1. Read the existing code in the module you're extending.
|
|
37
|
+
2. Add tests alongside the implementation. Every PR must keep the suite green.
|
|
38
|
+
3. Follow existing patterns — match the idioms of adjacent functions.
|
|
39
|
+
4. No new dependencies without discussion.
|
|
40
|
+
|
|
41
|
+
## Workflow bundle contributions
|
|
42
|
+
|
|
43
|
+
Share your workflows via `tapyr kb export`:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
tapyr kb export ~/my-pack.tapyr.zip --workflow my_flow --calibration my_cal
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
See `examples/README.md` for the showcase workflow conventions.
|
|
50
|
+
|
|
51
|
+
## Code of conduct
|
|
52
|
+
|
|
53
|
+
Be kind, be constructive, ship working code.
|
tapyr_cli-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rafie Amandio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
tapyr_cli-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,390 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tapyr-cli
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Playwright for Android, built for AI agents. CLI + MCP server for LLM-driven phone control.
|
|
5
|
+
Author-email: Rafie Amandio <88525718+RafieAmandio@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/RafieAmandio/tapyr
|
|
8
|
+
Project-URL: Documentation, https://github.com/RafieAmandio/tapyr
|
|
9
|
+
Project-URL: Repository, https://github.com/RafieAmandio/tapyr
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/RafieAmandio/tapyr/issues
|
|
11
|
+
Keywords: android,adb,automation,mcp,ai-agents,playwright
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Testing
|
|
20
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: Pillow>=10.0
|
|
25
|
+
Requires-Dist: PyYAML>=6.0
|
|
26
|
+
Provides-Extra: mcp
|
|
27
|
+
Requires-Dist: mcp>=1.0; extra == "mcp"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
32
|
+
|
|
33
|
+

|
|
34
|
+
|
|
35
|
+
# Tapyr
|
|
36
|
+
|
|
37
|
+
**Playwright for Android, built for AI agents.**
|
|
38
|
+
|
|
39
|
+
A CLI and stdio MCP server that gives an LLM (Claude, Cursor, any MCP-capable
|
|
40
|
+
agent) structured perception and action primitives for a connected Android
|
|
41
|
+
device, plus a file-based knowledge layer so learned workflows and app notes
|
|
42
|
+
persist across sessions.
|
|
43
|
+
|
|
44
|
+
Tapyr is **not** an agent. It is the hands, eyes, and memory that an agent
|
|
45
|
+
uses. Bring your own brain.
|
|
46
|
+
|
|
47
|
+
## Why
|
|
48
|
+
|
|
49
|
+
LLMs can already read text and reason about intent. They cannot, by default,
|
|
50
|
+
open your Gojek app, tap "GoFood", type "sate", and hand off to checkout.
|
|
51
|
+
Tapyr closes that gap with a small, opinionated set of primitives designed
|
|
52
|
+
for tool-use from the ground up:
|
|
53
|
+
|
|
54
|
+
- **JSON-first output.** Every command returns `{"ok": bool, ...}` — no
|
|
55
|
+
log-scraping.
|
|
56
|
+
- **Semantic targeting.** `tap --text "Order"` or `tap --id 7` (from the
|
|
57
|
+
last perception), not `tap 540 1820`.
|
|
58
|
+
- **Set-of-Mark perception.** Screenshot + UI dump + an annotated PNG with
|
|
59
|
+
numbered boxes so vision models can pick an element by number.
|
|
60
|
+
- **Cross-device coordinates.** `tap_xy` and `swipe` accept percent strings
|
|
61
|
+
(`"50%"`) that resolve against the live `wm size` at replay time. Workflows
|
|
62
|
+
written on 720x1604 replay on 1080x2400 unchanged.
|
|
63
|
+
- **Workflow executor.** YAML workflows with template variables, weighted
|
|
64
|
+
`choose_one` branches, `repeat`, `sequence`, `expect_screen`, `verify`,
|
|
65
|
+
`if` / `skip_if` / `run_if` conditional gates, per-step `retries` with exponential
|
|
66
|
+
backoff, and a debug mode that snapshots fingerprints and annotated PNGs
|
|
67
|
+
per step. `kb run tiktok_engage --arg count=10` — done.
|
|
68
|
+
- **Safety gates.** `checkpoint: name, requires_confirmation: true` halts a
|
|
69
|
+
replay cleanly at payment / destructive steps; resume past the gate with
|
|
70
|
+
`tapyr kb run NAME --resume-from checkpoint_name` without re-executing the
|
|
71
|
+
expensive setup.
|
|
72
|
+
- **Observability.** `tapyr kb stats` surfaces per-workflow success rate,
|
|
73
|
+
halted runs, and drift-marker counts from the JSONL audit log — flaky
|
|
74
|
+
workflows bubble to the top. Soft `expect_screen` mismatches emit a
|
|
75
|
+
structured `needs_llm_fallback` marker so an orchestrating agent can
|
|
76
|
+
machine-detect drift without parsing prose warnings.
|
|
77
|
+
- **Calibration for canvas apps.** Apps that render via custom canvas
|
|
78
|
+
(TikTok, Instagram Reels, YouTube Shorts) are invisible to uiautomator.
|
|
79
|
+
`calibrate-rail` scans for a vertical icon rail (right-side action column),
|
|
80
|
+
`calibrate-nav` scans for a horizontal one (bottom nav), and the `tap_rail`
|
|
81
|
+
workflow action consumes saved calibrations by semantic name (`icon:
|
|
82
|
+
comment`). See below.
|
|
83
|
+
- **Lock screen + device status.** `tapyr unlock [--pin XXXX]` dismisses the
|
|
84
|
+
keyguard idempotently (no-ops if already unlocked). `tapyr status` returns
|
|
85
|
+
a one-shot snapshot — locked? foreground? screen size? wakefulness?
|
|
86
|
+
battery? — in a single call instead of five sequential adb round trips.
|
|
87
|
+
- **File-backed memory.** Workflows are YAML. App notes are Markdown. The
|
|
88
|
+
agent can read, write, and share them.
|
|
89
|
+
- **One binary, many front doors.** The same core runs as a CLI, an MCP
|
|
90
|
+
stdio server, or a self-installing plugin for Claude Desktop / Claude
|
|
91
|
+
Code / Cursor.
|
|
92
|
+
|
|
93
|
+
## Architecture
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
┌─────────────────────────────────────────────┐
|
|
97
|
+
│ executor replay workflows, verify │
|
|
98
|
+
├─────────────────────────────────────────────┤
|
|
99
|
+
│ knowledge workflows + app notes + FTS │
|
|
100
|
+
├─────────────────────────────────────────────┤
|
|
101
|
+
│ calibration whiteness scan for canvas UIs │
|
|
102
|
+
├─────────────────────────────────────────────┤
|
|
103
|
+
│ actions semantic tap, swipe, type │
|
|
104
|
+
├─────────────────────────────────────────────┤
|
|
105
|
+
│ perception screenshot + ui_dump + SoM │
|
|
106
|
+
├─────────────────────────────────────────────┤
|
|
107
|
+
│ driver adb subprocess wrapper │
|
|
108
|
+
└─────────────────────────────────────────────┘
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Each layer is usable independently. Agents operate at whatever level the
|
|
112
|
+
task needs: replay a saved workflow (fast path), drop to semantic taps
|
|
113
|
+
(normal path), coordinate-based taps on a calibrated rail (canvas-app
|
|
114
|
+
escape hatch), or raw pixels (last resort).
|
|
115
|
+
|
|
116
|
+
## Install
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pipx install tapyr-cli # or: pip install tapyr-cli
|
|
120
|
+
tapyr doctor # tells you exactly what's missing
|
|
121
|
+
tapyr doctor --fix # auto-install adb if missing, re-run checks
|
|
122
|
+
tapyr install-adb # one-shot Playwright-style fetch of platform-tools
|
|
123
|
+
tapyr demo --dry-run # preview the 7-step scripted walkthrough
|
|
124
|
+
tapyr demo # with a phone plugged in: the hero shot
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
`tapyr install-adb` fetches the official Android platform-tools bundle from
|
|
128
|
+
`dl.google.com` into `~/.tapyr/bin/` and registers that directory as a fallback
|
|
129
|
+
in the `find_adb()` lookup chain, so a PATH-installed adb still wins if present. Cached after first
|
|
130
|
+
run; `--force` re-downloads. Optional `TAPYR_ADB_SHA256` env var for
|
|
131
|
+
enterprise users who need a pinned checksum.
|
|
132
|
+
|
|
133
|
+
`tapyr demo` runs a zero-side-effect, 7-step walkthrough (doctor → wake →
|
|
134
|
+
perceive → HOME → deep-link Settings → BACK → save a marker workflow) that
|
|
135
|
+
gives the first-run experience a single command. No typing into fields, no
|
|
136
|
+
taps inside third-party apps, no installs. `--dry-run` works without a
|
|
137
|
+
phone for curious users and CI.
|
|
138
|
+
|
|
139
|
+
You will also need `adb` (Android `platform-tools` on your `PATH`, or the copy fetched by `tapyr install-adb`) and a
|
|
140
|
+
phone with USB debugging enabled. `doctor` walks you through both.
|
|
141
|
+
|
|
142
|
+
The `TAPYR_HOME` environment variable overrides the default `~/.tapyr`
|
|
143
|
+
state directory. Useful for multi-device test rigs or CI.
|
|
144
|
+
|
|
145
|
+
### Wire it into your agent
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
tapyr install claude-desktop # writes ~/Library/.../claude_desktop_config.json
|
|
149
|
+
tapyr install claude-code # writes ~/.claude/mcp.json
|
|
150
|
+
tapyr install cursor # writes ~/.cursor/mcp.json
|
|
151
|
+
tapyr install print # just prints the JSON snippet
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
Then restart the client. The agent now has a `tapyr` tool group with:
|
|
155
|
+
|
|
156
|
+
- **Intent resolver:** `do` (natural-language → action, no API knowledge needed)
|
|
157
|
+
- **Perception:** `perceive`, `screenshot`
|
|
158
|
+
- **Actions:** `tap`, `swipe`, `type_text`, `key`, `scroll_until`, `wait_for`,
|
|
159
|
+
`unlock`, `exec_steps`
|
|
160
|
+
- **Device:** `device_status`, `list_devices`
|
|
161
|
+
- **Apps:** `open_app`, `deep_link`, `foreground`, `list_packages`,
|
|
162
|
+
`read_notifications`
|
|
163
|
+
- **Calibration:** `calibrate_rail`, `calibrate_nav`, `calibrations_list`,
|
|
164
|
+
`calibration_show`, `calibration_delete`
|
|
165
|
+
- **Credential vault:** `fill_credential`, `vault_set`, `vault_list`,
|
|
166
|
+
`vault_delete` (macOS / Linux / Windows; no `vault_get` by design)
|
|
167
|
+
- **Safety policy:** `safety_list`, `safety_set_rule`, `safety_delete_rule`
|
|
168
|
+
- **Knowledge base:** `kb_search_workflows`, `kb_suggest_workflows`,
|
|
169
|
+
`kb_read_workflow`, `kb_run_workflow`, `kb_save_workflow`,
|
|
170
|
+
`kb_delete_workflow`, `kb_rename_workflow`, `kb_clone_workflow`,
|
|
171
|
+
`kb_update_workflow_metadata`, `kb_lint_workflow`, `kb_diff_workflows`,
|
|
172
|
+
`kb_find_references`, `kb_check_calibration`, `kb_record_workflow`,
|
|
173
|
+
`kb_add_tags`, `kb_remove_tags`, `kb_read_history`, `kb_stats`,
|
|
174
|
+
`kb_read_app_note`, `kb_append_app_note`, `kb_list_app_notes`,
|
|
175
|
+
`kb_list_workflows`, `kb_export_bundle`, `kb_import_bundle`,
|
|
176
|
+
`kb_install_bundle`
|
|
177
|
+
- **Environment:** `doctor`
|
|
178
|
+
|
|
179
|
+
**56 MCP tools** across 10 families — the full surface an agent needs to
|
|
180
|
+
perceive, act, learn, and manage workflows on any connected Android device.
|
|
181
|
+
|
|
182
|
+
## Usage (CLI)
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Sanity check
|
|
186
|
+
tapyr doctor --pretty
|
|
187
|
+
tapyr devices
|
|
188
|
+
tapyr status --pretty # one-shot: locked, foreground, battery, ...
|
|
189
|
+
|
|
190
|
+
# See the screen the way an agent sees it
|
|
191
|
+
tapyr perceive --pretty
|
|
192
|
+
# -> ~/.tapyr/cache/last_screenshot.png
|
|
193
|
+
# -> ~/.tapyr/cache/last_annotated.png (numbered overlays)
|
|
194
|
+
# -> JSON element list with ids
|
|
195
|
+
tapyr perceive --clickable-only # trim layout noise, keep only tap targets
|
|
196
|
+
tapyr perceive --compact # minimal payload: id + text + clickable only
|
|
197
|
+
tapyr watch --interval 2 --max-polls 10 # poll for screen changes, JSON Lines output
|
|
198
|
+
|
|
199
|
+
# AI CLI mode — just say what you want
|
|
200
|
+
tapyr do "like this post" # resolves intent → action automatically
|
|
201
|
+
tapyr do "go home" # builtin: HOME key (no perceive needed)
|
|
202
|
+
tapyr do "type hello world" # detects type intent
|
|
203
|
+
tapyr do "open com.whatsapp" # opens app by package
|
|
204
|
+
tapyr do "scroll down" # swipe gesture
|
|
205
|
+
tapyr do "like this" --dry-run # preview the resolved action without executing
|
|
206
|
+
tapyr perceive --since-last # delta mode: only what changed since the last call
|
|
207
|
+
tapyr --timeout 60 perceive # global adb-shell timeout override (slow devices)
|
|
208
|
+
|
|
209
|
+
# Drive the phone
|
|
210
|
+
tapyr unlock --pin 1234 # dismisses keyguard, idempotent
|
|
211
|
+
tapyr tap --text "GoFood"
|
|
212
|
+
tapyr type "sate"
|
|
213
|
+
tapyr key enter
|
|
214
|
+
tapyr tap --id 3
|
|
215
|
+
tapyr tap --xy 50% 90% # percent coords work everywhere
|
|
216
|
+
tapyr swipe 50% 75% 50% 25%
|
|
217
|
+
tapyr scroll-until --text "Add to cart"
|
|
218
|
+
tapyr wait-for --text "Order confirmed" --timeout 15
|
|
219
|
+
|
|
220
|
+
# Skip the UI entirely when you know the intent
|
|
221
|
+
tapyr deep-link "gojek://gofood/home"
|
|
222
|
+
tapyr open com.whatsapp
|
|
223
|
+
|
|
224
|
+
# Read push notifications / SMS OTPs (needs --noredact on device)
|
|
225
|
+
tapyr read-notifications --package com.android.messaging --pretty
|
|
226
|
+
|
|
227
|
+
# Knowledge base
|
|
228
|
+
tapyr kb list
|
|
229
|
+
tapyr kb list --query "food order"
|
|
230
|
+
tapyr kb search "checkout" --tag food # FTS + tag filter
|
|
231
|
+
tapyr kb read gojek_order_food
|
|
232
|
+
tapyr kb run tiktok_natural_engagement --arg count=10
|
|
233
|
+
tapyr kb run unlock_flow gofood_order verify_order # pipeline: run 3 in sequence
|
|
234
|
+
tapyr kb run my_workflow --debug # per-step PNGs
|
|
235
|
+
tapyr kb run my_workflow --dry-run # preview only
|
|
236
|
+
tapyr kb run pay_flow --confirm-checkpoints # walk past gates
|
|
237
|
+
tapyr kb run pay_flow --confirm-safety # walk past require_confirmation safety rules
|
|
238
|
+
tapyr kb run pay_flow --resume-from confirm_payment # pick up from halt
|
|
239
|
+
tapyr kb run my_workflow --dry-run --dry-run-state '{"screen_texts":["OK"]}' # evaluate if/skip_if gates against a fixture
|
|
240
|
+
tapyr kb lint my_workflow # static validation
|
|
241
|
+
tapyr kb stats --days 7 # flaky workflows
|
|
242
|
+
tapyr kb history --limit 20 --action run_workflow
|
|
243
|
+
tapyr kb delete old_workflow
|
|
244
|
+
tapyr kb rename old_name new_name # atomic rename + FTS reindex
|
|
245
|
+
tapyr kb tag add gojek_order food delivery # add tags to a saved workflow
|
|
246
|
+
tapyr kb tag remove gojek_order wip # remove tags
|
|
247
|
+
tapyr kb clone gojek_order --as gojek_order_v2 # duplicate for safe editing
|
|
248
|
+
tapyr kb diff workflow_v1 workflow_v2 --pretty # colored unified-diff
|
|
249
|
+
tapyr kb history --export csv > history.csv # spreadsheet-friendly audit export
|
|
250
|
+
tapyr kb find-refs --calibration tiktok_rail # who uses this?
|
|
251
|
+
tapyr kb check-calibration tiktok_rail # did re-cal break anything?
|
|
252
|
+
tapyr kb app com.gojek.app
|
|
253
|
+
tapyr kb app com.gojek.app --append "Payment confirm button is bottom-right"
|
|
254
|
+
tapyr kb apps # list all documented packages
|
|
255
|
+
tapyr kb export ~/gojek-pack.tapyr.zip --all # pack workflows+cals+notes
|
|
256
|
+
tapyr kb import ~/gojek-pack.tapyr.zip --merge rename # skip/overwrite/rename
|
|
257
|
+
tapyr kb install https://example.com/gojek-pack.tapyr.zip --sha256 ... # fetch + verify + import
|
|
258
|
+
|
|
259
|
+
# Credential vault (macOS / Linux / Windows)
|
|
260
|
+
tapyr vault set com.whatsapp password --stdin # reads from stdin, never argv
|
|
261
|
+
tapyr vault list # {package, field} only — no secrets
|
|
262
|
+
tapyr fill-credential com.whatsapp password # paste into focused field
|
|
263
|
+
|
|
264
|
+
# Action safety policy
|
|
265
|
+
tapyr safety list # show current deny/require_confirmation/allow rules
|
|
266
|
+
tapyr safety set deny key --equals POWER # never POWER key on this device
|
|
267
|
+
tapyr safety set deny shell --contains "pm uninstall" # forbid uninstalls via shell action
|
|
268
|
+
tapyr safety set require_confirmation deep_link --package com.android.vending
|
|
269
|
+
tapyr safety delete deny key --equals POWER # exact-match delete
|
|
270
|
+
|
|
271
|
+
# Parallel test fleets
|
|
272
|
+
TAPYR_MULTI_DEVICE=1 tapyr --serial emulator-5554 perceive # scoped cache + history
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
## Calibration for canvas-rendered apps
|
|
276
|
+
|
|
277
|
+
Some apps render their entire UI through a custom canvas engine — TikTok,
|
|
278
|
+
Instagram Reels, YouTube Shorts. Their windows have focus but `shown=false`
|
|
279
|
+
in `dumpsys`, so `uiautomator dump` falls through to whatever's drawn
|
|
280
|
+
underneath (usually the launcher). `perceive` returns zero meaningful
|
|
281
|
+
elements. Semantic `tap --text` and `tap --content-desc` do not work.
|
|
282
|
+
|
|
283
|
+
Tapyr's workaround: scan the screen for white icon clusters along a vertical
|
|
284
|
+
rail and use the cluster centers as blind-tap targets.
|
|
285
|
+
|
|
286
|
+
```bash
|
|
287
|
+
# 1. One-time setup per app+device: scan the rail, label the clusters,
|
|
288
|
+
# save the result. The x-column auto-detects in the right 15% of the
|
|
289
|
+
# screen; pass --x-col to override.
|
|
290
|
+
tapyr calibrate-rail \
|
|
291
|
+
--y-min 500 --y-max 1400 \
|
|
292
|
+
--min-cluster-pixels 30 \
|
|
293
|
+
--labels heart,comment,bookmark,share \
|
|
294
|
+
--save tiktok_rail.json
|
|
295
|
+
|
|
296
|
+
# 2. In a workflow yaml, reference the saved calibration by name:
|
|
297
|
+
#
|
|
298
|
+
# - action: tap_rail
|
|
299
|
+
# calibration: tiktok_rail
|
|
300
|
+
# icon: comment # v1: bare label
|
|
301
|
+
# icon: right_rail.comment # v2: namespaced (region.label)
|
|
302
|
+
#
|
|
303
|
+
# Unified v2 calibrations merge multiple regions (right_rail, bottom_nav)
|
|
304
|
+
# into one file per app. Bare labels auto-resolve when unambiguous;
|
|
305
|
+
# namespaced labels (right_rail.comment) disambiguate across regions.
|
|
306
|
+
# v1 files (single rail) keep working unchanged.
|
|
307
|
+
|
|
308
|
+
# 3. Discover what's saved
|
|
309
|
+
tapyr calibrations --pretty
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
The `examples/tiktok_natural_engagement.yaml` workflow is a fully worked
|
|
313
|
+
reference: a weighted `choose_one` loop that randomly likes, favorites,
|
|
314
|
+
shares, or comments (with Indonesian gen-Z phrases) 5-45% of the time,
|
|
315
|
+
passively scrolls the rest, and posts real comments via TikTok's native
|
|
316
|
+
input field.
|
|
317
|
+
|
|
318
|
+
See `examples/README.md` for four more showcase workflows:
|
|
319
|
+
**whatsapp_send_message** (the hello-world tutorial),
|
|
320
|
+
**gofood_order** (Indonesian use case with a payment checkpoint),
|
|
321
|
+
**spotify_play_playlist** (morning-routine weighted rotation), and
|
|
322
|
+
**instagram_reels_engagement** (canvas-app mirror of the TikTok example).
|
|
323
|
+
Every file in `examples/` is pinned lint-clean by a regression test.
|
|
324
|
+
|
|
325
|
+
## Knowledge on disk
|
|
326
|
+
|
|
327
|
+
```
|
|
328
|
+
~/.tapyr/
|
|
329
|
+
├── workflows/ # agent-written task recipes (yaml)
|
|
330
|
+
├── apps/ # per-app declarative notes (markdown)
|
|
331
|
+
├── calibration/ # rail-icon calibrations for canvas apps (json)
|
|
332
|
+
├── history/ # jsonl audit log of every action
|
|
333
|
+
├── cache/ # last screenshot, dump, annotated PNG
|
|
334
|
+
├── debug/ # per-step snapshots from `kb run --debug`
|
|
335
|
+
└── index.sqlite # FTS index — rebuildable from the above
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
Everything is git-friendly. You can version-control your `~/.tapyr/`
|
|
339
|
+
directory and share workflows + calibrations across machines or with
|
|
340
|
+
teammates.
|
|
341
|
+
|
|
342
|
+
## Usage (from an agent, via MCP)
|
|
343
|
+
|
|
344
|
+
Once installed into your client, the agent can call the tools directly.
|
|
345
|
+
A typical loop for a well-known app:
|
|
346
|
+
|
|
347
|
+
1. `perceive` → read the element list + annotated PNG
|
|
348
|
+
2. `tap` with a selector that matches an element
|
|
349
|
+
3. `perceive` → confirm the new screen
|
|
350
|
+
4. `kb_save_workflow` once the task completes, so the next agent is faster
|
|
351
|
+
|
|
352
|
+
For a canvas-rendered app (TikTok etc.) where `uiautomator dump` returns
|
|
353
|
+
nothing: **vision-first fallback (v0)** automatically kicks in — the
|
|
354
|
+
screenshot is analyzed via grid-based variance detection to synthesize
|
|
355
|
+
clickable regions even without a UI tree. The agent sees numbered boxes
|
|
356
|
+
on the annotated PNG and can `tap --id N`. For higher precision on
|
|
357
|
+
known apps, calibrate once and replay:
|
|
358
|
+
|
|
359
|
+
1. `calibrate_app` with `--rail` and `--nav` flags to scan both icon
|
|
360
|
+
regions in one shot and save a unified v2 calibration file
|
|
361
|
+
2. `kb_save_workflow` with `tap_rail` or `tap_saved` steps referencing
|
|
362
|
+
namespaced labels (`right_rail.comment`, `bottom_nav.home`)
|
|
363
|
+
3. `kb_run_workflow` to replay
|
|
364
|
+
|
|
365
|
+
## Status
|
|
366
|
+
|
|
367
|
+
**v0 / alpha.** Validated on a real Oppo 720x1604 device through 40+ loop
|
|
368
|
+
iterations of incremental hardening. The TikTok engagement workflow posts
|
|
369
|
+
real comments, likes, bookmarks, and shares through the coordinate-based
|
|
370
|
+
`tap_rail` path. **977 tests passing**, covering the executor, actions,
|
|
371
|
+
calibration scanners, knowledge base, MCP server, CLI dispatch, and the
|
|
372
|
+
workflow observability surface.
|
|
373
|
+
|
|
374
|
+
What's here (as of this README revision): the full primitive set listed
|
|
375
|
+
above, 10 MCP tool families, `kb stats` observability over the JSONL audit
|
|
376
|
+
log, soft-drift `needs_llm_fallback` markers on `expect_screen` mismatches,
|
|
377
|
+
idempotent `unlock` and composed `device_status`, and a cross-device
|
|
378
|
+
coordinate system that works on any screen that `wm size` can report.
|
|
379
|
+
|
|
380
|
+
**Vision-first perception fallback (v0)** shipped — when `uiautomator dump`
|
|
381
|
+
returns nothing, perception now synthesizes clickable regions from the
|
|
382
|
+
screenshot via grid-based variance analysis so agents can at least `tap --id N`
|
|
383
|
+
on numbered areas in the annotated PNG. Still on the roadmap (v1): replacing
|
|
384
|
+
the heuristic grid with a segmentation model (SAM / YOLO) for element-level
|
|
385
|
+
precision, plus OCR for text field extraction. See `BACKLOG.md` for the full
|
|
386
|
+
list with priority ordering.
|
|
387
|
+
|
|
388
|
+
## License
|
|
389
|
+
|
|
390
|
+
MIT.
|