tapyr-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. tapyr_cli-0.1.0/CHANGELOG.md +80 -0
  2. tapyr_cli-0.1.0/CONTRIBUTING.md +53 -0
  3. tapyr_cli-0.1.0/LICENSE +21 -0
  4. tapyr_cli-0.1.0/MANIFEST.in +5 -0
  5. tapyr_cli-0.1.0/PKG-INFO +390 -0
  6. tapyr_cli-0.1.0/README.md +358 -0
  7. tapyr_cli-0.1.0/examples/README.md +159 -0
  8. tapyr_cli-0.1.0/examples/com.gojek.app.md +19 -0
  9. tapyr_cli-0.1.0/examples/gofood_order.yaml +199 -0
  10. tapyr_cli-0.1.0/examples/gojek_order_food.yaml +38 -0
  11. tapyr_cli-0.1.0/examples/instagram_reels_engagement.yaml +205 -0
  12. tapyr_cli-0.1.0/examples/spotify_play_playlist.yaml +177 -0
  13. tapyr_cli-0.1.0/examples/tiktok_natural_engagement.yaml +159 -0
  14. tapyr_cli-0.1.0/examples/whatsapp_send_message.yaml +89 -0
  15. tapyr_cli-0.1.0/pyproject.toml +47 -0
  16. tapyr_cli-0.1.0/setup.cfg +4 -0
  17. tapyr_cli-0.1.0/src/tapyr/__init__.py +5 -0
  18. tapyr_cli-0.1.0/src/tapyr/__main__.py +4 -0
  19. tapyr_cli-0.1.0/src/tapyr/actions.py +1033 -0
  20. tapyr_cli-0.1.0/src/tapyr/apps.py +202 -0
  21. tapyr_cli-0.1.0/src/tapyr/bench.py +165 -0
  22. tapyr_cli-0.1.0/src/tapyr/calibration.py +538 -0
  23. tapyr_cli-0.1.0/src/tapyr/cli.py +2677 -0
  24. tapyr_cli-0.1.0/src/tapyr/context.py +262 -0
  25. tapyr_cli-0.1.0/src/tapyr/coordinates.py +50 -0
  26. tapyr_cli-0.1.0/src/tapyr/demo.py +540 -0
  27. tapyr_cli-0.1.0/src/tapyr/doctor.py +338 -0
  28. tapyr_cli-0.1.0/src/tapyr/driver.py +454 -0
  29. tapyr_cli-0.1.0/src/tapyr/errors.py +103 -0
  30. tapyr_cli-0.1.0/src/tapyr/executor.py +2934 -0
  31. tapyr_cli-0.1.0/src/tapyr/installer.py +106 -0
  32. tapyr_cli-0.1.0/src/tapyr/intent.py +272 -0
  33. tapyr_cli-0.1.0/src/tapyr/knowledge.py +3099 -0
  34. tapyr_cli-0.1.0/src/tapyr/mcp_server.py +1857 -0
  35. tapyr_cli-0.1.0/src/tapyr/perception.py +927 -0
  36. tapyr_cli-0.1.0/src/tapyr/platform_tools.py +425 -0
  37. tapyr_cli-0.1.0/src/tapyr/py.typed +0 -0
  38. tapyr_cli-0.1.0/src/tapyr/safety.py +666 -0
  39. tapyr_cli-0.1.0/src/tapyr/shell.py +440 -0
  40. tapyr_cli-0.1.0/src/tapyr/vault.py +808 -0
  41. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/PKG-INFO +390 -0
  42. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/SOURCES.txt +67 -0
  43. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/dependency_links.txt +1 -0
  44. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/entry_points.txt +2 -0
  45. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/requires.txt +9 -0
  46. tapyr_cli-0.1.0/src/tapyr_cli.egg-info/top_level.txt +1 -0
  47. tapyr_cli-0.1.0/tests/test_actions.py +1166 -0
  48. tapyr_cli-0.1.0/tests/test_apps.py +276 -0
  49. tapyr_cli-0.1.0/tests/test_bench.py +124 -0
  50. tapyr_cli-0.1.0/tests/test_calibration.py +662 -0
  51. tapyr_cli-0.1.0/tests/test_cli.py +2262 -0
  52. tapyr_cli-0.1.0/tests/test_context.py +190 -0
  53. tapyr_cli-0.1.0/tests/test_coordinates.py +82 -0
  54. tapyr_cli-0.1.0/tests/test_demo.py +444 -0
  55. tapyr_cli-0.1.0/tests/test_doctor.py +539 -0
  56. tapyr_cli-0.1.0/tests/test_driver.py +527 -0
  57. tapyr_cli-0.1.0/tests/test_errors.py +62 -0
  58. tapyr_cli-0.1.0/tests/test_examples.py +246 -0
  59. tapyr_cli-0.1.0/tests/test_executor.py +5718 -0
  60. tapyr_cli-0.1.0/tests/test_grid.py +82 -0
  61. tapyr_cli-0.1.0/tests/test_installer.py +82 -0
  62. tapyr_cli-0.1.0/tests/test_intent.py +260 -0
  63. tapyr_cli-0.1.0/tests/test_knowledge.py +3202 -0
  64. tapyr_cli-0.1.0/tests/test_mcp.py +1875 -0
  65. tapyr_cli-0.1.0/tests/test_perception.py +1386 -0
  66. tapyr_cli-0.1.0/tests/test_platform_tools.py +378 -0
  67. tapyr_cli-0.1.0/tests/test_safety.py +369 -0
  68. tapyr_cli-0.1.0/tests/test_shell.py +315 -0
  69. tapyr_cli-0.1.0/tests/test_vault.py +938 -0
@@ -0,0 +1,80 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-04-10)
4
+
5
+ Initial public release. **977 tests, 56 MCP tools, 35 CLI subcommands.**
6
+
7
+ ### Intent Resolver
8
+ - `tapyr do "intent"`: natural-language intent → tapyr action, three-tier pipeline
9
+ (14 builtin regex patterns → workflow suggestion → element-text matching),
10
+ no LLM calls, dry-run support
11
+
12
+ ### Perception
13
+ - Screenshot + UI dump + Set-of-Mark annotated PNG with numbered elements
14
+ - Vision-first fallback v0: grid-based region synthesis when uiautomator returns nothing
15
+ - Delta mode (`--since-last`): return only changed elements between calls
16
+ - Compact mode (`--compact`): minimal id/text/clickable payload for token efficiency
17
+ - Clickable-only filter (`--clickable-only`)
18
+ - Continuous watch polling (`tapyr watch`) with JSON Lines output on change
19
+
20
+ ### Actions
21
+ - Semantic tap (`--text`, `--id`, `--content-desc`, `--xy` with percent coords)
22
+ - Swipe, type, key, scroll-until, wait-for
23
+ - Unlock (idempotent, PIN support)
24
+ - Device status (one-shot composed query)
25
+ - Read notifications with OTP extraction (`--extract-otp`)
26
+ - Named screenshot snapshots (`tapyr screenshot --save`)
27
+
28
+ ### Workflow Engine
29
+ - YAML workflows with template variables, weighted `choose_one`, `repeat`, `sequence`
30
+ - `if` / `skip_if` / `run_if` conditional gates
31
+ - `expect_screen` / `verify` assertions with soft-mismatch `needs_llm_fallback` markers
32
+ - Per-step `retries` with exponential backoff
33
+ - Checkpoints with `requires_confirmation` halt + `--resume-from`
34
+ - Patching protocol: detect drift → respond with patches → persist as new workflow
35
+ - Dry-run with synthetic screen state (`--dry-run-state`)
36
+ - Workflow pipeline: `tapyr kb run A B C` chains multiple workflows
37
+ - Ad-hoc step execution (`tapyr exec --steps-json`)
38
+
39
+ ### Knowledge Base
40
+ - CRUD: save, read, list, delete, rename, clone, update metadata
41
+ - Tag management: add/remove/list with FTS reindex
42
+ - Search with FTS + tag filter composition
43
+ - Suggest workflows based on screen content
44
+ - Diff, find-refs, check-calibration for workflow maintenance
45
+ - History: JSONL audit log, stats aggregation, CSV export
46
+ - Record: materialize workflows from action history
47
+ - Export/import/install bundles (zip + manifest, 3 merge strategies, URL fetch with sha256)
48
+
49
+ ### Calibration
50
+ - Rail scanner (vertical icon columns) and nav scanner (horizontal bars)
51
+ - Unified v2 multi-region files with namespaced labels (`right_rail.comment`)
52
+ - `calibrate-app` mega-command composing both scanners
53
+ - `tap_rail` and `tap_saved` actions consuming calibrations with cross-device percent coords
54
+
55
+ ### Safety
56
+ - Action allowlist/denylist via `~/.tapyr/safety.yaml`
57
+ - Three-tier rules: deny / require_confirmation / allow
58
+ - Predicate matching: equals, contains, regex, bare-field AND-semantics
59
+ - CLI management: `tapyr safety list/set/delete`
60
+ - `--confirm-safety` runtime bypass for require_confirmation rules
61
+ - Per-device override support
62
+
63
+ ### Credential Vault
64
+ - macOS (security CLI), Linux (secret-tool/libsecret), Windows (DPAPI/PowerShell)
65
+ - `fill_credential` pastes without returning the secret to the agent
66
+ - No `vault_get` MCP tool by design — secrets never cross the agent boundary
67
+ - In-memory backend (`TAPYR_VAULT_TEST_BACKEND=memory`) for CI
68
+
69
+ ### Distribution
70
+ - `tapyr install-adb`: Playwright-style platform-tools bootstrap
71
+ - `tapyr doctor --fix`: auto-remediate adb-not-found
72
+ - `tapyr demo`: 7-step zero-side-effect scripted showcase
73
+ - `tapyr install claude-desktop/claude-code/cursor`: MCP client setup
74
+ - PyPI-ready: 210KB wheel, PEP 561 py.typed marker
75
+ - GitHub Actions CI: Python 3.10-3.13 × Ubuntu + macOS
76
+
77
+ ### Security
78
+ - Subprocess injection audit: 4 vectors patched with 28 regression tests
79
+ - Input validation on package names, keyevent names, deep-link URIs
80
+ - Shell metacharacter escaping for `adb shell input text`
@@ -0,0 +1,53 @@
1
+ # Contributing to Tapyr
2
+
3
+ Thanks for your interest in contributing.
4
+
5
+ ## Development setup
6
+
7
+ ```bash
8
+ git clone https://github.com/RafieAmandio/tapyr.git
9
+ cd tapyr
10
+ pip install -e ".[dev]"
11
+ python -m pytest -q # 977 tests, ~12s
12
+ ```
13
+
14
+ ## Running tests
15
+
16
+ ```bash
17
+ python -m pytest -q # full suite
18
+ python -m pytest tests/test_foo.py -x # single file, stop on first failure
19
+ python -m pytest -k "keyword" # filter by name
20
+ ```
21
+
22
+ All tests are hermetic — no real device, no network calls. Monkeypatching
23
+ is used for adb, urllib, and OS-level subprocess calls.
24
+
25
+ ## Smoke-testing with a device
26
+
27
+ ```bash
28
+ tapyr doctor --fix # ensure adb is available
29
+ tapyr demo # 7-step safe walkthrough
30
+ tapyr do "go home" # AI CLI mode — intent resolver
31
+ tapyr perceive --pretty # inspect the screen
32
+ ```
33
+
34
+ ## Adding a feature
35
+
36
+ 1. Read the existing code in the module you're extending.
37
+ 2. Add tests alongside the implementation. Every PR must keep the suite green.
38
+ 3. Follow existing patterns — match the idioms of adjacent functions.
39
+ 4. No new dependencies without discussion.
40
+
41
+ ## Workflow bundle contributions
42
+
43
+ Share your workflows via `tapyr kb export`:
44
+
45
+ ```bash
46
+ tapyr kb export ~/my-pack.tapyr.zip --workflow my_flow --calibration my_cal
47
+ ```
48
+
49
+ See `examples/README.md` for the showcase workflow conventions.
50
+
51
+ ## Code of conduct
52
+
53
+ Be kind, be constructive, ship working code.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rafie Amandio
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ include LICENSE
2
+ include README.md
3
+ include CHANGELOG.md
4
+ include CONTRIBUTING.md
5
+ recursive-include examples *.yaml *.yml *.md
@@ -0,0 +1,390 @@
1
+ Metadata-Version: 2.4
2
+ Name: tapyr-cli
3
+ Version: 0.1.0
4
+ Summary: Playwright for Android, built for AI agents. CLI + MCP server for LLM-driven phone control.
5
+ Author-email: Rafie Amandio <88525718+RafieAmandio@users.noreply.github.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/RafieAmandio/tapyr
8
+ Project-URL: Documentation, https://github.com/RafieAmandio/tapyr
9
+ Project-URL: Repository, https://github.com/RafieAmandio/tapyr
10
+ Project-URL: Bug Tracker, https://github.com/RafieAmandio/tapyr/issues
11
+ Keywords: android,adb,automation,mcp,ai-agents,playwright
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Software Development :: Testing
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.10
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: Pillow>=10.0
25
+ Requires-Dist: PyYAML>=6.0
26
+ Provides-Extra: mcp
27
+ Requires-Dist: mcp>=1.0; extra == "mcp"
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7; extra == "dev"
30
+ Requires-Dist: ruff>=0.4; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ ![Tests](https://github.com/RafieAmandio/tapyr/actions/workflows/test.yml/badge.svg)
34
+
35
+ # Tapyr
36
+
37
+ **Playwright for Android, built for AI agents.**
38
+
39
+ A CLI and stdio MCP server that gives an LLM (Claude, Cursor, any MCP-capable
40
+ agent) structured perception and action primitives for a connected Android
41
+ device, plus a file-based knowledge layer so learned workflows and app notes
42
+ persist across sessions.
43
+
44
+ Tapyr is **not** an agent. It is the hands, eyes, and memory that an agent
45
+ uses. Bring your own brain.
46
+
47
+ ## Why
48
+
49
+ LLMs can already read text and reason about intent. They cannot, by default,
50
+ open your Gojek app, tap "GoFood", type "sate", and hand off to checkout.
51
+ Tapyr closes that gap with a small, opinionated set of primitives designed
52
+ for tool-use from the ground up:
53
+
54
+ - **JSON-first output.** Every command returns `{"ok": bool, ...}` — no
55
+ log-scraping.
56
+ - **Semantic targeting.** `tap --text "Order"` or `tap --id 7` (from the
57
+ last perception), not `tap 540 1820`.
58
+ - **Set-of-Mark perception.** Screenshot + UI dump + an annotated PNG with
59
+ numbered boxes so vision models can pick an element by number.
60
+ - **Cross-device coordinates.** `tap_xy` and `swipe` accept percent strings
61
+ (`"50%"`) that resolve against the live `wm size` at replay time. Workflows
62
+ written on 720x1604 replay on 1080x2400 unchanged.
63
+ - **Workflow executor.** YAML workflows with template variables, weighted
64
+ `choose_one` branches, `repeat`, `sequence`, `expect_screen`, `verify`,
65
+ `if` / `skip_if` / `run_if` conditional gates, per-step `retries` with exponential
66
+ backoff, and a debug mode that snapshots fingerprints and annotated PNGs
67
+ per step. `kb run tiktok_engage --arg count=10` — done.
68
+ - **Safety gates.** `checkpoint: name, requires_confirmation: true` halts a
69
+ replay cleanly at payment / destructive steps; resume past the gate with
70
+ `tapyr kb run NAME --resume-from checkpoint_name` without re-executing the
71
+ expensive setup.
72
+ - **Observability.** `tapyr kb stats` surfaces per-workflow success rate,
73
+ halted runs, and drift-marker counts from the JSONL audit log — flaky
74
+ workflows bubble to the top. Soft `expect_screen` mismatches emit a
75
+ structured `needs_llm_fallback` marker so an orchestrating agent can
76
+ machine-detect drift without parsing prose warnings.
77
+ - **Calibration for canvas apps.** Apps that render via custom canvas
78
+ (TikTok, Instagram Reels, YouTube Shorts) are invisible to uiautomator.
79
+ `calibrate-rail` scans for a vertical icon rail (right-side action column),
80
+ `calibrate-nav` scans for a horizontal one (bottom nav), and the `tap_rail`
81
+ workflow action consumes saved calibrations by semantic name (`icon:
82
+ comment`). See below.
83
+ - **Lock screen + device status.** `tapyr unlock [--pin XXXX]` dismisses the
84
+ keyguard idempotently (no-ops if already unlocked). `tapyr status` returns
85
+ a one-shot snapshot — locked? foreground? screen size? wakefulness?
86
+ battery? — in a single call instead of five sequential adb round trips.
87
+ - **File-backed memory.** Workflows are YAML. App notes are Markdown. The
88
+ agent can read, write, and share them.
89
+ - **One binary, many front doors.** The same core runs as a CLI, an MCP
90
+ stdio server, or a self-installing plugin for Claude Desktop / Claude
91
+ Code / Cursor.
92
+
93
+ ## Architecture
94
+
95
+ ```
96
+ ┌─────────────────────────────────────────────┐
97
+ │ executor replay workflows, verify │
98
+ ├─────────────────────────────────────────────┤
99
+ │ knowledge workflows + app notes + FTS │
100
+ ├─────────────────────────────────────────────┤
101
+ │ calibration whiteness scan for canvas UIs │
102
+ ├─────────────────────────────────────────────┤
103
+ │ actions semantic tap, swipe, type │
104
+ ├─────────────────────────────────────────────┤
105
+ │ perception screenshot + ui_dump + SoM │
106
+ ├─────────────────────────────────────────────┤
107
+ │ driver adb subprocess wrapper │
108
+ └─────────────────────────────────────────────┘
109
+ ```
110
+
111
+ Each layer is usable independently. Agents operate at whatever level the
112
+ task needs: replay a saved workflow (fast path), drop to semantic taps
113
+ (normal path), coordinate-based taps on a calibrated rail (canvas-app
114
+ escape hatch), or raw pixels (last resort).
115
+
116
+ ## Install
117
+
118
+ ```bash
119
+ pipx install tapyr-cli # or: pip install tapyr-cli
120
+ tapyr doctor # tells you exactly what's missing
121
+ tapyr doctor --fix # auto-install adb if missing, re-run checks
122
+ tapyr install-adb # one-shot Playwright-style fetch of platform-tools
123
+ tapyr demo --dry-run # preview the 7-step scripted walkthrough
124
+ tapyr demo # with a phone plugged in: the hero shot
125
+ ```
126
+
127
+ `tapyr install-adb` fetches the official Android platform-tools bundle from
128
+ `dl.google.com` into `~/.tapyr/bin/`, which is registered as a fallback in the
129
+ `find_adb()` lookup chain so a PATH-installed adb still wins if present. Cached after first
130
+ run; `--force` re-downloads. Optional `TAPYR_ADB_SHA256` env var for
131
+ enterprise users who need a pinned checksum.
132
+
133
+ `tapyr demo` runs a zero-side-effect, 7-step walkthrough (doctor → wake →
134
+ perceive → HOME → deep-link Settings → BACK → save a marker workflow) that
135
+ gives the first-run experience a single command. No typing into fields, no
136
+ taps inside third-party apps, no installs. `--dry-run` works without a
137
+ phone for curious users and CI.
138
+
139
+ You will also need `adb` from Android `platform-tools` (on your `PATH`, or fetched via `tapyr install-adb`) and a
140
+ phone with USB debugging enabled. `doctor` walks you through both.
141
+
142
+ The `TAPYR_HOME` environment variable overrides the default `~/.tapyr`
143
+ state directory. Useful for multi-device test rigs or CI.
144
+
145
+ ### Wire it into your agent
146
+
147
+ ```bash
148
+ tapyr install claude-desktop # writes ~/Library/.../claude_desktop_config.json
149
+ tapyr install claude-code # writes ~/.claude/mcp.json
150
+ tapyr install cursor # writes ~/.cursor/mcp.json
151
+ tapyr install print # just prints the JSON snippet
152
+ ```
153
+
154
+ Then restart the client. The agent now has a `tapyr` tool group with:
155
+
156
+ - **Intent resolver:** `do` (natural-language → action, no API knowledge needed)
157
+ - **Perception:** `perceive`, `screenshot`
158
+ - **Actions:** `tap`, `swipe`, `type_text`, `key`, `scroll_until`, `wait_for`,
159
+ `unlock`, `exec_steps`
160
+ - **Device:** `device_status`, `list_devices`
161
+ - **Apps:** `open_app`, `deep_link`, `foreground`, `list_packages`,
162
+ `read_notifications`
163
+ - **Calibration:** `calibrate_rail`, `calibrate_nav`, `calibrations_list`,
164
+ `calibration_show`, `calibration_delete`
165
+ - **Credential vault:** `fill_credential`, `vault_set`, `vault_list`,
166
+ `vault_delete` (macOS / Linux / Windows; no `vault_get` by design)
167
+ - **Safety policy:** `safety_list`, `safety_set_rule`, `safety_delete_rule`
168
+ - **Knowledge base:** `kb_search_workflows`, `kb_suggest_workflows`,
169
+ `kb_read_workflow`, `kb_run_workflow`, `kb_save_workflow`,
170
+ `kb_delete_workflow`, `kb_rename_workflow`, `kb_clone_workflow`,
171
+ `kb_update_workflow_metadata`, `kb_lint_workflow`, `kb_diff_workflows`,
172
+ `kb_find_references`, `kb_check_calibration`, `kb_record_workflow`,
173
+ `kb_add_tags`, `kb_remove_tags`, `kb_read_history`, `kb_stats`,
174
+ `kb_read_app_note`, `kb_append_app_note`, `kb_list_app_notes`,
175
+ `kb_list_workflows`, `kb_export_bundle`, `kb_import_bundle`,
176
+ `kb_install_bundle`
177
+ - **Environment:** `doctor`
178
+
179
+ **56 MCP tools** across 10 families — the full surface an agent needs to
180
+ perceive, act, learn, and manage workflows on any connected Android device.
181
+
182
+ ## Usage (CLI)
183
+
184
+ ```bash
185
+ # Sanity check
186
+ tapyr doctor --pretty
187
+ tapyr devices
188
+ tapyr status --pretty # one-shot: locked, foreground, battery, ...
189
+
190
+ # See the screen the way an agent sees it
191
+ tapyr perceive --pretty
192
+ # -> ~/.tapyr/cache/last_screenshot.png
193
+ # -> ~/.tapyr/cache/last_annotated.png (numbered overlays)
194
+ # -> JSON element list with ids
195
+ tapyr perceive --clickable-only # trim layout noise, keep only tap targets
196
+ tapyr perceive --compact # minimal payload: id + text + clickable only
197
+ tapyr watch --interval 2 --max-polls 10 # poll for screen changes, JSON Lines output
198
+
199
+ # AI CLI mode — just say what you want
200
+ tapyr do "like this post" # resolves intent → action automatically
201
+ tapyr do "go home" # builtin: HOME key (no perceive needed)
202
+ tapyr do "type hello world" # detects type intent
203
+ tapyr do "open com.whatsapp" # opens app by package
204
+ tapyr do "scroll down" # swipe gesture
205
+ tapyr do "like this" --dry-run # preview the resolved action without executing
206
+ tapyr perceive --since-last # delta mode: only what changed since the last call
207
+ tapyr --timeout 60 perceive # global adb-shell timeout override (slow devices)
208
+
209
+ # Drive the phone
210
+ tapyr unlock --pin 1234 # dismisses keyguard, idempotent
211
+ tapyr tap --text "GoFood"
212
+ tapyr type "sate"
213
+ tapyr key enter
214
+ tapyr tap --id 3
215
+ tapyr tap --xy 50% 90% # percent coords work everywhere
216
+ tapyr swipe 50% 75% 50% 25%
217
+ tapyr scroll-until --text "Add to cart"
218
+ tapyr wait-for --text "Order confirmed" --timeout 15
219
+
220
+ # Skip the UI entirely when you know the intent
221
+ tapyr deep-link "gojek://gofood/home"
222
+ tapyr open com.whatsapp
223
+
224
+ # Read push notifications / SMS OTPs (needs --noredact on device)
225
+ tapyr read-notifications --package com.android.messaging --pretty
226
+
227
+ # Knowledge base
228
+ tapyr kb list
229
+ tapyr kb list --query "food order"
230
+ tapyr kb search "checkout" --tag food # FTS + tag filter
231
+ tapyr kb read gojek_order_food
232
+ tapyr kb run tiktok_natural_engagement --arg count=10
233
+ tapyr kb run unlock_flow gofood_order verify_order # pipeline: run 3 in sequence
234
+ tapyr kb run my_workflow --debug # per-step PNGs
235
+ tapyr kb run my_workflow --dry-run # preview only
236
+ tapyr kb run pay_flow --confirm-checkpoints # walk past gates
237
+ tapyr kb run pay_flow --confirm-safety # walk past require_confirmation safety rules
238
+ tapyr kb run pay_flow --resume-from confirm_payment # pick up from halt
239
+ tapyr kb run my_workflow --dry-run --dry-run-state '{"screen_texts":["OK"]}' # evaluate if/skip_if gates against a fixture
240
+ tapyr kb lint my_workflow # static validation
241
+ tapyr kb stats --days 7 # flaky workflows
242
+ tapyr kb history --limit 20 --action run_workflow
243
+ tapyr kb delete old_workflow
244
+ tapyr kb rename old_name new_name # atomic rename + FTS reindex
245
+ tapyr kb tag add gojek_order food delivery # add tags to a saved workflow
246
+ tapyr kb tag remove gojek_order wip # remove tags
247
+ tapyr kb clone gojek_order --as gojek_order_v2 # duplicate for safe editing
248
+ tapyr kb diff workflow_v1 workflow_v2 --pretty # colored unified-diff
249
+ tapyr kb history --export csv > history.csv # spreadsheet-friendly audit export
250
+ tapyr kb find-refs --calibration tiktok_rail # who uses this?
251
+ tapyr kb check-calibration tiktok_rail # did re-cal break anything?
252
+ tapyr kb app com.gojek.app
253
+ tapyr kb app com.gojek.app --append "Payment confirm button is bottom-right"
254
+ tapyr kb apps # list all documented packages
255
+ tapyr kb export ~/gojek-pack.tapyr.zip --all # pack workflows+cals+notes
256
+ tapyr kb import ~/gojek-pack.tapyr.zip --merge rename # skip/overwrite/rename
257
+ tapyr kb install https://example.com/gojek-pack.tapyr.zip --sha256 ... # fetch + verify + import
258
+
259
+ # Credential vault (macOS / Linux / Windows)
260
+ tapyr vault set com.whatsapp password --stdin # reads from stdin, never argv
261
+ tapyr vault list # {package, field} only — no secrets
262
+ tapyr fill-credential com.whatsapp password # paste into focused field
263
+
264
+ # Action safety policy
265
+ tapyr safety list # show current deny/require_confirmation/allow rules
266
+ tapyr safety set deny key --equals POWER # never POWER key on this device
267
+ tapyr safety set deny shell --contains "pm uninstall" # forbid uninstalls via shell action
268
+ tapyr safety set require_confirmation deep_link --package com.android.vending
269
+ tapyr safety delete deny key --equals POWER # exact-match delete
270
+
271
+ # Parallel test fleets
272
+ TAPYR_MULTI_DEVICE=1 tapyr --serial emulator-5554 perceive # scoped cache + history
273
+ ```
274
+
275
+ ## Calibration for canvas-rendered apps
276
+
277
+ Some apps render their entire UI through a custom canvas engine — TikTok,
278
+ Instagram Reels, YouTube Shorts. Their windows have focus but `shown=false`
279
+ in `dumpsys`, so `uiautomator dump` falls through to whatever's drawn
280
+ underneath (usually the launcher). `perceive` returns zero meaningful
281
+ elements. Semantic `tap --text` and `tap --content-desc` do not work.
282
+
283
+ Tapyr's workaround: scan the screen for white icon clusters along a vertical
284
+ rail and use the cluster centers as blind-tap targets.
285
+
286
+ ```bash
287
+ # 1. One-time setup per app+device: scan the rail, label the clusters,
288
+ # save the result. The x-column auto-detects in the right 15% of the
289
+ # screen; pass --x-col to override.
290
+ tapyr calibrate-rail \
291
+ --y-min 500 --y-max 1400 \
292
+ --min-cluster-pixels 30 \
293
+ --labels heart,comment,bookmark,share \
294
+ --save tiktok_rail.json
295
+
296
+ # 2. In a workflow yaml, reference the saved calibration by name:
297
+ #
298
+ # - action: tap_rail
299
+ # calibration: tiktok_rail
300
+ # icon: comment # v1: bare label
301
+ # icon: right_rail.comment # v2: namespaced (region.label)
302
+ #
303
+ # Unified v2 calibrations merge multiple regions (right_rail, bottom_nav)
304
+ # into one file per app. Bare labels auto-resolve when unambiguous;
305
+ # namespaced labels (right_rail.comment) disambiguate across regions.
306
+ # v1 files (single rail) keep working unchanged.
307
+
308
+ # 3. Discover what's saved
309
+ tapyr calibrations --pretty
310
+ ```
311
+
312
+ The `examples/tiktok_natural_engagement.yaml` workflow is a fully worked
313
+ reference: a weighted `choose_one` loop that randomly likes, favorites,
314
+ shares, or comments (with Indonesian gen-Z phrases) 5-45% of the time,
315
+ passively scrolls the rest, and posts real comments via TikTok's native
316
+ input field.
317
+
318
+ See `examples/README.md` for four more showcase workflows:
319
+ **whatsapp_send_message** (the hello-world tutorial),
320
+ **gofood_order** (Indonesian use case with a payment checkpoint),
321
+ **spotify_play_playlist** (morning-routine weighted rotation), and
322
+ **instagram_reels_engagement** (canvas-app mirror of the TikTok example).
323
+ Every file in `examples/` is pinned lint-clean by a regression test.
324
+
325
+ ## Knowledge on disk
326
+
327
+ ```
328
+ ~/.tapyr/
329
+ ├── workflows/ # agent-written task recipes (yaml)
330
+ ├── apps/ # per-app declarative notes (markdown)
331
+ ├── calibration/ # rail-icon calibrations for canvas apps (json)
332
+ ├── history/ # jsonl audit log of every action
333
+ ├── cache/ # last screenshot, dump, annotated PNG
334
+ ├── debug/ # per-step snapshots from `kb run --debug`
335
+ └── index.sqlite # FTS index — rebuildable from the above
336
+ ```
337
+
338
+ Everything is git-friendly. You can version-control your `~/.tapyr/`
339
+ directory and share workflows + calibrations across machines or with
340
+ teammates.
341
+
342
+ ## Usage (from an agent, via MCP)
343
+
344
+ Once installed into your client, the agent can call the tools directly.
345
+ A typical loop for a well-known app:
346
+
347
+ 1. `perceive` → read the element list + annotated PNG
348
+ 2. `tap` with a selector that matches an element
349
+ 3. `perceive` → confirm the new screen
350
+ 4. `kb_save_workflow` once the task completes, so the next agent is faster
351
+
352
+ For a canvas-rendered app (TikTok etc.) where `uiautomator dump` returns
353
+ nothing: **vision-first fallback (v0)** automatically kicks in — the
354
+ screenshot is analyzed via grid-based variance detection to synthesize
355
+ clickable regions even without a UI tree. The agent sees numbered boxes
356
+ on the annotated PNG and can `tap --id N`. For higher precision on
357
+ known apps, calibrate once and replay:
358
+
359
+ 1. `calibrate_app` with `--rail` and `--nav` flags to scan both icon
360
+ regions in one shot and save a unified v2 calibration file
361
+ 2. `kb_save_workflow` with `tap_rail` or `tap_saved` steps referencing
362
+ namespaced labels (`right_rail.comment`, `bottom_nav.home`)
363
+ 3. `kb_run_workflow` to replay
364
+
365
+ ## Status
366
+
367
+ **v0 / alpha.** Validated on a real Oppo 720x1604 device through 40+ loop
368
+ iterations of incremental hardening. The TikTok engagement workflow posts
369
+ real comments, likes, bookmarks, and shares through the coordinate-based
370
+ `tap_rail` path. **977 tests passing**, covering the executor, actions,
371
+ calibration scanners, knowledge base, MCP server, CLI dispatch, and the
372
+ workflow observability surface.
373
+
374
+ What's here (as of this README revision): the full primitive set listed
375
+ above, 10 MCP tool families, `kb stats` observability over the JSONL audit
376
+ log, soft-drift `needs_llm_fallback` markers on `expect_screen` mismatches,
377
+ idempotent `unlock` and composed `device_status`, and a cross-device
378
+ coordinate system that works on any screen that `wm size` can report.
379
+
380
+ **Vision-first perception fallback (v0)** shipped — when `uiautomator dump`
381
+ returns nothing, perception now synthesizes clickable regions from the
382
+ screenshot via grid-based variance analysis so agents can at least `tap --id N`
383
+ on numbered areas in the annotated PNG. Still planned for v1: replacing
384
+ the heuristic grid with a segmentation model (SAM / YOLO) for element-level
385
+ precision, plus OCR for text field extraction. See `BACKLOG.md` for the full
386
+ list with priority ordering.
387
+
388
+ ## License
389
+
390
+ MIT.