understand-quickly 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,64 @@
1
+ """understand-quickly Python SDK.
2
+
3
+ A thin client for the public registry of code-knowledge graphs at
4
+ https://looptech-ai.github.io/understand-quickly/.
5
+
6
+ >>> from understand_quickly import Registry
7
+ >>> reg = Registry()
8
+ >>> entries = reg.list(status="ok")
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from .aclient import AsyncRegistry
14
+ from .client import (
15
+ DEFAULT_REGISTRY_URL,
16
+ ENV_VAR,
17
+ Registry,
18
+ RegistryError,
19
+ RegistryHTTPError,
20
+ RegistryParseError,
21
+ )
22
+ from .types import (
23
+ Entry,
24
+ EntryStatus,
25
+ Graph,
26
+ SearchHit,
27
+ Stats,
28
+ StatsConcept,
29
+ StatsKind,
30
+ StatsLanguage,
31
+ StatsTotals,
32
+ TopKind,
33
+ WellKnown,
34
+ WellKnownRepo,
35
+ )
36
+
37
# Version string exposed to callers as ``understand_quickly.__version__``.
__version__ = "0.1.0"

# Public API of the package: every name imported above is re-exported here,
# grouped by role.
__all__ = [
    "__version__",
    # clients
    "Registry",
    "AsyncRegistry",
    # errors
    "RegistryError",
    "RegistryHTTPError",
    "RegistryParseError",
    # constants
    "DEFAULT_REGISTRY_URL",
    "ENV_VAR",
    # types
    "Entry",
    "EntryStatus",
    "Graph",
    "SearchHit",
    "Stats",
    "StatsConcept",
    "StatsKind",
    "StatsLanguage",
    "StatsTotals",
    "TopKind",
    "WellKnown",
    "WellKnownRepo",
]
@@ -0,0 +1,6 @@
1
"""Allow ``python -m understand_quickly``."""

from .cli import main

# Only run the CLI when executed as a script/module; the value returned by
# ``main()`` is handed to ``SystemExit`` and so becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,217 @@
1
+ """Asynchronous client for the understand-quickly registry, backed by ``httpx``."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import time
6
+ from typing import Any, Iterable, Optional
7
+
8
+ import httpx
9
+
10
+ from .client import (
11
+ DEFAULT_CACHE_TTL_SECONDS,
12
+ DEFAULT_TIMEOUT_SECONDS,
13
+ RegistryError,
14
+ RegistryHTTPError,
15
+ RegistryParseError,
16
+ USER_AGENT,
17
+ _decode_json,
18
+ _join,
19
+ _matches,
20
+ _normalize_repo_url,
21
+ _resolve_base_url,
22
+ )
23
+ from .types import Entry, Graph, Registry as RegistryDoc, SearchHit, Stats, WellKnown
24
+
25
+
26
class AsyncRegistry:
    """Async client mirroring :class:`understand_quickly.Registry`.

    Use as an async context manager so the underlying ``httpx.AsyncClient``
    is closed cleanly::

        async with AsyncRegistry() as reg:
            entries = await reg.list(status="ok")

    Decoded JSON documents are cached in memory, keyed by URL, for
    ``cache_ttl`` seconds. A ``cache_ttl`` of ``0`` or less disables the cache.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        *,
        cache_ttl: float = DEFAULT_CACHE_TTL_SECONDS,
        timeout: float = DEFAULT_TIMEOUT_SECONDS,
        client: Optional[httpx.AsyncClient] = None,
        transport: Optional[httpx.AsyncBaseTransport] = None,
    ) -> None:
        """Create the client.

        Args:
            base_url: Registry base URL; resolved through ``_resolve_base_url``.
            cache_ttl: Lifetime in seconds of cached documents.
            timeout: HTTP timeout in seconds. Only applied when this instance
                builds its own ``httpx.AsyncClient``.
            client: Pre-built ``httpx.AsyncClient`` to use as-is. It is never
                closed by :meth:`aclose`; the caller keeps ownership.
            transport: Optional transport for the internally built client.
                Ignored when ``client`` is supplied.
        """
        self.base_url = _resolve_base_url(base_url)
        self.cache_ttl = float(cache_ttl)
        self.timeout = float(timeout)
        # url -> (monotonic timestamp of the fetch, decoded JSON document)
        self._cache: dict[str, tuple[float, Any]] = {}
        self._owns_client = client is None
        if client is not None:
            self._client = client
        else:
            kwargs: dict[str, Any] = {
                "timeout": self.timeout,
                "headers": {"Accept": "application/json", "User-Agent": USER_AGENT},
            }
            if transport is not None:
                kwargs["transport"] = transport
            self._client = httpx.AsyncClient(**kwargs)

    # ---- lifecycle -------------------------------------------------------

    async def __aenter__(self) -> "AsyncRegistry":
        """Enter the async context; returns this instance."""
        return self

    async def __aexit__(self, *exc_info: Any) -> None:
        """Leave the async context, closing the HTTP client if we own it."""
        await self.aclose()

    async def aclose(self) -> None:
        """Close the underlying ``httpx.AsyncClient`` when we own it."""
        if self._owns_client:
            await self._client.aclose()

    # ---- low-level fetch -------------------------------------------------

    async def _fetch_json(self, url: str) -> Any:
        """GET *url* and return its decoded JSON body, using the TTL cache.

        Raises:
            RegistryError: The request could not be performed, including when
                *url* itself is malformed.
            RegistryHTTPError: The response status is outside ``2xx``.

        Decoding is delegated to ``_decode_json``, which may raise on an
        invalid body.
        """
        now = time.monotonic()
        if self.cache_ttl > 0:
            cached = self._cache.get(url)
            if cached is not None and (now - cached[0]) < self.cache_ttl:
                return cached[1]
        try:
            resp = await self._client.get(url)
        except (httpx.HTTPError, httpx.InvalidURL) as exc:
            # ``httpx.InvalidURL`` is not a subclass of ``httpx.HTTPError``.
            # URLs such as ``graph_url`` come from registry data, so a
            # malformed one must still surface as a ``RegistryError``.
            raise RegistryError(f"GET {url} failed: {exc}") from exc
        if resp.status_code < 200 or resp.status_code >= 300:
            raise RegistryHTTPError(url, resp.status_code, resp.content)
        data = _decode_json(url, resp.content)
        if self.cache_ttl > 0:
            # Stamped with the pre-request time, so entries never outlive the
            # configured TTL measured from when the fetch started.
            self._cache[url] = (now, data)
        return data

    def clear_cache(self) -> None:
        """Drop every cached document."""
        self._cache.clear()

    # ---- documents -------------------------------------------------------

    async def registry(self) -> RegistryDoc:
        """Fetch ``registry.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, "registry.json"))

    async def well_known(self) -> WellKnown:
        """Fetch ``.well-known/repos.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, ".well-known/repos.json"))

    async def stats(self) -> Stats:
        """Fetch ``stats.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, "stats.json"))

    # ---- high-level helpers ---------------------------------------------

    async def list(
        self,
        *,
        status: Optional[str] = None,
        format: Optional[str] = None,
        owner: Optional[str] = None,
        tag: Optional[str] = None,
    ) -> list[Entry]:
        """Return the registry entries that pass every supplied filter.

        ``status``, ``format`` and ``owner`` are evaluated by ``_matches``;
        ``tag`` keeps only entries whose ``"tags"`` list contains it exactly.
        With no filters, every entry of the registry document is returned.
        """
        doc = await self.registry()
        entries: Iterable[Entry] = doc.get("entries", []) or []
        results: list[Entry] = []
        for entry in entries:
            if not _matches(entry, status=status, format=format, owner=owner):
                continue
            if tag is not None:
                tags = entry.get("tags") or []
                if tag not in tags:
                    continue
            results.append(entry)
        return results

    async def get_entry(self, entry_id: str) -> Optional[Entry]:
        """Return the entry whose ``"id"`` equals *entry_id*, or ``None``."""
        for entry in await self.list():
            if entry.get("id") == entry_id:
                return entry
        return None

    async def get_graph(self, entry_id: str) -> Graph:
        """Fetch the graph document referenced by an entry's ``graph_url``.

        Raises:
            RegistryError: No entry has this id, or the entry has no
                ``graph_url``.
        """
        entry = await self.get_entry(entry_id)
        if entry is None:
            raise RegistryError(f"no entry with id={entry_id!r}")
        graph_url = entry.get("graph_url")
        if not graph_url:
            raise RegistryError(f"entry {entry_id!r} has no graph_url")
        return await self._fetch_json(graph_url)

    async def find_graph_for_repo(self, repo: str) -> Optional[Entry]:
        """Return the entry matching *repo*, or ``None`` when there is none.

        *repo* is parsed by ``_normalize_repo_url`` into an ``(owner, repo)``
        pair; ``None`` is returned when that parse fails. Entry values are
        lower-cased before comparison (this presumes the normalizer yields
        lower-case parts — confirm against ``_normalize_repo_url``).
        """
        norm = _normalize_repo_url(repo)
        if norm is None:
            return None
        owner, repo_name = norm
        for entry in await self.list():
            e_owner = (entry.get("owner") or "").lower()
            e_repo = (entry.get("repo") or "").lower()
            if e_owner == owner and e_repo == repo_name:
                return entry
        return None

    async def search(self, query: str, *, scope: str = "all") -> list[SearchHit]:
        """Case-insensitive substring search over concepts and/or entries.

        Args:
            query: Text to look for; an empty query yields no hits.
            scope: ``"concepts"``, ``"entries"`` or ``"all"`` (both, concept
                hits first). Any other value matches nothing.

        Returns:
            Concept hits (one per sample entry id of each matching concept)
            followed by entry hits.

        Raises:
            RegistryError: Only from the entry phase. During the concept
                phase, failures to load stats or entries are tolerated.
        """
        if not query:
            return []
        q = query.lower()
        hits: list[SearchHit] = []

        if scope in ("all", "concepts"):
            # Best effort: missing stats simply produce no concept hits.
            try:
                stats = await self.stats()
            except RegistryError:
                stats = {}
            # Entries are only used to enrich hits; without them each hit
            # carries an empty ``entry`` mapping.
            entry_lookup: dict[str, Entry] = {}
            try:
                entries = await self.list()
                entry_lookup = {e.get("id", ""): e for e in entries if e.get("id")}
            except RegistryError:
                entry_lookup = {}
            for concept in stats.get("concepts", []) or []:
                term = (concept.get("term") or "").lower()
                if q in term:
                    samples = concept.get("samples") or []
                    for sample in samples:
                        hits.append(
                            {
                                "term": concept.get("term", ""),
                                "entry_id": sample,
                                "entry": entry_lookup.get(sample, {}),
                                "samples": list(samples),
                                "count": int(concept.get("entries", 0) or 0),
                            }
                        )

        if scope in ("all", "entries"):
            for entry in await self.list():
                # Gather the searchable text of the entry into one blob.
                blob_parts: list[str] = []
                for key in ("id", "description", "format"):
                    val = entry.get(key)
                    if isinstance(val, str):
                        blob_parts.append(val)
                for key in ("tags", "languages"):
                    val = entry.get(key)
                    if isinstance(val, list):
                        blob_parts.extend(str(x) for x in val)
                blob = "\n".join(blob_parts).lower()
                if q in blob:
                    hits.append(
                        {
                            "term": query,
                            "entry_id": entry.get("id", ""),
                            "entry": entry,
                        }
                    )
        return hits
210
+
211
+
212
# Public names of this module: the async client plus the error classes
# imported from ``.client``, re-exported for convenience.
__all__ = [
    "AsyncRegistry",
    "RegistryError",
    "RegistryHTTPError",
    "RegistryParseError",
]
@@ -0,0 +1,160 @@
1
+ """``python -m understand_quickly`` — JSON-first command-line interface.
2
+
3
+ Subcommands:
4
+ - ``list`` List registry entries (filterable).
5
+ - ``get-graph`` Fetch a graph body by entry id.
6
+ - ``find`` Resolve a GitHub URL or ``owner/repo`` to its entry.
7
+ - ``search`` Cross-graph concept + entry search.
8
+ - ``stats`` Aggregate stats across the registry.
9
+
10
+ JSON output by default; ``--pretty`` adds indented formatting (and table
11
+ shaping for ``list``). Exit codes: ``0`` success, ``1`` not-found, ``2``
12
+ HTTP/parse error, ``64`` usage error.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import json
19
+ import sys
20
+ from typing import Any, Optional, Sequence
21
+
22
+ from . import __version__
23
+ from .client import Registry, RegistryError, RegistryHTTPError
24
+
25
+
26
# Process exit codes returned by ``main`` (see the module docstring).
EXIT_OK = 0  # success
EXIT_NOT_FOUND = 1  # requested entry was not found
EXIT_ERROR = 2  # HTTP or parse error
EXIT_USAGE = 64  # usage error
30
+
31
+
32
+ def _emit(data: Any, pretty: bool, *, fp: Any = None) -> None:
33
+ out = fp or sys.stdout
34
+ if pretty and isinstance(data, list) and data and isinstance(data[0], dict):
35
+ # Pretty-print a list of dicts as a compact table for `list`.
36
+ keys = ["id", "format", "status", "last_synced"]
37
+ rows = [[str(item.get(k, "")) for k in keys] for item in data]
38
+ widths = [max(len(k), *(len(r[i]) for r in rows)) for i, k in enumerate(keys)]
39
+ header = " ".join(k.ljust(widths[i]) for i, k in enumerate(keys))
40
+ sep = " ".join("-" * w for w in widths)
41
+ print(header, file=out)
42
+ print(sep, file=out)
43
+ for row in rows:
44
+ print(" ".join(row[i].ljust(widths[i]) for i in range(len(keys))), file=out)
45
+ return
46
+ if pretty:
47
+ json.dump(data, out, indent=2, sort_keys=False)
48
+ out.write("\n")
49
+ else:
50
+ json.dump(data, out)
51
+ out.write("\n")
52
+
53
+
54
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the argument parser for the ``understand-quickly`` CLI.

    Global options (``--version``, ``--registry``, ``--pretty``, ``--timeout``,
    ``--no-cache``) live on the root parser; exactly one subcommand out of
    ``list``, ``get-graph``, ``find``, ``search`` and ``stats`` is required
    and is stored in ``args.cmd``.
    """
    root = argparse.ArgumentParser(
        prog="understand-quickly",
        description="Thin client for the understand-quickly registry of code-knowledge graphs.",
    )

    # Global options, registered in the order they appear in ``--help``.
    global_options = (
        (
            "--version",
            {"action": "version", "version": f"understand-quickly {__version__}"},
        ),
        (
            "--registry",
            {
                "dest": "registry_url",
                "default": None,
                "help": "Override the registry base URL (defaults to UNDERSTAND_QUICKLY_REGISTRY env var).",
            },
        ),
        (
            "--pretty",
            {"action": "store_true", "help": "Pretty-print human-friendly output."},
        ),
        (
            "--timeout",
            {
                "type": float,
                "default": 30.0,
                "help": "HTTP timeout in seconds (default: 30).",
            },
        ),
        (
            "--no-cache",
            {
                "action": "store_true",
                "help": "Disable the in-memory TTL cache for this invocation.",
            },
        ),
    )
    for flag, spec in global_options:
        root.add_argument(flag, **spec)

    commands = root.add_subparsers(dest="cmd", required=True, metavar="<command>")

    # ``list``: optional filters; ``--format`` is stored as ``fmt``.
    list_cmd = commands.add_parser("list", help="List registry entries.")
    for flag, dest in (
        ("--status", None),
        ("--format", "fmt"),
        ("--owner", None),
        ("--tag", None),
    ):
        extra = {} if dest is None else {"dest": dest}
        list_cmd.add_argument(flag, default=None, **extra)

    graph_cmd = commands.add_parser(
        "get-graph", help="Fetch the graph body for an entry id."
    )
    graph_cmd.add_argument("entry_id")

    find_cmd = commands.add_parser(
        "find", help="Find the entry for a GitHub URL or owner/repo slug."
    )
    find_cmd.add_argument("repo")

    search_cmd = commands.add_parser("search", help="Search entries and concepts.")
    search_cmd.add_argument("query")
    search_cmd.add_argument(
        "--scope", choices=("all", "entries", "concepts"), default="all"
    )

    commands.add_parser("stats", help="Print the aggregate stats.json document.")

    return root
104
+
105
+
106
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the CLI and return a process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        ``EXIT_OK`` on success, ``EXIT_NOT_FOUND`` when no entry matches,
        ``EXIT_ERROR`` on HTTP or other registry errors, and ``EXIT_USAGE``
        for usage errors or a registry client that cannot be constructed.
    """
    parser = _build_parser()
    try:
        args = parser.parse_args(argv)
    except SystemExit as exc:
        # argparse exits 2 on usage errors; remap to 64 for clarity.
        if exc.code == 2:
            return EXIT_USAGE
        return int(exc.code or 0)

    ttl = 60.0
    if args.no_cache:
        ttl = 0.0
    try:
        reg = Registry(args.registry_url, cache_ttl=ttl, timeout=args.timeout)
    except RegistryError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return EXIT_USAGE

    command = args.cmd
    pretty = args.pretty
    # Commands that just fetch a payload and print it.
    fetchers = {
        "list": lambda: reg.list(
            status=args.status, format=args.fmt, owner=args.owner, tag=args.tag
        ),
        "get-graph": lambda: reg.get_graph(args.entry_id),
        "search": lambda: reg.search(args.query, scope=args.scope),
        "stats": lambda: reg.stats(),
    }

    try:
        if command == "find":
            # ``find`` reports a miss on stderr but still emits JSON ``null``.
            match = reg.find_graph_for_repo(args.repo)
            if match is None:
                print(f"no entry matches {args.repo!r}", file=sys.stderr)
                _emit(None, pretty)
                return EXIT_NOT_FOUND
            _emit(match, pretty)
            return EXIT_OK
        fetch = fetchers.get(command)
        if fetch is not None:
            _emit(fetch(), pretty)
            return EXIT_OK
    except RegistryHTTPError as exc:
        print(f"http error: {exc}", file=sys.stderr)
        return EXIT_ERROR
    except RegistryError as exc:
        message = str(exc)
        print(f"error: {message}", file=sys.stderr)
        # "no entry ..." messages denote a lookup miss rather than a failure.
        return EXIT_NOT_FOUND if message.startswith("no entry") else EXIT_ERROR

    # Unknown command: show help and signal a usage error.
    parser.print_help(sys.stderr)
    return EXIT_USAGE


if __name__ == "__main__":  # pragma: no cover — exercised via subprocess in tests
    raise SystemExit(main())