understand-quickly 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- understand_quickly/__init__.py +64 -0
- understand_quickly/__main__.py +6 -0
- understand_quickly/aclient.py +217 -0
- understand_quickly/cli.py +160 -0
- understand_quickly/client.py +300 -0
- understand_quickly/types.py +124 -0
- understand_quickly-0.1.0.dist-info/METADATA +346 -0
- understand_quickly-0.1.0.dist-info/RECORD +11 -0
- understand_quickly-0.1.0.dist-info/WHEEL +4 -0
- understand_quickly-0.1.0.dist-info/entry_points.txt +2 -0
- understand_quickly-0.1.0.dist-info/licenses/LICENSE +215 -0
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""understand-quickly Python SDK.
|
|
2
|
+
|
|
3
|
+
A thin client for the public registry of code-knowledge graphs at
|
|
4
|
+
https://looptech-ai.github.io/understand-quickly/.
|
|
5
|
+
|
|
6
|
+
>>> from understand_quickly import Registry
|
|
7
|
+
>>> reg = Registry()
|
|
8
|
+
>>> entries = reg.list(status="ok")
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from .aclient import AsyncRegistry
|
|
14
|
+
from .client import (
|
|
15
|
+
DEFAULT_REGISTRY_URL,
|
|
16
|
+
ENV_VAR,
|
|
17
|
+
Registry,
|
|
18
|
+
RegistryError,
|
|
19
|
+
RegistryHTTPError,
|
|
20
|
+
RegistryParseError,
|
|
21
|
+
)
|
|
22
|
+
from .types import (
|
|
23
|
+
Entry,
|
|
24
|
+
EntryStatus,
|
|
25
|
+
Graph,
|
|
26
|
+
SearchHit,
|
|
27
|
+
Stats,
|
|
28
|
+
StatsConcept,
|
|
29
|
+
StatsKind,
|
|
30
|
+
StatsLanguage,
|
|
31
|
+
StatsTotals,
|
|
32
|
+
TopKind,
|
|
33
|
+
WellKnown,
|
|
34
|
+
WellKnownRepo,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
__version__ = "0.1.0"
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"__version__",
|
|
41
|
+
# clients
|
|
42
|
+
"Registry",
|
|
43
|
+
"AsyncRegistry",
|
|
44
|
+
# errors
|
|
45
|
+
"RegistryError",
|
|
46
|
+
"RegistryHTTPError",
|
|
47
|
+
"RegistryParseError",
|
|
48
|
+
# constants
|
|
49
|
+
"DEFAULT_REGISTRY_URL",
|
|
50
|
+
"ENV_VAR",
|
|
51
|
+
# types
|
|
52
|
+
"Entry",
|
|
53
|
+
"EntryStatus",
|
|
54
|
+
"Graph",
|
|
55
|
+
"SearchHit",
|
|
56
|
+
"Stats",
|
|
57
|
+
"StatsConcept",
|
|
58
|
+
"StatsKind",
|
|
59
|
+
"StatsLanguage",
|
|
60
|
+
"StatsTotals",
|
|
61
|
+
"TopKind",
|
|
62
|
+
"WellKnown",
|
|
63
|
+
"WellKnownRepo",
|
|
64
|
+
]
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""Asynchronous client for the understand-quickly registry, backed by ``httpx``."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import time
|
|
6
|
+
from typing import Any, Iterable, Optional
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
from .client import (
|
|
11
|
+
DEFAULT_CACHE_TTL_SECONDS,
|
|
12
|
+
DEFAULT_TIMEOUT_SECONDS,
|
|
13
|
+
RegistryError,
|
|
14
|
+
RegistryHTTPError,
|
|
15
|
+
RegistryParseError,
|
|
16
|
+
USER_AGENT,
|
|
17
|
+
_decode_json,
|
|
18
|
+
_join,
|
|
19
|
+
_matches,
|
|
20
|
+
_normalize_repo_url,
|
|
21
|
+
_resolve_base_url,
|
|
22
|
+
)
|
|
23
|
+
from .types import Entry, Graph, Registry as RegistryDoc, SearchHit, Stats, WellKnown
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AsyncRegistry:
    """Async client mirroring :class:`understand_quickly.Registry`.

    Use as an async context manager so the underlying ``httpx.AsyncClient``
    is closed cleanly::

        async with AsyncRegistry() as reg:
            entries = await reg.list(status="ok")

    Successfully decoded JSON documents are cached in memory, keyed by URL,
    for ``cache_ttl`` seconds. Cached documents are handed back by reference,
    so callers should treat returned documents and entries as read-only.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        *,
        cache_ttl: float = DEFAULT_CACHE_TTL_SECONDS,
        timeout: float = DEFAULT_TIMEOUT_SECONDS,
        client: Optional[httpx.AsyncClient] = None,
        transport: Optional[httpx.AsyncBaseTransport] = None,
    ) -> None:
        """Create a client.

        Args:
            base_url: Registry base URL; resolved through ``_resolve_base_url``.
            cache_ttl: Cache lifetime in seconds. A value ``<= 0`` disables
                the cache entirely.
            timeout: Request timeout in seconds. Only applied when this
                object builds its own ``httpx.AsyncClient``.
            client: Pre-built client to use. It is *not* closed by
                :meth:`aclose`; its owner remains responsible for it.
            transport: Custom transport for the internally built client.
                Ignored when ``client`` is supplied.
        """
        self.base_url = _resolve_base_url(base_url)
        self.cache_ttl = float(cache_ttl)
        self.timeout = float(timeout)
        # url -> (monotonic timestamp of the request, decoded JSON document)
        self._cache: dict[str, tuple[float, Any]] = {}
        self._owns_client = client is None
        if client is not None:
            self._client = client
        else:
            kwargs: dict[str, Any] = {
                "timeout": self.timeout,
                "headers": {"Accept": "application/json", "User-Agent": USER_AGENT},
            }
            if transport is not None:
                kwargs["transport"] = transport
            self._client = httpx.AsyncClient(**kwargs)

    # ---- lifecycle -------------------------------------------------------

    async def __aenter__(self) -> "AsyncRegistry":
        return self

    async def __aexit__(self, *exc_info: Any) -> None:
        await self.aclose()

    async def aclose(self) -> None:
        """Close the underlying ``httpx.AsyncClient`` when we own it."""
        if self._owns_client:
            await self._client.aclose()

    # ---- low-level fetch -------------------------------------------------

    async def _fetch_json(self, url: str) -> Any:
        """GET ``url`` and return its decoded JSON body, honouring the cache.

        Raises:
            RegistryError: The request itself failed (``httpx.HTTPError``).
            RegistryHTTPError: The response status was outside 2xx.

        Decoding is delegated to ``_decode_json``; whatever it raises
        propagates unchanged.
        """
        now = time.monotonic()
        if self.cache_ttl > 0:
            cached = self._cache.get(url)
            if cached is not None and (now - cached[0]) < self.cache_ttl:
                return cached[1]
        try:
            resp = await self._client.get(url)
        except httpx.HTTPError as exc:
            raise RegistryError(f"GET {url} failed: {exc}") from exc
        if resp.status_code < 200 or resp.status_code >= 300:
            raise RegistryHTTPError(url, resp.status_code, resp.content)
        data = _decode_json(url, resp.content)
        if self.cache_ttl > 0:
            # Stamped with the time the request started, so a slow response
            # never stays cached longer than ``cache_ttl`` after it was asked for.
            self._cache[url] = (now, data)
        return data

    def clear_cache(self) -> None:
        """Drop every cached document."""
        self._cache.clear()

    # ---- documents -------------------------------------------------------

    async def registry(self) -> RegistryDoc:
        """Fetch ``registry.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, "registry.json"))

    async def well_known(self) -> WellKnown:
        """Fetch ``.well-known/repos.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, ".well-known/repos.json"))

    async def stats(self) -> Stats:
        """Fetch ``stats.json`` from the base URL."""
        return await self._fetch_json(_join(self.base_url, "stats.json"))

    # ---- high-level helpers ---------------------------------------------

    async def list(
        self,
        *,
        status: Optional[str] = None,
        format: Optional[str] = None,
        owner: Optional[str] = None,
        tag: Optional[str] = None,
    ) -> list[Entry]:
        """Return registry entries that satisfy every supplied filter.

        ``status``, ``format`` and ``owner`` are evaluated by ``_matches``;
        ``tag`` requires exact membership in the entry's ``tags`` list.
        A filter left as ``None`` is not applied.
        """
        doc = await self.registry()
        entries: Iterable[Entry] = doc.get("entries", []) or []
        results: list[Entry] = []
        for entry in entries:
            if not _matches(entry, status=status, format=format, owner=owner):
                continue
            if tag is not None and tag not in (entry.get("tags") or []):
                continue
            results.append(entry)
        return results

    async def get_entry(self, entry_id: str) -> Optional[Entry]:
        """Return the entry whose ``id`` equals ``entry_id``, or ``None``."""
        for entry in await self.list():
            if entry.get("id") == entry_id:
                return entry
        return None

    async def get_graph(self, entry_id: str) -> Graph:
        """Fetch the graph document referenced by an entry's ``graph_url``.

        Raises:
            RegistryError: No entry has that id, or the entry has no
                ``graph_url``.
        """
        entry = await self.get_entry(entry_id)
        if entry is None:
            raise RegistryError(f"no entry with id={entry_id!r}")
        graph_url = entry.get("graph_url")
        if not graph_url:
            raise RegistryError(f"entry {entry_id!r} has no graph_url")
        return await self._fetch_json(graph_url)

    async def find_graph_for_repo(self, repo: str) -> Optional[Entry]:
        """Return the entry for a repository reference, or ``None``.

        ``repo`` is parsed by ``_normalize_repo_url``; when that returns
        ``None`` no lookup is attempted. Entry ``owner``/``repo`` fields are
        lower-cased before being compared with the parsed pair.
        """
        norm = _normalize_repo_url(repo)
        if norm is None:
            return None
        owner, repo_name = norm
        for entry in await self.list():
            e_owner = (entry.get("owner") or "").lower()
            e_repo = (entry.get("repo") or "").lower()
            if e_owner == owner and e_repo == repo_name:
                return entry
        return None

    async def search(self, query: str, *, scope: str = "all") -> list[SearchHit]:
        """Case-insensitive substring search over concepts and/or entries.

        Args:
            query: Text to look for; an empty query yields no hits.
            scope: ``"concepts"`` searches concept terms from the stats
                document, ``"entries"`` searches entry metadata, ``"all"``
                does both (concept hits first).

        Concept search is best-effort: a ``RegistryError`` while loading the
        stats document or the entries used to enrich concept hits is
        swallowed. A ``RegistryError`` while loading entries for the entry
        scan propagates to the caller.
        """
        if not query:
            return []
        needle = query.lower()
        hits: list[SearchHit] = []
        # The entry list is loaded at most once per call and shared by both
        # scopes; ``None`` means "not loaded yet, or the load failed".
        entries: Optional[list[Entry]] = None

        if scope in ("all", "concepts"):
            try:
                stats = await self.stats()
            except RegistryError:
                stats = {}
            try:
                entries = await self.list()
            except RegistryError:
                entries = None
            entry_lookup: dict[str, Entry] = {
                e.get("id", ""): e for e in (entries or []) if e.get("id")
            }
            for concept in stats.get("concepts", []) or []:
                term = (concept.get("term") or "").lower()
                if needle not in term:
                    continue
                samples = concept.get("samples") or []
                for sample in samples:
                    hits.append(
                        {
                            "term": concept.get("term", ""),
                            "entry_id": sample,
                            "entry": entry_lookup.get(sample, {}),
                            "samples": list(samples),
                            "count": int(concept.get("entries", 0) or 0),
                        }
                    )

        if scope in ("all", "entries"):
            if entries is None:
                # Either the concept pass did not run or its load failed;
                # errors here are deliberately not swallowed.
                entries = await self.list()
            for entry in entries:
                blob_parts: list[str] = []
                for key in ("id", "description", "format"):
                    val = entry.get(key)
                    if isinstance(val, str):
                        blob_parts.append(val)
                for key in ("tags", "languages"):
                    val = entry.get(key)
                    if isinstance(val, list):
                        blob_parts.extend(str(x) for x in val)
                blob = "\n".join(blob_parts).lower()
                if needle in blob:
                    hits.append(
                        {
                            "term": query,
                            "entry_id": entry.get("id", ""),
                            "entry": entry,
                        }
                    )
        return hits
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
__all__ = [
|
|
213
|
+
"AsyncRegistry",
|
|
214
|
+
"RegistryError",
|
|
215
|
+
"RegistryHTTPError",
|
|
216
|
+
"RegistryParseError",
|
|
217
|
+
]
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""``python -m understand_quickly`` — JSON-first command-line interface.
|
|
2
|
+
|
|
3
|
+
Subcommands:
|
|
4
|
+
- ``list`` List registry entries (filterable).
|
|
5
|
+
- ``get-graph`` Fetch a graph body by entry id.
|
|
6
|
+
- ``find`` Resolve a GitHub URL or ``owner/repo`` to its entry.
|
|
7
|
+
- ``search`` Cross-graph concept + entry search.
|
|
8
|
+
- ``stats`` Aggregate stats across the registry.
|
|
9
|
+
|
|
10
|
+
JSON output by default; ``--pretty`` adds indented formatting (and table
|
|
11
|
+
shaping for ``list``). Exit codes: ``0`` success, ``1`` not-found, ``2``
|
|
12
|
+
HTTP/parse error, ``64`` usage error.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import json
|
|
19
|
+
import sys
|
|
20
|
+
from typing import Any, Optional, Sequence
|
|
21
|
+
|
|
22
|
+
from . import __version__
|
|
23
|
+
from .client import Registry, RegistryError, RegistryHTTPError
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
EXIT_OK = 0
|
|
27
|
+
EXIT_NOT_FOUND = 1
|
|
28
|
+
EXIT_ERROR = 2
|
|
29
|
+
EXIT_USAGE = 64
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _emit(data: Any, pretty: bool, *, fp: Any = None) -> None:
    """Write ``data`` to ``fp`` (default: ``sys.stdout``) followed by a newline.

    Args:
        data: JSON-serialisable value to print.
        pretty: When false, emit compact JSON. When true, emit a fixed-column
            table if ``data`` is a non-empty list of entry-like dicts (every
            item is a dict carrying an ``"id"`` key); otherwise emit JSON
            indented by two spaces.
        fp: Text stream to write to; ``None`` selects ``sys.stdout``.

    The table is limited to entry-like rows on purpose: other lists of dicts
    (search hits, for instance) have none of the table's columns, and
    tabulating them would print blank rows and drop the data.
    """
    out = sys.stdout if fp is None else fp
    columns = ["id", "format", "status", "last_synced"]

    tabular = (
        pretty
        and isinstance(data, list)
        and bool(data)
        # Check every item, not only the first, so a mixed list cannot
        # reach ``item.get`` on a non-dict.
        and all(isinstance(item, dict) and "id" in item for item in data)
    )
    if tabular:
        rows = [[str(item.get(col, "")) for col in columns] for item in data]
        widths = [
            max(len(col), *(len(row[i]) for row in rows))
            for i, col in enumerate(columns)
        ]
        print(" ".join(col.ljust(w) for col, w in zip(columns, widths)), file=out)
        print(" ".join("-" * w for w in widths), file=out)
        for row in rows:
            print(" ".join(cell.ljust(w) for cell, w in zip(row, widths)), file=out)
        return

    json.dump(data, out, indent=2 if pretty else None)
    out.write("\n")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the command-line parser: global options plus five subcommands.

    Global options are ``--version``, ``--registry``, ``--pretty``,
    ``--timeout`` and ``--no-cache``. The chosen subcommand name is stored
    on the parsed namespace as ``cmd`` and is mandatory.
    """
    root = argparse.ArgumentParser(
        prog="understand-quickly",
        description="Thin client for the understand-quickly registry of code-knowledge graphs.",
    )

    # ---- global options --------------------------------------------------
    root.add_argument(
        "--version",
        action="version",
        version=f"understand-quickly {__version__}",
    )
    root.add_argument(
        "--registry",
        dest="registry_url",
        default=None,
        help="Override the registry base URL (defaults to UNDERSTAND_QUICKLY_REGISTRY env var).",
    )
    root.add_argument(
        "--pretty",
        action="store_true",
        help="Pretty-print human-friendly output.",
    )
    root.add_argument(
        "--timeout",
        type=float,
        default=30.0,
        help="HTTP timeout in seconds (default: 30).",
    )
    root.add_argument(
        "--no-cache",
        action="store_true",
        help="Disable the in-memory TTL cache for this invocation.",
    )

    # ---- subcommands -----------------------------------------------------
    commands = root.add_subparsers(dest="cmd", required=True, metavar="<command>")

    list_cmd = commands.add_parser("list", help="List registry entries.")
    list_cmd.add_argument("--status", default=None)
    # ``format`` is stored as ``fmt`` on the parsed namespace.
    list_cmd.add_argument("--format", dest="fmt", default=None)
    for flag in ("--owner", "--tag"):
        list_cmd.add_argument(flag, default=None)

    graph_cmd = commands.add_parser(
        "get-graph", help="Fetch the graph body for an entry id."
    )
    graph_cmd.add_argument("entry_id")

    find_cmd = commands.add_parser(
        "find", help="Find the entry for a GitHub URL or owner/repo slug."
    )
    find_cmd.add_argument("repo")

    search_cmd = commands.add_parser("search", help="Search entries and concepts.")
    search_cmd.add_argument("query")
    search_cmd.add_argument(
        "--scope",
        choices=("all", "entries", "concepts"),
        default="all",
    )

    commands.add_parser("stats", help="Print the aggregate stats.json document.")

    return root
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the CLI and return its process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets ``argparse`` read
            ``sys.argv``.

    Returns:
        ``EXIT_OK`` on success; ``EXIT_NOT_FOUND`` when ``find`` has no match
        or a ``RegistryError`` message starts with ``"no entry"``;
        ``EXIT_ERROR`` for other registry/HTTP failures; ``EXIT_USAGE`` for
        argument errors or a registry that cannot be constructed.
    """
    parser = _build_parser()
    try:
        args = parser.parse_args(argv)
    except SystemExit as stop:
        # argparse exits 2 on usage errors; remap to 64 for clarity.
        if stop.code == 2:
            return EXIT_USAGE
        return int(stop.code or 0)

    ttl = 60.0
    if args.no_cache:
        ttl = 0.0

    try:
        reg = Registry(args.registry_url, cache_ttl=ttl, timeout=args.timeout)
    except RegistryError as exc:
        print(f"error: {exc}", file=sys.stderr)
        return EXIT_USAGE

    command = args.cmd
    try:
        if command == "find":
            match = reg.find_graph_for_repo(args.repo)
            if match is not None:
                _emit(match, args.pretty)
                return EXIT_OK
            # Report on stderr, but still emit a JSON ``null`` on stdout.
            print(f"no entry matches {args.repo!r}", file=sys.stderr)
            _emit(None, args.pretty)
            return EXIT_NOT_FOUND

        if command in ("list", "get-graph", "search", "stats"):
            if command == "list":
                payload = reg.list(
                    status=args.status,
                    format=args.fmt,
                    owner=args.owner,
                    tag=args.tag,
                )
            elif command == "get-graph":
                payload = reg.get_graph(args.entry_id)
            elif command == "search":
                payload = reg.search(args.query, scope=args.scope)
            else:
                payload = reg.stats()
            _emit(payload, args.pretty)
            return EXIT_OK
    except RegistryHTTPError as exc:
        print(f"http error: {exc}", file=sys.stderr)
        return EXIT_ERROR
    except RegistryError as exc:
        msg = str(exc)
        print(f"error: {msg}", file=sys.stderr)
        return EXIT_NOT_FOUND if msg.startswith("no entry") else EXIT_ERROR

    # Only reached for a subcommand name none of the branches above handle.
    parser.print_help(sys.stderr)
    return EXIT_USAGE
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
if __name__ == "__main__": # pragma: no cover — exercised via subprocess in tests
|
|
160
|
+
raise SystemExit(main())
|