whatwasit 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- whatwasit/__init__.py +3 -0
- whatwasit/brand.py +59 -0
- whatwasit/cli.py +216 -0
- whatwasit/config.py +82 -0
- whatwasit/config_loader.py +78 -0
- whatwasit/daemon.py +341 -0
- whatwasit/db.py +174 -0
- whatwasit/embedder.py +347 -0
- whatwasit/index.py +57 -0
- whatwasit/indexer.py +108 -0
- whatwasit/interfaces.py +62 -0
- whatwasit/models.py +191 -0
- whatwasit/output.py +144 -0
- whatwasit/parsers/__init__.py +1 -0
- whatwasit/parsers/atuin.py +96 -0
- whatwasit/parsers/base.py +65 -0
- whatwasit/parsers/bash.py +53 -0
- whatwasit/parsers/zsh.py +78 -0
- whatwasit/search.py +239 -0
- whatwasit/sessions.py +229 -0
- whatwasit/tui.py +393 -0
- whatwasit-0.1.0.dist-info/METADATA +258 -0
- whatwasit-0.1.0.dist-info/RECORD +26 -0
- whatwasit-0.1.0.dist-info/WHEEL +4 -0
- whatwasit-0.1.0.dist-info/entry_points.txt +2 -0
- whatwasit-0.1.0.dist-info/licenses/LICENSE +21 -0
whatwasit/__init__.py
ADDED
whatwasit/brand.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Single source of truth for product naming and on-disk identifiers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
APP_NAME = "whatwasit"
|
|
8
|
+
CLI_NAME = "whatwasit"
|
|
9
|
+
PYPI_NAME = "whatwasit"
|
|
10
|
+
GITHUB_REPO = "whatwasit"
|
|
11
|
+
|
|
12
|
+
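# Pre-rename installs used ``hist`` as the internal app id.
# NOTE(review): the legacy filenames below are identical to DB_FILENAME /
# SOCKET_FILENAME, so every legacy-file fallback is currently a no-op.
# Confirm whether they were meant to be ``hist.db`` / ``hist.sock``.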
|
|
13
|
+
LEGACY_APP_NAME = "hist"
|
|
14
|
+
LEGACY_DB_FILENAME = "whatwasit.db"
|
|
15
|
+
LEGACY_SOCKET_FILENAME = "whatwasit.sock"
|
|
16
|
+
|
|
17
|
+
DB_FILENAME = f"{APP_NAME}.db"
|
|
18
|
+
SOCKET_FILENAME = f"{APP_NAME}.sock"
|
|
19
|
+
PID_FILENAME = "daemon.pid"
|
|
20
|
+
CONFIG_FILENAME = "config.toml"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def data_dir_name_candidates() -> tuple[str, ...]:
    """Return the data directory names to probe, current name before legacy."""
    ordered_names = (APP_NAME, LEGACY_APP_NAME)
    return ordered_names
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def config_dir_name_candidates() -> tuple[str, ...]:
    """Return the config directory names to probe, current name before legacy."""
    ordered_names = (APP_NAME, LEGACY_APP_NAME)
    return ordered_names
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def resolve_data_dir(xdg_data_home: Path) -> Path:
    """Choose the data directory under *xdg_data_home*.

    Candidate directory names are tried in the order given by
    :func:`data_dir_name_candidates`. The first directory that already
    contains a database file (current or legacy filename) wins. When no
    candidate holds a database, the directory named after ``APP_NAME`` is
    returned; it is not created here.
    """
    db_names = (DB_FILENAME, LEGACY_DB_FILENAME)
    for dir_name in data_dir_name_candidates():
        root = xdg_data_home / dir_name
        if any((root / db_name).is_file() for db_name in db_names):
            return root
    return xdg_data_home / APP_NAME
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def resolve_config_file(xdg_config_home: Path) -> Path:
    """Choose the config file path under *xdg_config_home*.

    Returns the first existing ``<name>/config.toml`` among the names from
    :func:`config_dir_name_candidates`. If none exists, returns the path
    under the current app name (which may not exist yet).
    """
    fallback = xdg_config_home / APP_NAME / CONFIG_FILENAME
    existing = (
        candidate
        for candidate in (
            xdg_config_home / dir_name / CONFIG_FILENAME
            for dir_name in config_dir_name_candidates()
        )
        if candidate.is_file()
    )
    return next(existing, fallback)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def resolve_db_path(data_dir: Path) -> Path:
    """Return the SQLite database path inside *data_dir*.

    The current filename is used whenever that file exists, and also when
    neither file exists (so a fresh database is created under the current
    name). The legacy filename is returned only when it exists and the
    current one does not.
    """
    preferred = data_dir / DB_FILENAME
    fallback = data_dir / LEGACY_DB_FILENAME
    if not preferred.is_file() and fallback.is_file():
        return fallback
    return preferred
|
whatwasit/cli.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""Command-line entry point for ``whatwasit``.
|
|
2
|
+
|
|
3
|
+
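Two main modes (``whatwasit daemon start|stop|status`` and the no-argument interactive REPL are handled as well):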
|
|
4
|
+
|
|
5
|
+
- ``whatwasit index [--window N] [--rebuild]`` builds/refreshes the on-disk index
|
|
6
|
+
from shell history.
|
|
7
|
+
- ``whatwasit <natural language query...>`` (the common case) searches the index
|
|
8
|
+
and renders the results with :mod:`whatwasit.output`.
|
|
9
|
+
|
|
10
|
+
Implemented with :mod:`argparse`; the query subcommand is detected purely by
|
|
11
|
+
position (``index`` is only ever a subcommand when it is the first token),
|
|
12
|
+
so a bare quoted natural-language query never collides with it unless the
|
|
13
|
+
query literally starts with the word ``index``.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import os
|
|
20
|
+
import sys
|
|
21
|
+
from typing import List, Optional, Sequence
|
|
22
|
+
|
|
23
|
+
from .config import Config
|
|
24
|
+
from .daemon import daemon_search, daemon_status, start_daemon, stop_daemon
|
|
25
|
+
from .embedder import is_model_cached
|
|
26
|
+
from .indexer import build_index_from_history
|
|
27
|
+
from .output import display_results
|
|
28
|
+
from .search import search
|
|
29
|
+
from .tui import run_repl
|
|
30
|
+
|
|
31
|
+
from .brand import CLI_NAME
|
|
32
|
+
|
|
33
|
+
PROG = CLI_NAME
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _configure_hf_hub_when_cached() -> None:
    """Set offline/quiet hub environment flags when the model is cached.

    Does nothing unless ``is_model_cached`` reports the default config's
    model as cached. Each flag is set with ``setdefault``, so a value the
    user already exported is left untouched.
    """
    if not is_model_cached(Config.default()):
        return
    for flag in (
        "HF_HUB_OFFLINE",
        "TRANSFORMERS_OFFLINE",
        "HF_HUB_DISABLE_PROGRESS_BARS",
    ):
        os.environ.setdefault(flag, "1")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _build_parser() -> argparse.ArgumentParser:
    """Build the argparse parser for the ``index`` and ``daemon`` subcommands.

    Free-text queries are not declared here; ``main`` only hands argv to
    this parser when the first token is one of the subcommand names.
    """
    root = argparse.ArgumentParser(
        prog=PROG,
        description="Local-first semantic search over your shell history.",
    )
    commands = root.add_subparsers(dest="command")

    # ``index``: (re)build the on-disk search index.
    index_cmd = commands.add_parser(
        "index",
        help="Build or refresh the search index from shell history.",
    )
    index_cmd.add_argument(
        "--window",
        type=int,
        default=None,
        help="Session grouping window in seconds (default: config default).",
    )
    index_cmd.add_argument(
        "--rebuild",
        action="store_true",
        help="Force a full rebuild of the index from scratch.",
    )

    # ``daemon``: control the warm-query background process.
    daemon_cmd = commands.add_parser(
        "daemon",
        help="Start, stop, or check the warm-query daemon.",
    )
    daemon_cmd.add_argument(
        "action",
        choices=("start", "stop", "status"),
        help="Daemon control action.",
    )

    return root
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _print_help(parser: argparse.ArgumentParser) -> None:
    """Print *parser*'s help text to standard output."""
    # ``file=None`` is argparse's default and means ``sys.stdout``.
    parser.print_help(file=None)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _run_index(args: argparse.Namespace) -> int:
    """Build the index from shell history and print a one-line summary.

    ``args.window``, when given, overrides the configured session window.
    Always returns exit code 0.
    """
    # NOTE(review): ``args.rebuild`` is parsed by ``_build_parser`` but never
    # read here, so ``--rebuild`` currently has no effect in this function.
    # Confirm how ``build_index_from_history`` should receive it.
    config = Config.default()
    window_override = args.window
    if window_override is not None:
        config.session_window_seconds = window_override

    summary = build_index_from_history(config)
    message = (
        f"Indexed {summary.n_commands} commands into {summary.n_sessions} sessions "
        f"in {summary.elapsed_seconds:.2f}s."
    )
    print(message)
    return 0
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _run_repl() -> int:
    """Start the interactive REPL over the existing index.

    Returns 1 after printing a hint when the index database file does not
    exist; otherwise returns 0 once ``run_repl`` returns.
    """
    config = Config.default()

    if not config.db_path.exists():
        missing_index_hint = (
            "No index found. Run `whatwasit index` first to build a search index "
            "from your shell history."
        )
        print(missing_index_hint)
        return 1

    def do_search(query: str):
        # Try the daemon first when enabled; a ``None`` result falls back
        # to an in-process search.
        hits = daemon_search(config, query) if config.use_daemon else None
        return search(config, query) if hits is None else hits

    run_repl(
        do_search,
        page_size=config.tui_page_size,
        low_confidence_threshold=config.low_confidence_threshold,
    )
    return 0
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _run_query(
    query: str,
    top_k: Optional[int],
    *,
    force_plain: bool = False,
) -> int:
    """Run one search for *query* and render the results.

    Args:
        query: Free-text query string.
        top_k: Result-count override; ``None`` keeps the configured value.
        force_plain: Passed through to ``display_results``.

    Returns:
        1 when the index database file is missing (a hint is printed),
        otherwise 0.
    """
    config = Config.default()
    if top_k is not None:
        config.top_k = top_k

    if not config.db_path.exists():
        missing_index_hint = (
            "No index found. Run `whatwasit index` first to build a search index "
            "from your shell history."
        )
        print(missing_index_hint)
        return 1

    # Try the daemon first when enabled; a ``None`` result falls back to an
    # in-process search.
    hits = daemon_search(config, query, k=top_k) if config.use_daemon else None
    if hits is None:
        hits = search(config, query, k=top_k)

    display_results(hits, query, config, force_plain=force_plain)
    return 0
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _run_daemon(action: str) -> int:
    """Run the daemon control function for *action* and return its exit code.

    ``"start"`` and ``"stop"`` map to their handlers; any other value is
    treated as a status request.
    """
    handlers = {"start": start_daemon, "stop": stop_daemon}
    handler = handlers.get(action, daemon_status)
    return handler()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def main(argv: Optional[Sequence[str]] = None) -> int:
    """CLI entry point; dispatch to the REPL, a subcommand, or a query.

    Dispatch order:

    1. No arguments: launch the interactive REPL.
    2. ``-h`` / ``--help`` as the first token: print help, return 0.
    3. ``index`` or ``daemon`` as the first token: parse with argparse and
       run that subcommand.
    4. Anything else: treat the arguments as a natural-language query.
       ``-k N`` / ``--top-k N`` (also ``-k=N`` / ``--top-k=N``) and
       ``--plain`` / ``--headless`` may appear anywhere; every remaining
       token is joined with spaces to form the query.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit code. 2 signals a malformed ``-k`` / ``--top-k``.
    """
    _configure_hf_hub_when_cached()

    if argv is None:
        argv = sys.argv[1:]
    argv = list(argv)

    parser = _build_parser()

    if not argv:
        return _run_repl()

    if argv[0] in ("-h", "--help"):
        _print_help(parser)
        return 0

    if argv[0] == "index":
        args = parser.parse_args(argv)
        return _run_index(args)

    if argv[0] == "daemon":
        args = parser.parse_args(argv)
        return _run_daemon(args.action)

    # Anything else is a natural-language query. Pull out -k/--top-k and
    # --plain/--headless (which may appear anywhere) and treat the remaining
    # tokens as the query text.
    top_k: Optional[int] = None
    force_plain = False
    query_tokens: List[str] = []
    i = 0
    while i < len(argv):
        token = argv[i]
        # Accept the ``--top-k=N`` / ``-k=N`` spelling too, as argparse does;
        # otherwise such a token would silently become part of the query.
        flag, has_inline_value, inline_value = token.partition("=")
        if flag in ("-k", "--top-k"):
            if has_inline_value:
                raw_value, consumed = inline_value, 1
            elif i + 1 < len(argv):
                raw_value, consumed = argv[i + 1], 2
            else:
                print(f"{PROG}: argument {flag}: expected one argument", file=sys.stderr)
                return 2
            try:
                top_k = int(raw_value)
            except ValueError:
                print(f"{PROG}: argument {flag}: invalid int value: {raw_value!r}", file=sys.stderr)
                return 2
            i += consumed
            continue
        if token in ("--plain", "--headless"):
            force_plain = True
            i += 1
            continue
        query_tokens.append(token)
        i += 1

    query = " ".join(query_tokens).strip()
    if not query:
        # Only option flags were given; there is nothing to search for.
        _print_help(parser)
        return 0

    return _run_query(query, top_k, force_plain=force_plain)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
if __name__ == "__main__":
|
|
216
|
+
sys.exit(main())
|
whatwasit/config.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Central configuration for whatwasit.
|
|
2
|
+
|
|
3
|
+
All tunable values (session window, model name, storage paths, schema version)
|
|
4
|
+
live here so nothing is scattered across the codebase as magic constants.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field, asdict
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from .brand import resolve_data_dir, resolve_db_path
|
|
14
|
+
|
|
15
|
+
SCHEMA_VERSION = 1
|
|
16
|
+
"""Bumped whenever the on-disk SQLite schema changes. Stored in the ``meta`` table."""
|
|
17
|
+
|
|
18
|
+
DEFAULT_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
|
|
19
|
+
DEFAULT_EMBEDDING_DIM = 384
|
|
20
|
+
DEFAULT_LOW_CONFIDENCE_THRESHOLD = 0.40
|
|
21
|
+
DEFAULT_SESSION_WINDOW_SECONDS = 300 # 5 minutes
|
|
22
|
+
DEFAULT_TOP_K = 10
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _xdg_data_home() -> Path:
    """Return the base directory for user data files.

    Uses ``$XDG_DATA_HOME`` when it is set to an absolute path. An unset,
    empty, or relative value falls back to ``~/.local/share``; the XDG Base
    Directory specification says relative paths must be treated as invalid
    and ignored.
    """
    env = os.environ.get("XDG_DATA_HOME")
    if env:
        candidate = Path(env)
        # A relative path would otherwise resolve against the current
        # working directory and scatter indexes around the filesystem.
        if candidate.is_absolute():
            return candidate
    return Path.home() / ".local" / "share"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Config:
    """Runtime settings for whatwasit.

    Build an instance with :meth:`default` (which layers the on-disk config
    file over the built-in defaults) and then override individual fields.
    """

    # --- Storage ---------------------------------------------------------
    # Resolved once per instance from the XDG data home.
    data_dir: Path = field(default_factory=lambda: resolve_data_dir(_xdg_data_home()))

    # --- Session grouping ------------------------------------------------
    session_window_seconds: int = DEFAULT_SESSION_WINDOW_SECONDS
    split_on_cwd_change: bool = True

    # --- Embedding -------------------------------------------------------
    model_name: str = DEFAULT_MODEL_NAME
    embedding_dim: int = DEFAULT_EMBEDDING_DIM

    # --- Search ----------------------------------------------------------
    top_k: int = DEFAULT_TOP_K
    hybrid_search: bool = True
    low_confidence_threshold: float = DEFAULT_LOW_CONFIDENCE_THRESHOLD

    # --- Output / TUI ----------------------------------------------------
    output_mode: str = "tui"  # "tui" | "plain"
    tui_page_size: int = 5
    use_daemon: bool = True

    # --- Schema ----------------------------------------------------------
    # Mirrors the module-level SCHEMA_VERSION for convenience; intended to
    # be treated as read-only, although the dataclass does not enforce it.
    schema_version: int = SCHEMA_VERSION

    @classmethod
    def default(cls) -> "Config":
        """Return a config with built-in defaults plus config-file overrides."""
        # Imported at call time rather than at module level.
        from .config_loader import apply_file_overrides

        instance = cls()
        return apply_file_overrides(instance)

    @property
    def db_path(self) -> Path:
        """Path of the SQLite database inside :attr:`data_dir`."""
        return resolve_db_path(self.data_dir)

    @property
    def index_path(self) -> Path:
        """Path of the ``index.usearch`` file inside :attr:`data_dir`."""
        index_filename = "index.usearch"
        return self.data_dir / index_filename

    def ensure_data_dir(self) -> Path:
        """Create :attr:`data_dir` (and parents) if missing, then return it."""
        target = self.data_dir
        target.mkdir(parents=True, exist_ok=True)
        return target

    def to_dict(self) -> dict:
        """Return the fields as a plain dict, with ``data_dir`` as a string."""
        # Overriding an existing key keeps its original position in the dict.
        return {**asdict(self), "data_dir": str(self.data_dir)}
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Load user overrides from ``~/.config/whatwasit/config.toml`` (XDG).
|
|
2
|
+
|
|
3
|
+
Uses :mod:`tomllib` on Python 3.11+ and :mod:`tomli` on 3.9–3.10.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING, Any, Mapping
|
|
12
|
+
|
|
13
|
+
if sys.version_info >= (3, 11):
|
|
14
|
+
import tomllib
|
|
15
|
+
else:
|
|
16
|
+
import tomli as tomllib # type: ignore[no-redef]
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from .config import Config
|
|
20
|
+
|
|
21
|
+
_VALID_OUTPUT_MODES = frozenset({"tui", "plain"})
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _xdg_config_home() -> Path:
    """Return the base directory for user configuration files.

    Uses ``$XDG_CONFIG_HOME`` when it is set to an absolute path. An unset,
    empty, or relative value falls back to ``~/.config``; the XDG Base
    Directory specification says relative paths must be treated as invalid
    and ignored.
    """
    env = os.environ.get("XDG_CONFIG_HOME")
    if env:
        candidate = Path(env)
        # A relative path would otherwise make the effective config depend
        # on the directory the command happens to be run from.
        if candidate.is_absolute():
            return candidate
    return Path.home() / ".config"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
from .brand import resolve_config_file
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def config_file_path() -> Path:
    """Return the config file location under the XDG config home.

    The choice between candidate directories is delegated to
    ``resolve_config_file``; the returned path may not exist.
    """
    config_home = _xdg_config_home()
    return resolve_config_file(config_home)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def load_config_file() -> Mapping[str, Any]:
    """Parse the config file and return its top-level table.

    Returns an empty mapping when the file does not exist or contains
    invalid TOML. In the latter case a warning is written to stderr, so a
    typo in the config does not make every command fail with a traceback
    and the built-in defaults stay in effect.
    """
    path = config_file_path()
    if not path.is_file():
        return {}
    try:
        with path.open("rb") as handle:
            return tomllib.load(handle)
    except tomllib.TOMLDecodeError as exc:
        # Both ``tomllib`` and the ``tomli`` backport expose this exception.
        print(
            f"whatwasit: ignoring malformed config file {path}: {exc}",
            file=sys.stderr,
        )
        return {}
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def apply_file_overrides(config: "Config") -> "Config":
    """Apply values from the on-disk config file onto *config*.

    Unknown keys are ignored. A value that cannot be interpreted is skipped,
    so whatever *config* already holds stays in effect:

    - ``output_mode``: must be ``"tui"`` or ``"plain"`` (case-insensitive).
    - ``tui_page_size``: must convert to an integer >= 1.
    - ``low_confidence_threshold``: must convert to a float.
    - ``use_daemon``: non-string values use their truthiness. Strings are
      matched case-insensitively against true/false words, so the string
      ``"false"`` disables the daemon; unrecognized strings are skipped.

    Args:
        config: The configuration object to mutate.

    Returns:
        The same *config* object, for call chaining.
    """
    data = load_config_file()
    if not data:
        return config

    if "output_mode" in data:
        mode = str(data["output_mode"]).lower()
        if mode in _VALID_OUTPUT_MODES:
            config.output_mode = mode

    if "tui_page_size" in data:
        try:
            page_size = int(data["tui_page_size"])
        except (TypeError, ValueError, OverflowError):
            # OverflowError covers TOML ``inf``, which ``int()`` rejects.
            page_size = None
        # A page must show at least one row; zero or negative is invalid.
        if page_size is not None and page_size >= 1:
            config.tui_page_size = page_size

    if "low_confidence_threshold" in data:
        try:
            config.low_confidence_threshold = float(data["low_confidence_threshold"])
        except (TypeError, ValueError):
            pass

    if "use_daemon" in data:
        raw_flag = data["use_daemon"]
        if isinstance(raw_flag, str):
            # ``bool("false")`` is True, so quoted booleans need explicit
            # parsing rather than plain truthiness.
            word = raw_flag.lower()
            if word in ("true", "yes", "on", "1"):
                config.use_daemon = True
            elif word in ("", "false", "no", "off", "0"):
                config.use_daemon = False
        else:
            config.use_daemon = bool(raw_flag)

    return config
|