whatwasit 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
whatwasit/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """whatwasit - local-first semantic search for your shell history."""
2
+
3
+ __version__ = "0.1.0"
whatwasit/brand.py ADDED
@@ -0,0 +1,59 @@
1
+ """Single source of truth for product naming and on-disk identifiers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ APP_NAME = "whatwasit"
8
+ CLI_NAME = "whatwasit"
9
+ PYPI_NAME = "whatwasit"
10
+ GITHUB_REPO = "whatwasit"
11
+
12
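+ # Pre-rename installs used ``hist`` as the internal app id. NOTE(review): the LEGACY_* file names below are identical to the current DB_FILENAME / SOCKET_FILENAME values, so the legacy-file fallbacks can never select a different file - confirm the intended legacy names.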
13
+ LEGACY_APP_NAME = "hist"
14
+ LEGACY_DB_FILENAME = "whatwasit.db"
15
+ LEGACY_SOCKET_FILENAME = "whatwasit.sock"
16
+
17
+ DB_FILENAME = f"{APP_NAME}.db"
18
+ SOCKET_FILENAME = f"{APP_NAME}.sock"
19
+ PID_FILENAME = "daemon.pid"
20
+ CONFIG_FILENAME = "config.toml"
21
+
22
+
23
def data_dir_name_candidates() -> tuple[str, ...]:
    """Return the data directory names to probe, current name before legacy."""
    ordered_names = [APP_NAME, LEGACY_APP_NAME]
    return tuple(ordered_names)
26
+
27
+
28
def config_dir_name_candidates() -> tuple[str, ...]:
    """Return the config directory names to probe, current name before legacy."""
    ordered_names = [APP_NAME, LEGACY_APP_NAME]
    return tuple(ordered_names)
31
+
32
+
33
def resolve_data_dir(xdg_data_home: Path) -> Path:
    """Choose the data directory under *xdg_data_home*.

    Candidate directory names are probed in the order given by
    :func:`data_dir_name_candidates`; the first one that already contains a
    database file (under either known file name) wins. When no candidate holds
    a database, the directory for the current app name is returned.
    """
    known_db_names = (DB_FILENAME, LEGACY_DB_FILENAME)
    for dir_name in data_dir_name_candidates():
        base = xdg_data_home / dir_name
        if any((base / db_name).is_file() for db_name in known_db_names):
            return base
    return xdg_data_home / APP_NAME
40
+
41
+
42
def resolve_config_file(xdg_config_home: Path) -> Path:
    """Choose the config file path under *xdg_config_home*.

    Returns the first existing ``<name>/config.toml`` among the candidate
    directory names (current name first), or the path under the current app
    name when none of them exists yet.
    """
    probes = (
        xdg_config_home / dir_name / CONFIG_FILENAME
        for dir_name in config_dir_name_candidates()
    )
    existing = (probe for probe in probes if probe.is_file())
    return next(existing, xdg_config_home / APP_NAME / CONFIG_FILENAME)
49
+
50
+
51
def resolve_db_path(data_dir: Path) -> Path:
    """Return the SQLite file path inside *data_dir*.

    The current file name is preferred; the legacy file name is used only when
    it exists and the current one does not. If neither exists, the current
    file name is returned so a fresh database is created there.
    """
    current = data_dir / DB_FILENAME
    fallback = data_dir / LEGACY_DB_FILENAME
    for db_file in (current, fallback):
        if db_file.is_file():
            return db_file
    return current
whatwasit/cli.py ADDED
@@ -0,0 +1,216 @@
1
+ """Command-line entry point for ``whatwasit``.
2
+
3
+ Two modes:
4
+
5
+ - ``whatwasit index [--window N] [--rebuild]`` builds/refreshes the on-disk index
6
+ from shell history.
7
+ - ``whatwasit <natural language query...>`` (the common case) searches the index
8
+ and renders the results with :mod:`whatwasit.output`.
9
+
10
+ Implemented with :mod:`argparse`; the query subcommand is detected purely by
11
+ position (``index`` is only ever a subcommand when it is the first token),
12
+ so a bare quoted natural-language query never collides with it unless the
13
+ query literally starts with the word ``index``.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import os
20
+ import sys
21
+ from typing import List, Optional, Sequence
22
+
23
+ from .config import Config
24
+ from .daemon import daemon_search, daemon_status, start_daemon, stop_daemon
25
+ from .embedder import is_model_cached
26
+ from .indexer import build_index_from_history
27
+ from .output import display_results
28
+ from .search import search
29
+ from .tui import run_repl
30
+
31
+ from .brand import CLI_NAME
32
+
33
+ PROG = CLI_NAME
34
+
35
+
36
def _configure_hf_hub_when_cached() -> None:
    """Default the hub-related environment switches once the model is cached.

    ``setdefault`` is used, so values already exported by the user are left
    untouched. Nothing is changed when the model is not cached.
    """
    if not is_model_cached(Config.default()):
        return
    offline_switches = (
        "HF_HUB_OFFLINE",
        "TRANSFORMERS_OFFLINE",
        "HF_HUB_DISABLE_PROGRESS_BARS",
    )
    for variable in offline_switches:
        os.environ.setdefault(variable, "1")
42
+
43
+
44
def _build_parser() -> argparse.ArgumentParser:
    """Create the argparse parser with the ``index`` and ``daemon`` subcommands."""
    root = argparse.ArgumentParser(
        description="Local-first semantic search over your shell history.",
        prog=PROG,
    )
    commands = root.add_subparsers(dest="command")

    # ``index``: optional session window override and full-rebuild switch.
    index_cmd = commands.add_parser(
        "index",
        help="Build or refresh the search index from shell history.",
    )
    index_options = (
        (
            "--window",
            {
                "type": int,
                "default": None,
                "help": "Session grouping window in seconds (default: config default).",
            },
        ),
        (
            "--rebuild",
            {
                "action": "store_true",
                "help": "Force a full rebuild of the index from scratch.",
            },
        ),
    )
    for flag, spec in index_options:
        index_cmd.add_argument(flag, **spec)

    # ``daemon``: a single positional action restricted to three choices.
    daemon_cmd = commands.add_parser(
        "daemon",
        help="Start, stop, or check the warm-query daemon.",
    )
    daemon_cmd.add_argument(
        "action",
        help="Daemon control action.",
        choices=("start", "stop", "status"),
    )

    return root
76
+
77
+
78
def _print_help(parser: argparse.ArgumentParser) -> None:
    """Write *parser*'s full help text to standard output."""
    parser.print_help()
80
+
81
+
82
def _run_index(args: argparse.Namespace) -> int:
    """Build the index from shell history and print a one-line summary.

    Args:
        args: Parsed ``index`` subcommand arguments. ``args.window``, when not
            ``None``, overrides the configured session window (in seconds).

    Returns:
        ``0`` on success, ``2`` when ``--window`` is negative.
    """
    config = Config.default()
    window = args.window
    if window is not None:
        # A negative grouping window has no meaning; fail like argparse would.
        if window < 0:
            print(
                f"{PROG}: argument --window: must be non-negative, got {window}",
                file=sys.stderr,
            )
            return 2
        config.session_window_seconds = window

    # NOTE(review): ``args.rebuild`` is accepted by the parser but is not read
    # here, so ``--rebuild`` currently has no effect in this function.
    stats = build_index_from_history(config)
    print(
        f"Indexed {stats.n_commands} commands into {stats.n_sessions} sessions "
        f"in {stats.elapsed_seconds:.2f}s."
    )
    return 0
93
+
94
+
95
def _run_repl() -> int:
    """Start the interactive REPL; return 1 if no index exists yet, else 0."""
    config = Config.default()

    index_missing = not config.db_path.exists()
    if index_missing:
        print(
            "No index found. Run `whatwasit index` first to build a search index "
            "from your shell history."
        )
        return 1

    def _lookup(query: str):
        # Ask the daemon first when enabled; a ``None`` answer falls back to
        # an in-process search.
        hits = daemon_search(config, query) if config.use_daemon else None
        return search(config, query) if hits is None else hits

    run_repl(
        _lookup,
        page_size=config.tui_page_size,
        low_confidence_threshold=config.low_confidence_threshold,
    )
    return 0
120
+
121
+
122
def _run_query(
    query: str,
    top_k: Optional[int],
    *,
    force_plain: bool = False,
) -> int:
    """Run a one-shot search for *query* and render the results.

    Args:
        query: Natural-language query text.
        top_k: Result-count override; ``None`` leaves the configured value.
        force_plain: Forwarded to ``display_results``.

    Returns:
        ``1`` when no index database exists, otherwise ``0``.
    """
    config = Config.default()
    if top_k is not None:
        config.top_k = top_k

    index_missing = not config.db_path.exists()
    if index_missing:
        print(
            "No index found. Run `whatwasit index` first to build a search index "
            "from your shell history."
        )
        return 1

    # Daemon first when enabled; ``None`` from it means "search in-process".
    hits = daemon_search(config, query, k=top_k) if config.use_daemon else None
    if hits is None:
        hits = search(config, query, k=top_k)

    display_results(hits, query, config, force_plain=force_plain)
    return 0
146
+
147
+
148
def _run_daemon(action: str) -> int:
    """Dispatch a daemon control action; anything but start/stop reports status."""
    handlers = {"start": start_daemon, "stop": stop_daemon}
    handler = handlers.get(action, daemon_status)
    return handler()
154
+
155
+
156
def _split_query_args(argv: Sequence[str]) -> tuple[Optional[int], bool, List[str]]:
    """Separate query-mode options from the free-text query tokens.

    ``-k``/``--top-k N`` and ``--plain``/``--headless`` may appear anywhere;
    every other token is kept, in order, as part of the query.

    Returns:
        ``(top_k, force_plain, query_tokens)``.

    Raises:
        ValueError: If ``-k``/``--top-k`` has no value, a non-integer value, or
            a value below 1. The message is formatted like an argparse error
            (without the program-name prefix).
    """
    top_k: Optional[int] = None
    force_plain = False
    query_tokens: List[str] = []

    remaining = iter(argv)
    for token in remaining:
        if token in ("-k", "--top-k"):
            raw = next(remaining, None)
            if raw is None:
                raise ValueError(f"argument {token}: expected one argument")
            try:
                top_k = int(raw)
            except ValueError:
                raise ValueError(
                    f"argument {token}: invalid int value: {raw!r}"
                ) from None
            # A result count of zero or less cannot produce a useful search.
            if top_k < 1:
                raise ValueError(
                    f"argument {token}: must be a positive integer, got {top_k}"
                )
        elif token in ("--plain", "--headless"):
            force_plain = True
        else:
            query_tokens.append(token)

    return top_k, force_plain, query_tokens


def main(argv: Optional[Sequence[str]] = None) -> int:
    """CLI entry point.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit code: the result of the selected mode, ``0`` after
        printing help, or ``2`` for a malformed ``-k``/``--top-k`` option.
    """
    _configure_hf_hub_when_cached()

    args_list = list(sys.argv[1:] if argv is None else argv)
    parser = _build_parser()

    # No arguments at all: interactive mode.
    if not args_list:
        return _run_repl()

    first = args_list[0]
    if first in ("-h", "--help"):
        _print_help(parser)
        return 0

    # Subcommands are recognised only as the first token.
    if first == "index":
        return _run_index(parser.parse_args(args_list))
    if first == "daemon":
        return _run_daemon(parser.parse_args(args_list).action)

    # Anything else is a natural-language query with optional flags mixed in.
    try:
        top_k, force_plain, query_tokens = _split_query_args(args_list)
    except ValueError as err:
        print(f"{PROG}: {err}", file=sys.stderr)
        return 2

    query = " ".join(query_tokens).strip()
    if not query:
        _print_help(parser)
        return 0

    return _run_query(query, top_k, force_plain=force_plain)
213
+
214
+
215
# Script entry: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
whatwasit/config.py ADDED
@@ -0,0 +1,82 @@
1
+ """Central configuration for whatwasit.
2
+
3
+ All tunable values (session window, model name, storage paths, schema version)
4
+ live here so nothing is scattered across the codebase as magic constants.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from dataclasses import dataclass, field, asdict
11
+ from pathlib import Path
12
+
13
+ from .brand import resolve_data_dir, resolve_db_path
14
+
15
+ SCHEMA_VERSION = 1
16
+ """Bumped whenever the on-disk SQLite schema changes. Stored in the ``meta`` table."""
17
+
18
+ DEFAULT_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
19
+ DEFAULT_EMBEDDING_DIM = 384
20
+ DEFAULT_LOW_CONFIDENCE_THRESHOLD = 0.40
21
+ DEFAULT_SESSION_WINDOW_SECONDS = 300 # 5 minutes
22
+ DEFAULT_TOP_K = 10
23
+
24
+
25
def _xdg_data_home() -> Path:
    """Return the XDG data home directory.

    ``$XDG_DATA_HOME`` is used when it is set to an absolute path. An unset,
    empty, or relative value is ignored (the XDG Base Directory specification
    treats relative paths as invalid) and ``~/.local/share`` is returned.
    """
    env = os.environ.get("XDG_DATA_HOME")
    if env:
        candidate = Path(env)
        if candidate.is_absolute():
            return candidate
    return Path.home() / ".local" / "share"
30
+
31
+
32
@dataclass
class Config:
    """Mutable bag of runtime settings.

    Obtain an instance with :meth:`default` (which layers the on-disk config
    file over the built-in defaults), then assign to fields to override them.
    """

    # --- storage ---------------------------------------------------------
    # Resolved lazily per instance from the XDG data home.
    data_dir: Path = field(default_factory=lambda: resolve_data_dir(_xdg_data_home()))

    # --- session grouping ------------------------------------------------
    session_window_seconds: int = DEFAULT_SESSION_WINDOW_SECONDS
    split_on_cwd_change: bool = True

    # --- embedding -------------------------------------------------------
    model_name: str = DEFAULT_MODEL_NAME
    embedding_dim: int = DEFAULT_EMBEDDING_DIM

    # --- search ----------------------------------------------------------
    top_k: int = DEFAULT_TOP_K
    hybrid_search: bool = True
    low_confidence_threshold: float = DEFAULT_LOW_CONFIDENCE_THRESHOLD

    # --- output / TUI ----------------------------------------------------
    output_mode: str = "tui"  # "tui" | "plain"
    tui_page_size: int = 5
    use_daemon: bool = True

    # --- schema ----------------------------------------------------------
    # Mirrors the module-level SCHEMA_VERSION for convenient access.
    schema_version: int = SCHEMA_VERSION

    @classmethod
    def default(cls) -> "Config":
        """Build a config from defaults, then apply config-file overrides."""
        # Imported here rather than at module top; config_loader refers back
        # to this module for type checking.
        from .config_loader import apply_file_overrides

        fresh = cls()
        return apply_file_overrides(fresh)

    @property
    def db_path(self) -> Path:
        """Path of the SQLite database inside :attr:`data_dir`."""
        return resolve_db_path(self.data_dir)

    @property
    def index_path(self) -> Path:
        """Path of the ``index.usearch`` file inside :attr:`data_dir`."""
        return self.data_dir / "index.usearch"

    def ensure_data_dir(self) -> Path:
        """Create :attr:`data_dir` (and parents) if needed and return it."""
        target = self.data_dir
        target.mkdir(parents=True, exist_ok=True)
        return target

    def to_dict(self) -> dict:
        """Return the fields as a plain dict with ``data_dir`` as a string."""
        payload = asdict(self)
        payload.update(data_dir=str(self.data_dir))
        return payload
@@ -0,0 +1,78 @@
1
+ """Load user overrides from ``~/.config/whatwasit/config.toml`` (XDG).
2
+
3
+ Uses :mod:`tomllib` on Python 3.11+ and :mod:`tomli` on 3.9–3.10.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import os
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, Any, Mapping
12
+
13
+ if sys.version_info >= (3, 11):
14
+ import tomllib
15
+ else:
16
+ import tomli as tomllib # type: ignore[no-redef]
17
+
18
+ if TYPE_CHECKING:
19
+ from .config import Config
20
+
21
+ _VALID_OUTPUT_MODES = frozenset({"tui", "plain"})
22
+
23
+
24
def _xdg_config_home() -> Path:
    """Return the XDG config home directory.

    ``$XDG_CONFIG_HOME`` is used when it is set to an absolute path. An unset,
    empty, or relative value is ignored (the XDG Base Directory specification
    treats relative paths as invalid) and ``~/.config`` is returned.
    """
    env = os.environ.get("XDG_CONFIG_HOME")
    if env:
        candidate = Path(env)
        if candidate.is_absolute():
            return candidate
    return Path.home() / ".config"
29
+
30
+
31
+ from .brand import resolve_config_file
32
+
33
+
34
def config_file_path() -> Path:
    """Locate the config file path under the XDG config home."""
    config_home = _xdg_config_home()
    return resolve_config_file(config_home)
37
+
38
+
39
def load_config_file() -> Mapping[str, Any]:
    """Parse the config file and return its top-level table.

    Returns an empty mapping when the file does not exist. A file that cannot
    be read or is not valid TOML is reported on stderr and treated as empty,
    so a broken config leaves the built-in defaults in effect instead of
    aborting with a traceback.
    """
    path = config_file_path()
    if not path.is_file():
        return {}
    try:
        with path.open("rb") as handle:
            return tomllib.load(handle)
    except (OSError, tomllib.TOMLDecodeError) as err:
        print(
            f"whatwasit: ignoring unreadable config file {path}: {err}",
            file=sys.stderr,
        )
        return {}
46
+
47
+
48
def _coerce_bool(value: Any) -> "bool | None":
    """Interpret a config value as a boolean, or return ``None`` if unclear.

    Booleans and numbers use their truthiness. Strings are matched
    case-insensitively against common true/false spellings, so a quoted
    ``"false"`` is not mistaken for true. Anything else yields ``None``.
    """
    if isinstance(value, (bool, int, float)):
        return bool(value)
    if isinstance(value, str):
        text = value.strip().lower()
        if text in ("true", "1", "yes", "on"):
            return True
        if text in ("false", "0", "no", "off", ""):
            return False
    return None


def apply_file_overrides(config: "Config") -> "Config":
    """Apply values from the on-disk config file onto *config*.

    Unknown keys are ignored, and any value that cannot be interpreted for its
    field is skipped so the existing value stays in effect:

    - ``output_mode``: must be one of the valid modes (case-insensitive).
    - ``tui_page_size``: must convert to a positive integer.
    - ``low_confidence_threshold``: must convert to a float.
    - ``use_daemon``: a boolean, number, or recognised true/false string.

    Returns:
        The same *config* object, mutated in place.
    """
    data = load_config_file()
    if not data:
        return config

    if "output_mode" in data:
        mode = str(data["output_mode"]).lower()
        if mode in _VALID_OUTPUT_MODES:
            config.output_mode = mode

    if "tui_page_size" in data:
        try:
            page_size = int(data["tui_page_size"])
        except (TypeError, ValueError, OverflowError):
            # OverflowError: TOML allows ``inf``, which int() cannot convert.
            page_size = None
        if page_size is not None and page_size > 0:
            config.tui_page_size = page_size

    if "low_confidence_threshold" in data:
        try:
            config.low_confidence_threshold = float(data["low_confidence_threshold"])
        except (TypeError, ValueError):
            pass

    if "use_daemon" in data:
        flag = _coerce_bool(data["use_daemon"])
        if flag is not None:
            config.use_daemon = flag

    return config