valscanner 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. valscanner-0.1.0/LICENSE +21 -0
  2. valscanner-0.1.0/PKG-INFO +248 -0
  3. valscanner-0.1.0/README.md +200 -0
  4. valscanner-0.1.0/pyproject.toml +44 -0
  5. valscanner-0.1.0/setup.cfg +4 -0
  6. valscanner-0.1.0/valscanner/__init__.py +3 -0
  7. valscanner-0.1.0/valscanner/cli.py +101 -0
  8. valscanner-0.1.0/valscanner/core/__init__.py +0 -0
  9. valscanner-0.1.0/valscanner/core/categories.py +50 -0
  10. valscanner-0.1.0/valscanner/core/db.py +117 -0
  11. valscanner-0.1.0/valscanner/core/export.py +32 -0
  12. valscanner-0.1.0/valscanner/core/metadata.py +175 -0
  13. valscanner-0.1.0/valscanner/core/scanner.py +238 -0
  14. valscanner-0.1.0/valscanner/core/schema.py +94 -0
  15. valscanner-0.1.0/valscanner/core/similarity.py +168 -0
  16. valscanner-0.1.0/valscanner/core/tagging.py +102 -0
  17. valscanner-0.1.0/valscanner/gui/__init__.py +0 -0
  18. valscanner-0.1.0/valscanner/gui/constants.py +50 -0
  19. valscanner-0.1.0/valscanner/gui/delegates.py +138 -0
  20. valscanner-0.1.0/valscanner/gui/dialogs.py +470 -0
  21. valscanner-0.1.0/valscanner/gui/models.py +190 -0
  22. valscanner-0.1.0/valscanner/gui/panels/__init__.py +0 -0
  23. valscanner-0.1.0/valscanner/gui/panels/console.py +87 -0
  24. valscanner-0.1.0/valscanner/gui/panels/detail.py +271 -0
  25. valscanner-0.1.0/valscanner/gui/panels/folders.py +275 -0
  26. valscanner-0.1.0/valscanner/gui/panels/scans.py +141 -0
  27. valscanner-0.1.0/valscanner/gui/panels/similar.py +564 -0
  28. valscanner-0.1.0/valscanner/gui/preferences.py +400 -0
  29. valscanner-0.1.0/valscanner/gui/theme.py +87 -0
  30. valscanner-0.1.0/valscanner/gui/window.py +1673 -0
  31. valscanner-0.1.0/valscanner/gui/workers.py +81 -0
  32. valscanner-0.1.0/valscanner.egg-info/PKG-INFO +248 -0
  33. valscanner-0.1.0/valscanner.egg-info/SOURCES.txt +35 -0
  34. valscanner-0.1.0/valscanner.egg-info/dependency_links.txt +1 -0
  35. valscanner-0.1.0/valscanner.egg-info/entry_points.txt +3 -0
  36. valscanner-0.1.0/valscanner.egg-info/requires.txt +6 -0
  37. valscanner-0.1.0/valscanner.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Abdalrahman Valabji
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,248 @@
1
+ Metadata-Version: 2.1
2
+ Name: valscanner
3
+ Version: 0.1.0
4
+ Summary: Recursive file scanner with metadata, tagging, and duplicate detection
5
+ Author-email: Abdalrahman Valabji <valabji@gmail.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Abdalrahman Valabji
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Keywords: scanner,files,metadata,duplicates,sqlite,gui
29
+ Classifier: Development Status :: 3 - Alpha
30
+ Classifier: License :: OSI Approved :: MIT License
31
+ Classifier: Programming Language :: Python :: 3
32
+ Classifier: Programming Language :: Python :: 3.8
33
+ Classifier: Programming Language :: Python :: 3.9
34
+ Classifier: Programming Language :: Python :: 3.10
35
+ Classifier: Programming Language :: Python :: 3.11
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Topic :: Utilities
38
+ Classifier: Topic :: System :: Filesystems
39
+ Classifier: Environment :: X11 Applications :: Qt
40
+ Requires-Python: >=3.8
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: PySide6
44
+ Provides-Extra: rich
45
+ Requires-Dist: Pillow; extra == "rich"
46
+ Requires-Dist: mutagen; extra == "rich"
47
+ Requires-Dist: PyPDF2; extra == "rich"
48
+
49
+ # ValScanner
50
+
51
+ A recursive file scanner with rich metadata extraction, full-text search, tagging, thumbnail generation, and duplicate/similar-folder detection — available as both a CLI tool and a PySide6 GUI.
52
+
53
+ ---
54
+
55
+ ## Features
56
+
57
+ - **Recursive scan** — indexes every file under a root directory into SQLite
58
+ - **Rich metadata** — image EXIF, audio tags, PDF page count (optional deps)
59
+ - **Thumbnails** — JPEG blobs stored in the database; displayed in the GUI grid view
60
+ - **Tagging** — rule-based tags derived from path, filename, size, and extension
61
+ - **Full-text search** — FTS5 virtual table over filenames and paths
62
+ - **Similar-folder detection** — pairwise comparison using filename Jaccard + extension cosine + size ratio + SHA-256 Jaccard
63
+ - **Export** — CSV and JSON export from the CLI
64
+ - **Multiple scan sessions** — each scan stored with a label; list or delete past scans
65
+
66
+ ---
67
+
68
+ ## Requirements
69
+
70
+ | Requirement | Version |
71
+ |---|---|
72
+ | Python | 3.8+ |
73
+ | PySide6 | any recent release |
74
+ | Pillow *(optional)* | image EXIF + thumbnails |
75
+ | mutagen *(optional)* | audio metadata |
76
+ | PyPDF2 *(optional)* | PDF page count |
77
+
78
+ ---
79
+
80
+ ## Installation
81
+
82
+ ### macOS / Linux
83
+
84
+ ```bash
85
+ # Clone the repo
86
+ git clone <repo-url> myscanner
87
+ cd myscanner
88
+
89
+ # Run the installer (creates .venv, installs all deps, links launchers)
90
+ bash install.sh
91
+
92
+ # Or skip optional rich-metadata deps
93
+ bash install.sh --no-rich
94
+
95
+ # Or install into your active Python environment (no venv)
96
+ bash install.sh --no-venv
97
+
98
+ # Or specify where launcher scripts go
99
+ bash install.sh --prefix ~/.local
100
+ ```
101
+
102
+ The installer places `valscanner` and `valscanner-gui` symlinks in `~/.local/bin` (Linux) or `/usr/local/bin` (macOS, if writable).
103
+
104
+ ### Windows (PowerShell)
105
+
106
+ ```powershell
107
+ # Clone the repo
108
+ git clone <repo-url> myscanner
109
+ cd myscanner
110
+
111
+ # Run the installer (creates .venv, installs all deps, writes .cmd launchers)
112
+ .\install.ps1
113
+
114
+ # Skip optional rich-metadata deps
115
+ .\install.ps1 -NoRich
116
+
117
+ # Install into your active Python environment (no venv)
118
+ .\install.ps1 -NoVenv
119
+
120
+ # Specify launcher directory
121
+ .\install.ps1 -Prefix "C:\Tools\ValScanner"
122
+ ```
123
+
124
+ Launchers are written as `.cmd` files to `%LOCALAPPDATA%\Programs\ValScanner\bin` by default. The script offers to add that directory to your user `PATH`.
125
+
126
+ ### Manual (pip)
127
+
128
+ ```bash
129
+ # Minimal — GUI only, no image/audio/PDF metadata
130
+ pip install -e .
131
+
132
+ # Full — all optional metadata extractors
133
+ pip install -e ".[rich]"
134
+ ```
135
+
136
+ ---
137
+
138
+ ## Usage
139
+
140
+ ### GUI
141
+
142
+ ```bash
143
+ valscanner-gui
144
+ # or
145
+ python -m valscanner.gui.window
146
+ ```
147
+
148
+ ### CLI
149
+
150
+ ```bash
151
+ # Scan a directory
152
+ valscanner /path/to/scan
153
+
154
+ # With options
155
+ valscanner /path/to/scan --db my.db --no-hash --export-csv --export-json --verbose
156
+
157
+ # List past scans
158
+ valscanner --list-scans --db my.db
159
+
160
+ # Delete a scan by ID
161
+ valscanner --delete-scan 3 --db my.db
162
+ ```
163
+
164
+ Full flag reference:
165
+
166
+ | Flag | Default | Description |
167
+ |---|---|---|
168
+ | `--db PATH` | `file_index.db` | SQLite database file |
169
+ | `--no-hash` | off | Skip SHA-256 (faster) |
170
+ | `--export-csv` | off | Write a CSV export named after the database file (`my.db` → `my.csv`) after scan |
171
+ | `--export-json` | off | Write a JSON export named after the database file (`my.db` → `my.json`) after scan |
172
+ | `--verbose` / `-v` | off | Per-file progress |
173
+ | `--list-scans` | — | Print all stored scans and exit |
174
+ | `--delete-scan ID` | — | Delete scan by ID and exit |
175
+
176
+ ---
177
+
178
+ ## Project layout
179
+
180
+ ```
181
+ myscanner/
182
+ ├── pyproject.toml            ← packaging & entry points
182
+ ├── install.sh                ← macOS/Linux installer
183
+ ├── install.ps1               ← Windows PowerShell installer
184
+ ├── file_scanner.py           ← backward-compat shim
185
+ ├── file_scanner_gui.py       ← backward-compat shim
186
+ │
187
+ └── valscanner/
188
+     ├── cli.py                ← CLI entry point
189
+     ├── core/                 ← zero Qt dependencies
190
+     │   ├── schema.py
191
+     │   ├── categories.py
192
+     │   ├── metadata.py
193
+     │   ├── tagging.py
194
+     │   ├── scanner.py
195
+     │   ├── similarity.py
196
+     │   ├── export.py
197
+     │   └── db.py
198
+     └── gui/                  ← PySide6 front-end
199
+         ├── constants.py
200
+         ├── workers.py
201
+         ├── models.py
202
+         ├── delegates.py
203
+         ├── dialogs.py
204
+         ├── window.py
205
+         └── panels/
206
+             ├── detail.py
207
+             ├── folders.py
208
+             ├── similar.py
209
+             ├── scans.py
210
+             └── console.py
212
+ ```
213
+
214
+ ---
215
+
216
+ ## Database schema
217
+
218
+ Every `.db` file contains five regular tables plus one FTS5 virtual table:
219
+
220
+ | Table | Contents |
221
+ |---|---|
222
+ | `scans` | One row per scan session (label, root, file count, total size) |
223
+ | `files` | One row per file; `extra_meta` is a JSON blob of rich metadata |
224
+ | `folders` | Cumulative byte/file counts for every ancestor directory |
225
+ | `thumbnails` | JPEG blobs keyed by `file_id` |
226
+ | `media_samples` | Low-quality audio/video clips keyed by `file_id` |
227
+ | `files_fts` | FTS5 virtual table over `files`; populated by triggers |
228
+
229
+ ---
230
+
231
+ ## Development
232
+
233
+ ```bash
234
+ # Editable install with all extras
235
+ pip install -e ".[rich]"
236
+
237
+ # Run directly without installing
238
+ python -m valscanner.gui.window # GUI
239
+ python -m valscanner.cli # CLI (add args after --)
240
+ ```
241
+
242
+ Output files (`*.db`, `*.csv`, `*.json`) are gitignored.
243
+
244
+ ---
245
+
246
+ ## License
247
+
248
+ MIT
@@ -0,0 +1,200 @@
1
+ # ValScanner
2
+
3
+ A recursive file scanner with rich metadata extraction, full-text search, tagging, thumbnail generation, and duplicate/similar-folder detection — available as both a CLI tool and a PySide6 GUI.
4
+
5
+ ---
6
+
7
+ ## Features
8
+
9
+ - **Recursive scan** — indexes every file under a root directory into SQLite
10
+ - **Rich metadata** — image EXIF, audio tags, PDF page count (optional deps)
11
+ - **Thumbnails** — JPEG blobs stored in the database; displayed in the GUI grid view
12
+ - **Tagging** — rule-based tags derived from path, filename, size, and extension
13
+ - **Full-text search** — FTS5 virtual table over filenames and paths
14
+ - **Similar-folder detection** — pairwise comparison using filename Jaccard + extension cosine + size ratio + SHA-256 Jaccard
15
+ - **Export** — CSV and JSON export from the CLI
16
+ - **Multiple scan sessions** — each scan stored with a label; list or delete past scans
17
+
18
+ ---
19
+
20
+ ## Requirements
21
+
22
+ | Requirement | Version |
23
+ |---|---|
24
+ | Python | 3.8+ |
25
+ | PySide6 | any recent release |
26
+ | Pillow *(optional)* | image EXIF + thumbnails |
27
+ | mutagen *(optional)* | audio metadata |
28
+ | PyPDF2 *(optional)* | PDF page count |
29
+
30
+ ---
31
+
32
+ ## Installation
33
+
34
+ ### macOS / Linux
35
+
36
+ ```bash
37
+ # Clone the repo
38
+ git clone <repo-url> myscanner
39
+ cd myscanner
40
+
41
+ # Run the installer (creates .venv, installs all deps, links launchers)
42
+ bash install.sh
43
+
44
+ # Or skip optional rich-metadata deps
45
+ bash install.sh --no-rich
46
+
47
+ # Or install into your active Python environment (no venv)
48
+ bash install.sh --no-venv
49
+
50
+ # Or specify where launcher scripts go
51
+ bash install.sh --prefix ~/.local
52
+ ```
53
+
54
+ The installer places `valscanner` and `valscanner-gui` symlinks in `~/.local/bin` (Linux) or `/usr/local/bin` (macOS, if writable).
55
+
56
+ ### Windows (PowerShell)
57
+
58
+ ```powershell
59
+ # Clone the repo
60
+ git clone <repo-url> myscanner
61
+ cd myscanner
62
+
63
+ # Run the installer (creates .venv, installs all deps, writes .cmd launchers)
64
+ .\install.ps1
65
+
66
+ # Skip optional rich-metadata deps
67
+ .\install.ps1 -NoRich
68
+
69
+ # Install into your active Python environment (no venv)
70
+ .\install.ps1 -NoVenv
71
+
72
+ # Specify launcher directory
73
+ .\install.ps1 -Prefix "C:\Tools\ValScanner"
74
+ ```
75
+
76
+ Launchers are written as `.cmd` files to `%LOCALAPPDATA%\Programs\ValScanner\bin` by default. The script offers to add that directory to your user `PATH`.
77
+
78
+ ### Manual (pip)
79
+
80
+ ```bash
81
+ # Minimal — GUI only, no image/audio/PDF metadata
82
+ pip install -e .
83
+
84
+ # Full — all optional metadata extractors
85
+ pip install -e ".[rich]"
86
+ ```
87
+
88
+ ---
89
+
90
+ ## Usage
91
+
92
+ ### GUI
93
+
94
+ ```bash
95
+ valscanner-gui
96
+ # or
97
+ python -m valscanner.gui.window
98
+ ```
99
+
100
+ ### CLI
101
+
102
+ ```bash
103
+ # Scan a directory
104
+ valscanner /path/to/scan
105
+
106
+ # With options
107
+ valscanner /path/to/scan --db my.db --no-hash --export-csv --export-json --verbose
108
+
109
+ # List past scans
110
+ valscanner --list-scans --db my.db
111
+
112
+ # Delete a scan by ID
113
+ valscanner --delete-scan 3 --db my.db
114
+ ```
115
+
116
+ Full flag reference:
117
+
118
+ | Flag | Default | Description |
119
+ |---|---|---|
120
+ | `--db PATH` | `file_index.db` | SQLite database file |
121
+ | `--no-hash` | off | Skip SHA-256 (faster) |
122
+ | `--export-csv` | off | Write a CSV export named after the database file (`my.db` → `my.csv`) after scan |
123
+ | `--export-json` | off | Write a JSON export named after the database file (`my.db` → `my.json`) after scan |
124
+ | `--verbose` / `-v` | off | Per-file progress |
125
+ | `--list-scans` | — | Print all stored scans and exit |
126
+ | `--delete-scan ID` | — | Delete scan by ID and exit |
127
+
128
+ ---
129
+
130
+ ## Project layout
131
+
132
+ ```
133
+ myscanner/
134
+ ├── pyproject.toml            ← packaging & entry points
135
+ ├── install.sh                ← macOS/Linux installer
136
+ ├── install.ps1               ← Windows PowerShell installer
137
+ ├── file_scanner.py           ← backward-compat shim
138
+ ├── file_scanner_gui.py       ← backward-compat shim
139
+ │
140
+ └── valscanner/
141
+     ├── cli.py                ← CLI entry point
142
+     ├── core/                 ← zero Qt dependencies
143
+     │   ├── schema.py
144
+     │   ├── categories.py
145
+     │   ├── metadata.py
146
+     │   ├── tagging.py
147
+     │   ├── scanner.py
148
+     │   ├── similarity.py
149
+     │   ├── export.py
150
+     │   └── db.py
151
+     └── gui/                  ← PySide6 front-end
152
+         ├── constants.py
153
+         ├── workers.py
154
+         ├── models.py
155
+         ├── delegates.py
156
+         ├── dialogs.py
157
+         ├── window.py
158
+         └── panels/
159
+             ├── detail.py
160
+             ├── folders.py
161
+             ├── similar.py
162
+             ├── scans.py
163
+             └── console.py
164
+ ```
165
+
166
+ ---
167
+
168
+ ## Database schema
169
+
170
+ Every `.db` file contains five regular tables plus one FTS5 virtual table:
171
+
172
+ | Table | Contents |
173
+ |---|---|
174
+ | `scans` | One row per scan session (label, root, file count, total size) |
175
+ | `files` | One row per file; `extra_meta` is a JSON blob of rich metadata |
176
+ | `folders` | Cumulative byte/file counts for every ancestor directory |
177
+ | `thumbnails` | JPEG blobs keyed by `file_id` |
178
+ | `media_samples` | Low-quality audio/video clips keyed by `file_id` |
179
+ | `files_fts` | FTS5 virtual table over `files`; populated by triggers |
180
+
181
+ ---
182
+
183
+ ## Development
184
+
185
+ ```bash
186
+ # Editable install with all extras
187
+ pip install -e ".[rich]"
188
+
189
+ # Run directly without installing
190
+ python -m valscanner.gui.window # GUI
191
+ python -m valscanner.cli # CLI (add args after --)
192
+ ```
193
+
194
+ Output files (`*.db`, `*.csv`, `*.json`) are gitignored.
195
+
196
+ ---
197
+
198
+ ## License
199
+
200
+ MIT
@@ -0,0 +1,44 @@
1
+ [build-system]
2
+ requires = ["setuptools>=64", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "valscanner"
7
+ version = "0.1.0"
8
+ description = "Recursive file scanner with metadata, tagging, and duplicate detection"
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ authors = [{ name = "Abdalrahman Valabji", email = "valabji@gmail.com" }]
12
+ requires-python = ">=3.8"
13
+ keywords = ["scanner", "files", "metadata", "duplicates", "sqlite", "gui"]
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "License :: OSI Approved :: MIT License",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.8",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Topic :: Utilities",
24
+ "Topic :: System :: Filesystems",
25
+ "Environment :: X11 Applications :: Qt",
26
+ ]
27
+ dependencies = [
28
+ "PySide6",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ rich = [
33
+ "Pillow",
34
+ "mutagen",
35
+ "PyPDF2",
36
+ ]
37
+
38
+ [project.scripts]
39
+ valscanner = "valscanner.cli:main"
40
+ valscanner-gui = "valscanner.gui.window:main"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["."]
44
+ include = ["valscanner*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ """ValScanner — recursive file scanner with metadata, tagging, and duplicate detection."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+ """
4
+ ValScanner CLI — scan a directory and build a searchable file database.
5
+
6
+ Usage:
7
+ valscanner /path/to/scan [options]
8
+ valscanner --list-scans --db my.db
9
+ valscanner --delete-scan 3 --db my.db
10
+ """
11
+
12
+ import sys
13
+ import time
14
+ import argparse
15
+ from pathlib import Path
16
+
17
+ from .core.metadata import PIL_AVAILABLE, MUTAGEN_AVAILABLE, PYPDF_AVAILABLE
18
+ from .core.scanner import scan
19
+ from .core.export import export_csv, export_json
20
+ from .core.db import query_db, print_summary, list_scans, delete_scan
21
+
22
+
23
def _export_path(db_path: str, suffix: str) -> str:
    """Return *db_path* with its final extension replaced by *suffix*.

    Using ``Path.with_suffix`` instead of ``str.replace(".db", ...)`` keeps
    the export from landing on the database file itself when the database
    name has no ``.db`` extension, and leaves ``.db`` occurrences inside
    directory names untouched.
    """
    return str(Path(db_path).with_suffix(suffix))


def main() -> None:
    """Parse command-line arguments and run the requested CLI action.

    Three modes are supported:

    * ``--list-scans``: print every scan stored in the database, exit 0.
    * ``--delete-scan ID``: delete one scan from the database, exit 0.
    * ``path``: scan the directory, print a summary, then optionally export
      to CSV/JSON and run a ``--query`` against the database.

    Exits with status 1 when ``path`` does not exist; argparse exits with
    status 2 on usage errors (including a missing ``path`` in scan mode).
    """
    parser = argparse.ArgumentParser(
        description="Scan a directory and build a searchable file database.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("path", nargs="?", help="Root directory to scan")
    parser.add_argument("--db", default="file_index.db",
                        help="SQLite database path (default: file_index.db)")
    parser.add_argument("--label", metavar="NAME", default="",
                        help="Human-readable label for this scan")
    parser.add_argument("--export-csv", action="store_true", help="Export results to CSV")
    parser.add_argument("--export-json", action="store_true", help="Export results to JSON")
    parser.add_argument("--no-hash", action="store_true", help="Skip SHA-256 hashing")
    # "-v" is the short form promised by the README's flag reference.
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Print each file as indexed")
    parser.add_argument("--query", metavar="TERM", help="Query the database after scanning")
    parser.add_argument("--list-scans", action="store_true", help="List all scans in the database")
    parser.add_argument("--delete-scan", type=int, metavar="ID", help="Delete a scan by ID")
    args = parser.parse_args()

    if args.list_scans:
        scans = list_scans(args.db)
        if not scans:
            print("No scans in database.")
        for s in scans:
            print(f" [{s['id']:3d}] {s['label'] or s['root']:40s} "
                  f"{s['file_count']:>8,} files {s['total_human']:>10s} {s['scanned_at']}")
        sys.exit(0)

    # Compare against None so that a scan ID of 0 is still honoured.
    if args.delete_scan is not None:
        delete_scan(args.db, args.delete_scan)
        print(f"✓ Scan {args.delete_scan} deleted.")
        sys.exit(0)

    if not args.path:
        parser.error("path is required unless using --list-scans or --delete-scan")

    root = Path(args.path).expanduser().resolve()
    if not root.exists():
        # Diagnostics belong on stderr so they do not pollute piped output.
        print(f"Error: path does not exist: {root}", file=sys.stderr)
        sys.exit(1)

    print(f"\n🔎 Scanning: {root}")
    print(f" Database: {args.db}")
    if args.label:
        print(f" Label: {args.label}")
    # Warn up front about optional extractors that are not importable.
    if not PIL_AVAILABLE:
        print(" ⚠ Pillow not installed — image EXIF metadata skipped")
    if not MUTAGEN_AVAILABLE:
        print(" ⚠ mutagen not installed — audio metadata skipped")
    if not PYPDF_AVAILABLE:
        print(" ⚠ PyPDF2 not installed — PDF metadata skipped")
    print()

    t0 = time.time()
    stats = scan(root, args.db, compute_hash=not args.no_hash,
                 verbose=args.verbose, label=args.label)
    elapsed = time.time() - t0

    print(f"\n✅ Done in {elapsed:.1f}s — "
          f"scan #{stats['scan_id']}, "
          f"{stats['scanned']:,} indexed, "
          f"{stats['errors']:,} errors, "
          f"{stats['skipped']:,} skipped")

    print_summary(args.db)

    if args.export_csv:
        export_csv(args.db, _export_path(args.db, ".csv"), scan_id=stats["scan_id"])
    if args.export_json:
        export_json(args.db, _export_path(args.db, ".json"), scan_id=stats["scan_id"])
    if args.query:
        query_db(args.db, args.query)

    print(" 💡 Run with --query photos, --list-scans, or open the GUI:\n"
          " valscanner-gui\n")


if __name__ == "__main__":
    main()
File without changes
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
# Category name for each coarse media type. The keys look like the top-level
# part of a MIME type ("image" in "image/png") — confirm against the caller.
MIME_CATEGORY: dict[str, str] = dict(
    image="photo",
    video="video",
    audio="audio",
    text="document",
    application="application",
)
10
+
11
# Category name for each lower-case file extension (leading dot included).
# The table is written as (category, extensions) groups and flattened in
# order, so the resulting dict keeps the same insertion order as a literal.
EXT_CATEGORY: dict[str, str] = {
    extension: category
    for category, extensions in (
        # Documents
        ("document", (".pdf", ".doc", ".docx", ".odt", ".rtf", ".txt",
                      ".md", ".rst", ".tex")),
        # Spreadsheets
        ("spreadsheet", (".xls", ".xlsx", ".ods", ".csv")),
        # Presentations
        ("presentation", (".ppt", ".pptx", ".odp")),
        # Photos / images
        ("photo", (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif",
                   ".webp", ".heic", ".heif", ".raw", ".cr2", ".nef")),
        ("image", (".svg",)),
        # Video
        ("video", (".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".webm",
                   ".m4v")),
        # Audio
        ("audio", (".mp3", ".flac", ".wav", ".aac", ".ogg", ".m4a", ".wma")),
        # Code
        ("code", (".py", ".js", ".ts", ".java", ".c", ".cpp", ".h", ".cs",
                  ".go", ".rs", ".rb", ".php", ".sh", ".bat", ".ps1")),
        # Data
        ("data", (".json", ".xml", ".yaml", ".yml", ".toml", ".ini", ".sql")),
        # Archives
        ("archive", (".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz")),
        # Executables / installers
        ("executable", (".exe", ".msi", ".dmg", ".deb", ".rpm", ".appimage")),
        # Fonts
        ("font", (".ttf", ".otf", ".woff", ".woff2")),
        # Ebooks
        ("ebook", (".epub", ".mobi", ".azw")),
    )
    for extension in extensions
}