valscanner 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- valscanner-0.1.0/LICENSE +21 -0
- valscanner-0.1.0/PKG-INFO +248 -0
- valscanner-0.1.0/README.md +200 -0
- valscanner-0.1.0/pyproject.toml +44 -0
- valscanner-0.1.0/setup.cfg +4 -0
- valscanner-0.1.0/valscanner/__init__.py +3 -0
- valscanner-0.1.0/valscanner/cli.py +101 -0
- valscanner-0.1.0/valscanner/core/__init__.py +0 -0
- valscanner-0.1.0/valscanner/core/categories.py +50 -0
- valscanner-0.1.0/valscanner/core/db.py +117 -0
- valscanner-0.1.0/valscanner/core/export.py +32 -0
- valscanner-0.1.0/valscanner/core/metadata.py +175 -0
- valscanner-0.1.0/valscanner/core/scanner.py +238 -0
- valscanner-0.1.0/valscanner/core/schema.py +94 -0
- valscanner-0.1.0/valscanner/core/similarity.py +168 -0
- valscanner-0.1.0/valscanner/core/tagging.py +102 -0
- valscanner-0.1.0/valscanner/gui/__init__.py +0 -0
- valscanner-0.1.0/valscanner/gui/constants.py +50 -0
- valscanner-0.1.0/valscanner/gui/delegates.py +138 -0
- valscanner-0.1.0/valscanner/gui/dialogs.py +470 -0
- valscanner-0.1.0/valscanner/gui/models.py +190 -0
- valscanner-0.1.0/valscanner/gui/panels/__init__.py +0 -0
- valscanner-0.1.0/valscanner/gui/panels/console.py +87 -0
- valscanner-0.1.0/valscanner/gui/panels/detail.py +271 -0
- valscanner-0.1.0/valscanner/gui/panels/folders.py +275 -0
- valscanner-0.1.0/valscanner/gui/panels/scans.py +141 -0
- valscanner-0.1.0/valscanner/gui/panels/similar.py +564 -0
- valscanner-0.1.0/valscanner/gui/preferences.py +400 -0
- valscanner-0.1.0/valscanner/gui/theme.py +87 -0
- valscanner-0.1.0/valscanner/gui/window.py +1673 -0
- valscanner-0.1.0/valscanner/gui/workers.py +81 -0
- valscanner-0.1.0/valscanner.egg-info/PKG-INFO +248 -0
- valscanner-0.1.0/valscanner.egg-info/SOURCES.txt +35 -0
- valscanner-0.1.0/valscanner.egg-info/dependency_links.txt +1 -0
- valscanner-0.1.0/valscanner.egg-info/entry_points.txt +3 -0
- valscanner-0.1.0/valscanner.egg-info/requires.txt +6 -0
- valscanner-0.1.0/valscanner.egg-info/top_level.txt +1 -0
valscanner-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Abdalrahman Valabji
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: valscanner
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Recursive file scanner with metadata, tagging, and duplicate detection
|
|
5
|
+
Author-email: Abdalrahman Valabji <valabji@gmail.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Abdalrahman Valabji
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Keywords: scanner,files,metadata,duplicates,sqlite,gui
|
|
29
|
+
Classifier: Development Status :: 3 - Alpha
|
|
30
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
31
|
+
Classifier: Programming Language :: Python :: 3
|
|
32
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Topic :: Utilities
|
|
38
|
+
Classifier: Topic :: System :: Filesystems
|
|
39
|
+
Classifier: Environment :: X11 Applications :: Qt
|
|
40
|
+
Requires-Python: >=3.8
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: PySide6
|
|
44
|
+
Provides-Extra: rich
|
|
45
|
+
Requires-Dist: Pillow; extra == "rich"
|
|
46
|
+
Requires-Dist: mutagen; extra == "rich"
|
|
47
|
+
Requires-Dist: PyPDF2; extra == "rich"
|
|
48
|
+
|
|
49
|
+
# ValScanner
|
|
50
|
+
|
|
51
|
+
A recursive file scanner with rich metadata extraction, full-text search, tagging, thumbnail generation, and duplicate/similar-folder detection — available as both a CLI tool and a PySide6 GUI.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Features
|
|
56
|
+
|
|
57
|
+
- **Recursive scan** — indexes every file under a root directory into SQLite
|
|
58
|
+
- **Rich metadata** — image EXIF, audio tags, PDF page count (optional deps)
|
|
59
|
+
- **Thumbnails** — JPEG blobs stored in the database; displayed in the GUI grid view
|
|
60
|
+
- **Tagging** — rule-based tags derived from path, filename, size, and extension
|
|
61
|
+
- **Full-text search** — FTS5 virtual table over filenames and paths
|
|
62
|
+
- **Similar-folder detection** — pairwise comparison using filename Jaccard + extension cosine + size ratio + SHA-256 Jaccard
|
|
63
|
+
- **Export** — CSV and JSON export from the CLI
|
|
64
|
+
- **Multiple scan sessions** — each scan stored with a label; list or delete past scans
|
|
65
|
+
|
|
66
|
+
---
|
|
67
|
+
|
|
68
|
+
## Requirements
|
|
69
|
+
|
|
70
|
+
| Requirement | Version |
|
|
71
|
+
|---|---|
|
|
72
|
+
| Python | 3.8+ |
|
|
73
|
+
| PySide6 | any recent release |
|
|
74
|
+
| Pillow *(optional)* | image EXIF + thumbnails |
|
|
75
|
+
| mutagen *(optional)* | audio metadata |
|
|
76
|
+
| PyPDF2 *(optional)* | PDF page count |
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
### macOS / Linux
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Clone the repo
|
|
86
|
+
git clone <repo-url> myscanner
|
|
87
|
+
cd myscanner
|
|
88
|
+
|
|
89
|
+
# Run the installer (creates .venv, installs all deps, links launchers)
|
|
90
|
+
bash install.sh
|
|
91
|
+
|
|
92
|
+
# Or skip optional rich-metadata deps
|
|
93
|
+
bash install.sh --no-rich
|
|
94
|
+
|
|
95
|
+
# Or install into your active Python environment (no venv)
|
|
96
|
+
bash install.sh --no-venv
|
|
97
|
+
|
|
98
|
+
# Or specify where launcher scripts go
|
|
99
|
+
bash install.sh --prefix ~/.local
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
The installer places `valscanner` and `valscanner-gui` symlinks in `~/.local/bin` (Linux) or `/usr/local/bin` (macOS, if writable).
|
|
103
|
+
|
|
104
|
+
### Windows (PowerShell)
|
|
105
|
+
|
|
106
|
+
```powershell
|
|
107
|
+
# Clone the repo
|
|
108
|
+
git clone <repo-url> myscanner
|
|
109
|
+
cd myscanner
|
|
110
|
+
|
|
111
|
+
# Run the installer (creates .venv, installs all deps, writes .cmd launchers)
|
|
112
|
+
.\install.ps1
|
|
113
|
+
|
|
114
|
+
# Skip optional rich-metadata deps
|
|
115
|
+
.\install.ps1 -NoRich
|
|
116
|
+
|
|
117
|
+
# Install into your active Python environment (no venv)
|
|
118
|
+
.\install.ps1 -NoVenv
|
|
119
|
+
|
|
120
|
+
# Specify launcher directory
|
|
121
|
+
.\install.ps1 -Prefix "C:\Tools\ValScanner"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Launchers are written as `.cmd` files to `%LOCALAPPDATA%\Programs\ValScanner\bin` by default. The script offers to add that directory to your user `PATH`.
|
|
125
|
+
|
|
126
|
+
### Manual (pip)
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
# Minimal — GUI only, no image/audio/PDF metadata
|
|
130
|
+
pip install -e .
|
|
131
|
+
|
|
132
|
+
# Full — all optional metadata extractors
|
|
133
|
+
pip install -e ".[rich]"
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
## Usage
|
|
139
|
+
|
|
140
|
+
### GUI
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
valscanner-gui
|
|
144
|
+
# or
|
|
145
|
+
python -m valscanner.gui.window
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### CLI
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
# Scan a directory
|
|
152
|
+
valscanner /path/to/scan
|
|
153
|
+
|
|
154
|
+
# With options
|
|
155
|
+
valscanner /path/to/scan --db my.db --no-hash --export-csv --export-json --verbose
|
|
156
|
+
|
|
157
|
+
# List past scans
|
|
158
|
+
valscanner --list-scans --db my.db
|
|
159
|
+
|
|
160
|
+
# Delete a scan by ID
|
|
161
|
+
valscanner --delete-scan 3 --db my.db
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
Full flag reference:
|
|
165
|
+
|
|
166
|
+
| Flag | Default | Description |
|
|
167
|
+
|---|---|---|
|
|
168
|
+
| `--db PATH` | `file_index.db` | SQLite database file |
|
|
169
|
+
| `--no-hash` | off | Skip SHA-256 (faster) |
|
|
170
|
+
| `--export-csv` | off | Write a CSV export after the scan, named after the `--db` path with `.db` replaced by `.csv` |
|
|
171
|
+
| `--export-json` | off | Write a JSON export after the scan, named after the `--db` path with `.db` replaced by `.json` |
|
|
172
|
+
| `--verbose` / `-v` | off | Per-file progress |
|
|
173
|
+
| `--list-scans` | — | Print all stored scans and exit |
|
|
174
|
+
| `--delete-scan ID` | — | Delete scan by ID and exit |
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
## Project layout
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
myscanner/
|
|
182
|
+
├── pyproject.toml ← packaging & entry points
|
|
183
|
+
├── install.sh ← macOS/Linux installer
|
|
184
|
+
├── install.ps1 ← Windows PowerShell installer
|
|
185
|
+
├── file_scanner.py ← backward-compat shim
|
|
186
|
+
├── file_scanner_gui.py ← backward-compat shim
|
|
187
|
+
│
|
|
188
|
+
└── valscanner/
|
|
189
|
+
    ├── cli.py ← CLI entry point
|
|
190
|
+
    ├── core/ ← zero Qt dependencies
|
|
191
|
+
    │   ├── schema.py
|
|
192
|
+
    │   ├── categories.py
|
|
193
|
+
    │   ├── metadata.py
|
|
194
|
+
    │   ├── tagging.py
|
|
195
|
+
    │   ├── scanner.py
|
|
196
|
+
    │   ├── similarity.py
|
|
197
|
+
    │   ├── export.py
|
|
198
|
+
    │   └── db.py
|
|
199
|
+
    └── gui/ ← PySide6 front-end
|
|
200
|
+
        ├── constants.py
|
|
201
|
+
        ├── workers.py
|
|
202
|
+
        ├── models.py
|
|
203
|
+
        ├── delegates.py
|
|
204
|
+
        ├── dialogs.py
|
|
205
|
+
        ├── window.py
|
|
206
|
+
        └── panels/
|
|
207
|
+
            ├── detail.py
|
|
208
|
+
            ├── folders.py
|
|
209
|
+
            ├── similar.py
|
|
210
|
+
            ├── scans.py
|
|
211
|
+
            └── console.py
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Database schema
|
|
217
|
+
|
|
218
|
+
Every `.db` file contains five tables plus one FTS5 virtual table:
|
|
219
|
+
|
|
220
|
+
| Table | Contents |
|
|
221
|
+
|---|---|
|
|
222
|
+
| `scans` | One row per scan session (label, root, file count, total size) |
|
|
223
|
+
| `files` | One row per file; `extra_meta` is a JSON blob of rich metadata |
|
|
224
|
+
| `folders` | Cumulative byte/file counts for every ancestor directory |
|
|
225
|
+
| `thumbnails` | JPEG blobs keyed by `file_id` |
|
|
226
|
+
| `media_samples` | Low-quality audio/video clips keyed by `file_id` |
|
|
227
|
+
| `files_fts` | FTS5 virtual table over `files`; populated by triggers |
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Development
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
# Editable install with all extras
|
|
235
|
+
pip install -e ".[rich]"
|
|
236
|
+
|
|
237
|
+
# Run directly without installing
|
|
238
|
+
python -m valscanner.gui.window # GUI
|
|
239
|
+
python -m valscanner.cli # CLI (add args after --)
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
Output files (`*.db`, `*.csv`, `*.json`) are gitignored.
|
|
243
|
+
|
|
244
|
+
---
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# ValScanner
|
|
2
|
+
|
|
3
|
+
A recursive file scanner with rich metadata extraction, full-text search, tagging, thumbnail generation, and duplicate/similar-folder detection — available as both a CLI tool and a PySide6 GUI.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Features
|
|
8
|
+
|
|
9
|
+
- **Recursive scan** — indexes every file under a root directory into SQLite
|
|
10
|
+
- **Rich metadata** — image EXIF, audio tags, PDF page count (optional deps)
|
|
11
|
+
- **Thumbnails** — JPEG blobs stored in the database; displayed in the GUI grid view
|
|
12
|
+
- **Tagging** — rule-based tags derived from path, filename, size, and extension
|
|
13
|
+
- **Full-text search** — FTS5 virtual table over filenames and paths
|
|
14
|
+
- **Similar-folder detection** — pairwise comparison using filename Jaccard + extension cosine + size ratio + SHA-256 Jaccard
|
|
15
|
+
- **Export** — CSV and JSON export from the CLI
|
|
16
|
+
- **Multiple scan sessions** — each scan stored with a label; list or delete past scans
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Requirements
|
|
21
|
+
|
|
22
|
+
| Requirement | Version |
|
|
23
|
+
|---|---|
|
|
24
|
+
| Python | 3.8+ |
|
|
25
|
+
| PySide6 | any recent release |
|
|
26
|
+
| Pillow *(optional)* | image EXIF + thumbnails |
|
|
27
|
+
| mutagen *(optional)* | audio metadata |
|
|
28
|
+
| PyPDF2 *(optional)* | PDF page count |
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
### macOS / Linux
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# Clone the repo
|
|
38
|
+
git clone <repo-url> myscanner
|
|
39
|
+
cd myscanner
|
|
40
|
+
|
|
41
|
+
# Run the installer (creates .venv, installs all deps, links launchers)
|
|
42
|
+
bash install.sh
|
|
43
|
+
|
|
44
|
+
# Or skip optional rich-metadata deps
|
|
45
|
+
bash install.sh --no-rich
|
|
46
|
+
|
|
47
|
+
# Or install into your active Python environment (no venv)
|
|
48
|
+
bash install.sh --no-venv
|
|
49
|
+
|
|
50
|
+
# Or specify where launcher scripts go
|
|
51
|
+
bash install.sh --prefix ~/.local
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The installer places `valscanner` and `valscanner-gui` symlinks in `~/.local/bin` (Linux) or `/usr/local/bin` (macOS, if writable).
|
|
55
|
+
|
|
56
|
+
### Windows (PowerShell)
|
|
57
|
+
|
|
58
|
+
```powershell
|
|
59
|
+
# Clone the repo
|
|
60
|
+
git clone <repo-url> myscanner
|
|
61
|
+
cd myscanner
|
|
62
|
+
|
|
63
|
+
# Run the installer (creates .venv, installs all deps, writes .cmd launchers)
|
|
64
|
+
.\install.ps1
|
|
65
|
+
|
|
66
|
+
# Skip optional rich-metadata deps
|
|
67
|
+
.\install.ps1 -NoRich
|
|
68
|
+
|
|
69
|
+
# Install into your active Python environment (no venv)
|
|
70
|
+
.\install.ps1 -NoVenv
|
|
71
|
+
|
|
72
|
+
# Specify launcher directory
|
|
73
|
+
.\install.ps1 -Prefix "C:\Tools\ValScanner"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Launchers are written as `.cmd` files to `%LOCALAPPDATA%\Programs\ValScanner\bin` by default. The script offers to add that directory to your user `PATH`.
|
|
77
|
+
|
|
78
|
+
### Manual (pip)
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Minimal — GUI only, no image/audio/PDF metadata
|
|
82
|
+
pip install -e .
|
|
83
|
+
|
|
84
|
+
# Full — all optional metadata extractors
|
|
85
|
+
pip install -e ".[rich]"
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Usage
|
|
91
|
+
|
|
92
|
+
### GUI
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
valscanner-gui
|
|
96
|
+
# or
|
|
97
|
+
python -m valscanner.gui.window
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### CLI
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Scan a directory
|
|
104
|
+
valscanner /path/to/scan
|
|
105
|
+
|
|
106
|
+
# With options
|
|
107
|
+
valscanner /path/to/scan --db my.db --no-hash --export-csv --export-json --verbose
|
|
108
|
+
|
|
109
|
+
# List past scans
|
|
110
|
+
valscanner --list-scans --db my.db
|
|
111
|
+
|
|
112
|
+
# Delete a scan by ID
|
|
113
|
+
valscanner --delete-scan 3 --db my.db
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Full flag reference:
|
|
117
|
+
|
|
118
|
+
| Flag | Default | Description |
|
|
119
|
+
|---|---|---|
|
|
120
|
+
| `--db PATH` | `file_index.db` | SQLite database file |
|
|
121
|
+
| `--no-hash` | off | Skip SHA-256 (faster) |
|
|
122
|
+
| `--export-csv` | off | Write a CSV export after the scan, named after the `--db` path with `.db` replaced by `.csv` |
|
|
123
|
+
| `--export-json` | off | Write a JSON export after the scan, named after the `--db` path with `.db` replaced by `.json` |
|
|
124
|
+
| `--verbose` / `-v` | off | Per-file progress |
|
|
125
|
+
| `--list-scans` | — | Print all stored scans and exit |
|
|
126
|
+
| `--delete-scan ID` | — | Delete scan by ID and exit |
|
|
127
|
+
|
|
128
|
+
---
|
|
129
|
+
|
|
130
|
+
## Project layout
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
myscanner/
|
|
134
|
+
├── pyproject.toml ← packaging & entry points
|
|
135
|
+
├── install.sh ← macOS/Linux installer
|
|
136
|
+
├── install.ps1 ← Windows PowerShell installer
|
|
137
|
+
├── file_scanner.py ← backward-compat shim
|
|
138
|
+
├── file_scanner_gui.py ← backward-compat shim
|
|
139
|
+
│
|
|
140
|
+
└── valscanner/
|
|
141
|
+
    ├── cli.py ← CLI entry point
|
|
142
|
+
    ├── core/ ← zero Qt dependencies
|
|
143
|
+
    │   ├── schema.py
|
|
144
|
+
    │   ├── categories.py
|
|
145
|
+
    │   ├── metadata.py
|
|
146
|
+
    │   ├── tagging.py
|
|
147
|
+
    │   ├── scanner.py
|
|
148
|
+
    │   ├── similarity.py
|
|
149
|
+
    │   ├── export.py
|
|
150
|
+
    │   └── db.py
|
|
151
|
+
    └── gui/ ← PySide6 front-end
|
|
152
|
+
        ├── constants.py
|
|
153
|
+
        ├── workers.py
|
|
154
|
+
        ├── models.py
|
|
155
|
+
        ├── delegates.py
|
|
156
|
+
        ├── dialogs.py
|
|
157
|
+
        ├── window.py
|
|
158
|
+
        └── panels/
|
|
159
|
+
            ├── detail.py
|
|
160
|
+
            ├── folders.py
|
|
161
|
+
            ├── similar.py
|
|
162
|
+
            ├── scans.py
|
|
163
|
+
            └── console.py
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## Database schema
|
|
169
|
+
|
|
170
|
+
Every `.db` file contains five tables plus one FTS5 virtual table:
|
|
171
|
+
|
|
172
|
+
| Table | Contents |
|
|
173
|
+
|---|---|
|
|
174
|
+
| `scans` | One row per scan session (label, root, file count, total size) |
|
|
175
|
+
| `files` | One row per file; `extra_meta` is a JSON blob of rich metadata |
|
|
176
|
+
| `folders` | Cumulative byte/file counts for every ancestor directory |
|
|
177
|
+
| `thumbnails` | JPEG blobs keyed by `file_id` |
|
|
178
|
+
| `media_samples` | Low-quality audio/video clips keyed by `file_id` |
|
|
179
|
+
| `files_fts` | FTS5 virtual table over `files`; populated by triggers |
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Development
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
# Editable install with all extras
|
|
187
|
+
pip install -e ".[rich]"
|
|
188
|
+
|
|
189
|
+
# Run directly without installing
|
|
190
|
+
python -m valscanner.gui.window # GUI
|
|
191
|
+
python -m valscanner.cli # CLI (add args after --)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
Output files (`*.db`, `*.csv`, `*.json`) are gitignored.
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## License
|
|
199
|
+
|
|
200
|
+
MIT
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=64", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "valscanner"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Recursive file scanner with metadata, tagging, and duplicate detection"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [{ name = "Abdalrahman Valabji", email = "valabji@gmail.com" }]
|
|
12
|
+
requires-python = ">=3.8"
|
|
13
|
+
keywords = ["scanner", "files", "metadata", "duplicates", "sqlite", "gui"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.8",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
"Programming Language :: Python :: 3.11",
|
|
22
|
+
"Programming Language :: Python :: 3.12",
|
|
23
|
+
"Topic :: Utilities",
|
|
24
|
+
"Topic :: System :: Filesystems",
|
|
25
|
+
"Environment :: X11 Applications :: Qt",
|
|
26
|
+
]
|
|
27
|
+
dependencies = [
|
|
28
|
+
"PySide6",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
rich = [
|
|
33
|
+
"Pillow",
|
|
34
|
+
"mutagen",
|
|
35
|
+
"PyPDF2",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
[project.scripts]
|
|
39
|
+
valscanner = "valscanner.cli:main"
|
|
40
|
+
valscanner-gui = "valscanner.gui.window:main"
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.packages.find]
|
|
43
|
+
where = ["."]
|
|
44
|
+
include = ["valscanner*"]
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
"""
|
|
4
|
+
ValScanner CLI — scan a directory and build a searchable file database.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
valscanner /path/to/scan [options]
|
|
8
|
+
valscanner --list-scans --db my.db
|
|
9
|
+
valscanner --delete-scan 3 --db my.db
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import sys
|
|
13
|
+
import time
|
|
14
|
+
import argparse
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from .core.metadata import PIL_AVAILABLE, MUTAGEN_AVAILABLE, PYPDF_AVAILABLE
|
|
18
|
+
from .core.scanner import scan
|
|
19
|
+
from .core.export import export_csv, export_json
|
|
20
|
+
from .core.db import query_db, print_summary, list_scans, delete_scan
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _export_target(db_path: str, suffix: str) -> str:
    """Return *db_path* with its final extension replaced by *suffix*.

    ``str.replace(".db", suffix)`` leaves the path unchanged when it contains
    no ``.db`` (so the export would be written over the database itself) and
    rewrites ``.db`` wherever it occurs, including in directory names.
    Swapping only the final suffix avoids both problems.
    """
    return str(Path(db_path).with_suffix(suffix))


def main() -> None:
    """Run the ValScanner command-line interface.

    Three modes, checked in this order:

    * ``--list-scans``: print one line per stored scan, then exit with 0.
    * ``--delete-scan ID``: delete that scan, then exit with 0.
    * otherwise: scan ``path`` into the ``--db`` database, print a summary,
      and optionally export to CSV/JSON and run a ``--query``.

    Exits through ``parser.error`` when no path is given in scan mode, and
    with status 1 when the path does not exist.
    """
    parser = argparse.ArgumentParser(
        description="Scan a directory and build a searchable file database.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("path", nargs="?", help="Root directory to scan")
    parser.add_argument("--db", default="file_index.db",
                        help="SQLite database path (default: file_index.db)")
    parser.add_argument("--label", metavar="NAME", default="",
                        help="Human-readable label for this scan")
    parser.add_argument("--export-csv", action="store_true", help="Export results to CSV")
    parser.add_argument("--export-json", action="store_true", help="Export results to JSON")
    parser.add_argument("--no-hash", action="store_true", help="Skip SHA-256 hashing")
    # The README documents "-v" as an alias, so register it alongside --verbose.
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Print each file as indexed")
    parser.add_argument("--query", metavar="TERM", help="Query the database after scanning")
    parser.add_argument("--list-scans", action="store_true", help="List all scans in the database")
    parser.add_argument("--delete-scan", type=int, metavar="ID", help="Delete a scan by ID")
    args = parser.parse_args()

    # Maintenance modes need no scan path and end the program immediately.
    if args.list_scans:
        scans = list_scans(args.db)
        if not scans:
            print("No scans in database.")
        for s in scans:
            print(f" [{s['id']:3d}] {s['label'] or s['root']:40s} "
                  f"{s['file_count']:>8,} files {s['total_human']:>10s} {s['scanned_at']}")
        sys.exit(0)

    if args.delete_scan is not None:
        delete_scan(args.db, args.delete_scan)
        print(f"✓ Scan {args.delete_scan} deleted.")
        sys.exit(0)

    if not args.path:
        parser.error("path is required unless using --list-scans or --delete-scan")

    root = Path(args.path).expanduser().resolve()
    if not root.exists():
        # Diagnostics go to stderr so they do not mix with normal output.
        print(f"Error: path does not exist: {root}", file=sys.stderr)
        sys.exit(1)

    print(f"\n🔍 Scanning: {root}")
    print(f" Database: {args.db}")
    if args.label:
        print(f" Label: {args.label}")
    # Tell the user up front which optional extractors are unavailable.
    if not PIL_AVAILABLE:
        print(" ⚠ Pillow not installed — image EXIF metadata skipped")
    if not MUTAGEN_AVAILABLE:
        print(" ⚠ mutagen not installed — audio metadata skipped")
    if not PYPDF_AVAILABLE:
        print(" ⚠ PyPDF2 not installed — PDF metadata skipped")
    print()

    t0 = time.time()
    stats = scan(root, args.db, compute_hash=not args.no_hash,
                 verbose=args.verbose, label=args.label)
    elapsed = time.time() - t0

    print(f"\n✅ Done in {elapsed:.1f}s — "
          f"scan #{stats['scan_id']}, "
          f"{stats['scanned']:,} indexed, "
          f"{stats['errors']:,} errors, "
          f"{stats['skipped']:,} skipped")

    print_summary(args.db)

    # Exports are written next to the database and cover only this scan.
    if args.export_csv:
        export_csv(args.db, _export_target(args.db, ".csv"), scan_id=stats["scan_id"])
    if args.export_json:
        export_json(args.db, _export_target(args.db, ".json"), scan_id=stats["scan_id"])
    if args.query:
        query_db(args.db, args.query)

    print(f" 💡 Run with --query photos, --list-scans, or open the GUI:\n"
          f" valscanner-gui\n")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
if __name__ == "__main__":
|
|
101
|
+
main()
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# Category label per key. The keys look like top-level MIME types
# (presumably the part before the "/" — confirm against the callers).
MIME_CATEGORY: dict[str, str] = dict(
    image="photo",
    video="video",
    audio="audio",
    text="document",
    application="application",
)
|
|
10
|
+
|
|
11
|
+
# Extensions grouped by the category they map to. Group order and the order
# inside each group fix the insertion order of EXT_CATEGORY below.
_EXT_GROUPS = (
    # Documents
    ("document", (".pdf", ".doc", ".docx", ".odt", ".rtf", ".txt",
                  ".md", ".rst", ".tex")),
    # Spreadsheets
    ("spreadsheet", (".xls", ".xlsx", ".ods", ".csv")),
    # Presentations
    ("presentation", (".ppt", ".pptx", ".odp")),
    # Photos / images (.svg is the only extension labelled "image")
    ("photo", (".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff", ".tif",
               ".webp", ".heic", ".heif", ".raw", ".cr2", ".nef")),
    ("image", (".svg",)),
    # Video
    ("video", (".mp4", ".mov", ".avi", ".mkv", ".wmv", ".flv", ".webm",
               ".m4v")),
    # Audio
    ("audio", (".mp3", ".flac", ".wav", ".aac", ".ogg", ".m4a", ".wma")),
    # Code
    ("code", (".py", ".js", ".ts", ".java", ".c", ".cpp", ".h", ".cs",
              ".go", ".rs", ".rb", ".php", ".sh", ".bat", ".ps1")),
    # Data
    ("data", (".json", ".xml", ".yaml", ".yml", ".toml", ".ini", ".sql")),
    # Archives
    ("archive", (".zip", ".tar", ".gz", ".bz2", ".7z", ".rar", ".xz")),
    # Executables / installers
    ("executable", (".exe", ".msi", ".dmg", ".deb", ".rpm", ".appimage")),
    # Fonts
    ("font", (".ttf", ".otf", ".woff", ".woff2")),
    # Ebooks
    ("ebook", (".epub", ".mobi", ".azw")),
)

# Lower-case, dot-prefixed file extension -> category label.
EXT_CATEGORY: dict[str, str] = {
    extension: category
    for category, extensions in _EXT_GROUPS
    for extension in extensions
}
|