vcti-path-format 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. vcti_path_format-1.2.0/LICENSE +8 -0
  2. vcti_path_format-1.2.0/PKG-INFO +274 -0
  3. vcti_path_format-1.2.0/README.md +257 -0
  4. vcti_path_format-1.2.0/pyproject.toml +41 -0
  5. vcti_path_format-1.2.0/setup.cfg +4 -0
  6. vcti_path_format-1.2.0/src/vcti/pathformat/__init__.py +29 -0
  7. vcti_path_format-1.2.0/src/vcti/pathformat/descriptor.py +94 -0
  8. vcti_path_format-1.2.0/src/vcti/pathformat/evaluator/__init__.py +29 -0
  9. vcti_path_format-1.2.0/src/vcti/pathformat/evaluator/base.py +87 -0
  10. vcti_path_format-1.2.0/src/vcti/pathformat/evaluator/heuristic.py +367 -0
  11. vcti_path_format-1.2.0/src/vcti/pathformat/feature_validator/__init__.py +23 -0
  12. vcti_path_format-1.2.0/src/vcti/pathformat/feature_validator/base.py +88 -0
  13. vcti_path_format-1.2.0/src/vcti/pathformat/feature_validator/extension.py +55 -0
  14. vcti_path_format-1.2.0/src/vcti/pathformat/feature_validator/magic_bytes.py +62 -0
  15. vcti_path_format-1.2.0/src/vcti/pathformat/feature_validator/registry.py +53 -0
  16. vcti_path_format-1.2.0/src/vcti/pathformat/identifier.py +178 -0
  17. vcti_path_format-1.2.0/src/vcti/pathformat/py.typed +0 -0
  18. vcti_path_format-1.2.0/src/vcti/pathformat/registry.py +30 -0
  19. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/PKG-INFO +274 -0
  20. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/SOURCES.txt +33 -0
  21. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/dependency_links.txt +1 -0
  22. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/requires.txt +9 -0
  23. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/top_level.txt +1 -0
  24. vcti_path_format-1.2.0/src/vcti_path_format.egg-info/zip-safe +1 -0
  25. vcti_path_format-1.2.0/tests/test_descriptor.py +64 -0
  26. vcti_path_format-1.2.0/tests/test_evaluator_base.py +63 -0
  27. vcti_path_format-1.2.0/tests/test_extension_validator.py +62 -0
  28. vcti_path_format-1.2.0/tests/test_feature_validator_base.py +103 -0
  29. vcti_path_format-1.2.0/tests/test_heuristic_evaluator.py +291 -0
  30. vcti_path_format-1.2.0/tests/test_identifier.py +90 -0
  31. vcti_path_format-1.2.0/tests/test_integration.py +156 -0
  32. vcti_path_format-1.2.0/tests/test_magic_bytes_validator.py +76 -0
  33. vcti_path_format-1.2.0/tests/test_registry.py +44 -0
  34. vcti_path_format-1.2.0/tests/test_validator_registry.py +45 -0
  35. vcti_path_format-1.2.0/tests/test_version.py +15 -0
@@ -0,0 +1,8 @@
1
+ Copyright (c) 2018-2026 Visual Collaboration Technologies Inc.
2
+ All Rights Reserved.
3
+
4
+ This software is proprietary and confidential. Unauthorized copying,
5
+ distribution, or use of this software, via any medium, is strictly
6
+ prohibited. Access is granted only to authorized VCollab developers
7
+ and individuals explicitly authorized by Visual Collaboration
8
+ Technologies Inc.
@@ -0,0 +1,274 @@
1
+ Metadata-Version: 2.4
2
+ Name: vcti-path-format
3
+ Version: 1.2.0
4
+ Summary: File format identification framework with heuristic evaluators and feature validators for Python
5
+ Author: Visual Collaboration Technologies Inc.
6
+ Requires-Python: <3.15,>=3.12
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE
9
+ Requires-Dist: vcti-plugin-catalog>=1.0.1
10
+ Requires-Dist: vcti-lookup>=1.0.1
11
+ Provides-Extra: test
12
+ Requires-Dist: pytest; extra == "test"
13
+ Requires-Dist: pytest-cov; extra == "test"
14
+ Provides-Extra: lint
15
+ Requires-Dist: ruff; extra == "lint"
16
+ Dynamic: license-file
17
+
18
+ # Path Format
19
+
20
+ File format identification framework with heuristic evaluators and feature validators for Python.
21
+
22
+ ## Installation
23
+
24
+ ```bash
25
+ pip install "vcti-path-format>=1.2.0"
26
+ ```
27
+
28
+ ### In `pyproject.toml` dependencies
29
+
30
+ ```toml
31
+ dependencies = [
32
+ "vcti-path-format>=1.2.0",
33
+ ]
34
+ ```
35
+
36
+ ---
37
+
38
+ ## Quick Start
39
+
40
+ ```python
41
+ from pathlib import Path
42
+ from vcti.pathformat import (
43
+ FormatDescriptor,
44
+ FormatIdentifier,
45
+ FormatRegistry,
46
+ MatchConfidence,
47
+ )
48
+ from vcti.pathformat.evaluator import HeuristicEvaluator
49
+
50
+ # Define a format descriptor with validators
51
+ hdf5_descriptor = FormatDescriptor(
52
+ id="hdf5-file",
53
+ name="HDF5 File",
54
+ evaluator=(
55
+ HeuristicEvaluator()
56
+ .check_magic_bytes(b"\x89HDF\r\n\x1a\n") # GATE
57
+ .check_extension([".h5", ".hdf5", ".he5"]) # EVIDENCE
58
+ ),
59
+ attributes={"path_type": "file", "structure": "hdf5"},
60
+ )
61
+
62
+ # Register in a format registry
63
+ registry = FormatRegistry()
64
+ registry.register(hdf5_descriptor)
65
+
66
+ # Identify a file
67
+ identifier = FormatIdentifier(registry)
68
+ results = identifier.identify_file_format(Path("data.h5"))
69
+
70
+ for result in results:
71
+ print(f"{result.descriptor.name}: {result.confidence.name}")
72
+
73
+ # Get best match above a confidence threshold
74
+ best = identifier.get_best_match(
75
+ Path("data.h5"),
76
+ min_confidence=MatchConfidence.LIKELY,
77
+ )
78
+ ```
79
+
80
+ ---
81
+
82
+ ## Core Concepts
83
+
84
+ ### FormatDescriptor
85
+
86
+ Extends `Descriptor[Evaluator]` from vcti-plugin-catalog. Wraps an evaluator
87
+ with format metadata and attributes.
88
+
89
+ ### FormatRegistry
90
+
91
+ Extends `Registry[FormatDescriptor]`. Central catalog of known formats with
92
+ attribute-based filtering via `registry.lookup`.
93
+
94
+ ### FormatIdentifier
95
+
96
+ Evaluates a path against all (or filtered) registered formats and returns
97
+ results sorted by confidence.
98
+
99
+ ### HeuristicEvaluator
100
+
101
+ Builder-pattern evaluator that aggregates validation evidence:
102
+
103
+ ```python
104
+ evaluator = (
105
+ HeuristicEvaluator()
106
+ .check_magic_bytes(b"\x89PNG\r\n\x1a\n") # GATE
107
+ .check_extension([".png"]) # EVIDENCE
108
+ .add_validator(custom_validator) # Custom
109
+ )
110
+ ```
111
+
112
+ **Heuristic rules:**
113
+ - Failed GATE -> `CERTAINLY_NOT`
114
+ - All passed + GATE present -> `DEFINITE`
115
+ - All passed + no GATE -> `LIKELY`
116
+ - Some EVIDENCE failed -> `UNLIKELY`
117
+ - No validators -> `CANT_EVALUATE`
118
+
119
+ ### Feature Validators
120
+
121
+ | Validator | Role | Tier | Checks |
122
+ |-----------|------|------|--------|
123
+ | `MagicBytesValidator` | GATE | IDENTIFICATION | File signature bytes |
124
+ | `ExtensionValidator` | EVIDENCE | IDENTIFICATION | File extension |
125
+
126
+ Custom validators implement the `FeatureValidator` protocol.
127
+
128
+ ---
129
+
130
+ ## Validation Tiers
131
+
132
+ Control evaluation depth with `max_tier`:
133
+
134
+ | Tier | Cost | Examples |
135
+ |------|------|---------|
136
+ | `IDENTIFICATION` | Cheap | Magic bytes, file extension |
137
+ | `STRUCTURE` | Medium | Schema validation, header parsing |
138
+ | `SEMANTIC` | Expensive | Content analysis, business logic |
139
+
140
+ ```python
141
+ from vcti.pathformat import ValidationTier
142
+
143
+ # Only run cheap checks
144
+ results = identifier.identify_file_format(path, max_tier=ValidationTier.IDENTIFICATION)
145
+ ```
146
+
147
+ ---
148
+
149
+ ## Custom Validators
150
+
151
+ Implement the `FeatureValidator` protocol to add domain-specific checks:
152
+
153
+ ```python
154
+ from pathlib import Path
155
+ from vcti.pathformat.feature_validator import (
156
+ FeatureValidator,
157
+ ValidationResult,
158
+ ValidationTier,
159
+ ValidatorRole,
160
+ )
161
+
162
+ class HeaderValidator:
163
+ """Checks that the first line of a file matches an expected header."""
164
+
165
+ id = "header-check"
166
+ description = "Header line validator"
167
+ role = ValidatorRole.EVIDENCE
168
+ tier = ValidationTier.STRUCTURE
169
+
170
+ def __init__(self, expected_header: str):
171
+ self.expected_header = expected_header
172
+
173
+ def validate(self, path: Path) -> ValidationResult:
174
+ try:
175
+ first_line = path.read_text(encoding="utf-8").split("\n", 1)[0]
176
+ is_passed = first_line.strip() == self.expected_header
177
+ except (OSError, UnicodeDecodeError):
178
+ is_passed = False
179
+ return ValidationResult(
180
+ validator_id=self.id,
181
+ role=self.role,
182
+ is_passed=is_passed,
183
+ details=f"Header {'matches' if is_passed else 'mismatch'}",
184
+ )
185
+
186
+ # Use with the builder pattern
187
+ evaluator = (
188
+ HeuristicEvaluator()
189
+ .check_extension([".csv"])
190
+ .add_validator(HeaderValidator("id,name,value"))
191
+ )
192
+ ```
193
+
194
+ ---
195
+
196
+ ## Evaluator Caching
197
+
198
+ `HeuristicEvaluator` includes an LRU cache keyed by `(path, max_tier)`:
199
+
200
+ ```python
201
+ # Default: 128 entries
202
+ evaluator = HeuristicEvaluator(cache_size=128)
203
+
204
+ # Disable caching
205
+ evaluator = HeuristicEvaluator(cache_size=0)
206
+
207
+ # Bypass cache for a single call
208
+ report = descriptor.evaluate(path, use_cache=False)
209
+
210
+ # Inspect and manage
211
+ info = evaluator.cache_info() # (hits, misses, maxsize, currsize) or None
212
+ evaluator.clear_cache()
213
+ ```
214
+
215
+ Cache entries become stale if file contents change. Call `clear_cache()` after
216
+ known file modifications, or pass `use_cache=False` for one-off re-evaluation.
217
+
218
+ ---
219
+
220
+ ## Pre-filtering with Rules
221
+
222
+ ```python
223
+ from vcti.lookup import Rule
224
+
225
+ # Only evaluate formats with structure="hdf5"
226
+ results = identifier.identify_file_format(
227
+ path,
228
+ rules=[Rule("structure", "==", "hdf5")],
229
+ )
230
+ ```
231
+
232
+ ---
233
+
234
+ ## Error Handling
235
+
236
+ The framework raises typed exceptions:
237
+
238
+ | Exception | When |
239
+ |-----------|------|
240
+ | `FileNotFoundError` | Path does not exist |
241
+ | `PathAccessError` | Path is not a file or directory, or cannot be read |
242
+ | `EvaluatorError` | Base class for evaluator errors |
243
+ | `ValidationError` | A validator raised an unexpected exception |
244
+ | `InvalidValidatorError` | Invalid validator passed to builder |
245
+
246
+ ```python
247
+ from vcti.pathformat import PathAccessError
248
+
249
+ try:
250
+ results = identifier.identify_file_format(path)
251
+ except FileNotFoundError:
252
+ print("File not found")
253
+ except PathAccessError as e:
254
+ print(f"Cannot access path: {e}")
255
+ ```
256
+
257
+ ---
258
+
259
+ ## Ecosystem
260
+
261
+ This package is the identification engine in a three-repo system:
262
+
263
+ | Package | Role |
264
+ |---------|------|
265
+ | **vcti-path-format** | Framework: evaluators, validators, registry, identifier |
266
+ | [vcti-path-format-attributes](https://pypi.org/project/vcti-path-format-attributes/) | Vocabulary: standardized attribute enums |
267
+ | [vcti-path-format-descriptors](https://pypi.org/project/vcti-path-format-descriptors/) | Built-in format definitions (HDF5, CAX, etc.) |
268
+
269
+ ---
270
+
271
+ ## Dependencies
272
+
273
+ - [vcti-plugin-catalog](https://pypi.org/project/vcti-plugin-catalog/) (>=1.0.1) — descriptor/registry framework
274
+ - [vcti-lookup](https://pypi.org/project/vcti-lookup/) (>=1.0.1) — attribute-based filtering
@@ -0,0 +1,257 @@
1
+ # Path Format
2
+
3
+ File format identification framework with heuristic evaluators and feature validators for Python.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install "vcti-path-format>=1.2.0"
9
+ ```
10
+
11
+ ### In `pyproject.toml` dependencies
12
+
13
+ ```toml
14
+ dependencies = [
15
+ "vcti-path-format>=1.2.0",
16
+ ]
17
+ ```
18
+
19
+ ---
20
+
21
+ ## Quick Start
22
+
23
+ ```python
24
+ from pathlib import Path
25
+ from vcti.pathformat import (
26
+ FormatDescriptor,
27
+ FormatIdentifier,
28
+ FormatRegistry,
29
+ MatchConfidence,
30
+ )
31
+ from vcti.pathformat.evaluator import HeuristicEvaluator
32
+
33
+ # Define a format descriptor with validators
34
+ hdf5_descriptor = FormatDescriptor(
35
+ id="hdf5-file",
36
+ name="HDF5 File",
37
+ evaluator=(
38
+ HeuristicEvaluator()
39
+ .check_magic_bytes(b"\x89HDF\r\n\x1a\n") # GATE
40
+ .check_extension([".h5", ".hdf5", ".he5"]) # EVIDENCE
41
+ ),
42
+ attributes={"path_type": "file", "structure": "hdf5"},
43
+ )
44
+
45
+ # Register in a format registry
46
+ registry = FormatRegistry()
47
+ registry.register(hdf5_descriptor)
48
+
49
+ # Identify a file
50
+ identifier = FormatIdentifier(registry)
51
+ results = identifier.identify_file_format(Path("data.h5"))
52
+
53
+ for result in results:
54
+ print(f"{result.descriptor.name}: {result.confidence.name}")
55
+
56
+ # Get best match above a confidence threshold
57
+ best = identifier.get_best_match(
58
+ Path("data.h5"),
59
+ min_confidence=MatchConfidence.LIKELY,
60
+ )
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Core Concepts
66
+
67
+ ### FormatDescriptor
68
+
69
+ Extends `Descriptor[Evaluator]` from vcti-plugin-catalog. Wraps an evaluator
70
+ with format metadata and attributes.
71
+
72
+ ### FormatRegistry
73
+
74
+ Extends `Registry[FormatDescriptor]`. Central catalog of known formats with
75
+ attribute-based filtering via `registry.lookup`.
76
+
77
+ ### FormatIdentifier
78
+
79
+ Evaluates a path against all (or filtered) registered formats and returns
80
+ results sorted by confidence.
81
+
82
+ ### HeuristicEvaluator
83
+
84
+ Builder-pattern evaluator that aggregates validation evidence:
85
+
86
+ ```python
87
+ evaluator = (
88
+ HeuristicEvaluator()
89
+ .check_magic_bytes(b"\x89PNG\r\n\x1a\n") # GATE
90
+ .check_extension([".png"]) # EVIDENCE
91
+ .add_validator(custom_validator) # Custom
92
+ )
93
+ ```
94
+
95
+ **Heuristic rules:**
96
+ - Failed GATE -> `CERTAINLY_NOT`
97
+ - All passed + GATE present -> `DEFINITE`
98
+ - All passed + no GATE -> `LIKELY`
99
+ - Some EVIDENCE failed -> `UNLIKELY`
100
+ - No validators -> `CANT_EVALUATE`
101
+
102
+ ### Feature Validators
103
+
104
+ | Validator | Role | Tier | Checks |
105
+ |-----------|------|------|--------|
106
+ | `MagicBytesValidator` | GATE | IDENTIFICATION | File signature bytes |
107
+ | `ExtensionValidator` | EVIDENCE | IDENTIFICATION | File extension |
108
+
109
+ Custom validators implement the `FeatureValidator` protocol.
110
+
111
+ ---
112
+
113
+ ## Validation Tiers
114
+
115
+ Control evaluation depth with `max_tier`:
116
+
117
+ | Tier | Cost | Examples |
118
+ |------|------|---------|
119
+ | `IDENTIFICATION` | Cheap | Magic bytes, file extension |
120
+ | `STRUCTURE` | Medium | Schema validation, header parsing |
121
+ | `SEMANTIC` | Expensive | Content analysis, business logic |
122
+
123
+ ```python
124
+ from vcti.pathformat import ValidationTier
125
+
126
+ # Only run cheap checks
127
+ results = identifier.identify_file_format(path, max_tier=ValidationTier.IDENTIFICATION)
128
+ ```
129
+
130
+ ---
131
+
132
+ ## Custom Validators
133
+
134
+ Implement the `FeatureValidator` protocol to add domain-specific checks:
135
+
136
+ ```python
137
+ from pathlib import Path
138
+ from vcti.pathformat.feature_validator import (
139
+ FeatureValidator,
140
+ ValidationResult,
141
+ ValidationTier,
142
+ ValidatorRole,
143
+ )
144
+
145
+ class HeaderValidator:
146
+ """Checks that the first line of a file matches an expected header."""
147
+
148
+ id = "header-check"
149
+ description = "Header line validator"
150
+ role = ValidatorRole.EVIDENCE
151
+ tier = ValidationTier.STRUCTURE
152
+
153
+ def __init__(self, expected_header: str):
154
+ self.expected_header = expected_header
155
+
156
+ def validate(self, path: Path) -> ValidationResult:
157
+ try:
158
+ first_line = path.read_text(encoding="utf-8").split("\n", 1)[0]
159
+ is_passed = first_line.strip() == self.expected_header
160
+ except (OSError, UnicodeDecodeError):
161
+ is_passed = False
162
+ return ValidationResult(
163
+ validator_id=self.id,
164
+ role=self.role,
165
+ is_passed=is_passed,
166
+ details=f"Header {'matches' if is_passed else 'mismatch'}",
167
+ )
168
+
169
+ # Use with the builder pattern
170
+ evaluator = (
171
+ HeuristicEvaluator()
172
+ .check_extension([".csv"])
173
+ .add_validator(HeaderValidator("id,name,value"))
174
+ )
175
+ ```
176
+
177
+ ---
178
+
179
+ ## Evaluator Caching
180
+
181
+ `HeuristicEvaluator` includes an LRU cache keyed by `(path, max_tier)`:
182
+
183
+ ```python
184
+ # Default: 128 entries
185
+ evaluator = HeuristicEvaluator(cache_size=128)
186
+
187
+ # Disable caching
188
+ evaluator = HeuristicEvaluator(cache_size=0)
189
+
190
+ # Bypass cache for a single call
191
+ report = descriptor.evaluate(path, use_cache=False)
192
+
193
+ # Inspect and manage
194
+ info = evaluator.cache_info() # (hits, misses, maxsize, currsize) or None
195
+ evaluator.clear_cache()
196
+ ```
197
+
198
+ Cache entries become stale if file contents change. Call `clear_cache()` after
199
+ known file modifications, or pass `use_cache=False` for one-off re-evaluation.
200
+
201
+ ---
202
+
203
+ ## Pre-filtering with Rules
204
+
205
+ ```python
206
+ from vcti.lookup import Rule
207
+
208
+ # Only evaluate formats with structure="hdf5"
209
+ results = identifier.identify_file_format(
210
+ path,
211
+ rules=[Rule("structure", "==", "hdf5")],
212
+ )
213
+ ```
214
+
215
+ ---
216
+
217
+ ## Error Handling
218
+
219
+ The framework raises typed exceptions:
220
+
221
+ | Exception | When |
222
+ |-----------|------|
223
+ | `FileNotFoundError` | Path does not exist |
224
+ | `PathAccessError` | Path is not a file or directory, or cannot be read |
225
+ | `EvaluatorError` | Base class for evaluator errors |
226
+ | `ValidationError` | A validator raised an unexpected exception |
227
+ | `InvalidValidatorError` | Invalid validator passed to builder |
228
+
229
+ ```python
230
+ from vcti.pathformat import PathAccessError
231
+
232
+ try:
233
+ results = identifier.identify_file_format(path)
234
+ except FileNotFoundError:
235
+ print("File not found")
236
+ except PathAccessError as e:
237
+ print(f"Cannot access path: {e}")
238
+ ```
239
+
240
+ ---
241
+
242
+ ## Ecosystem
243
+
244
+ This package is the identification engine in a three-repo system:
245
+
246
+ | Package | Role |
247
+ |---------|------|
248
+ | **vcti-path-format** | Framework: evaluators, validators, registry, identifier |
249
+ | [vcti-path-format-attributes](https://pypi.org/project/vcti-path-format-attributes/) | Vocabulary: standardized attribute enums |
250
+ | [vcti-path-format-descriptors](https://pypi.org/project/vcti-path-format-descriptors/) | Built-in format definitions (HDF5, CAX, etc.) |
251
+
252
+ ---
253
+
254
+ ## Dependencies
255
+
256
+ - [vcti-plugin-catalog](https://pypi.org/project/vcti-plugin-catalog/) (>=1.0.1) — descriptor/registry framework
257
+ - [vcti-lookup](https://pypi.org/project/vcti-lookup/) (>=1.0.1) — attribute-based filtering
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "vcti-path-format"
7
+ version = "1.2.0"
8
+ description = "File format identification framework with heuristic evaluators and feature validators for Python"
9
+ readme = "README.md"
10
+ authors = [
11
+ {name = "Visual Collaboration Technologies Inc."}
12
+ ]
13
+ requires-python = ">=3.12,<3.15"
14
+ dependencies = [
15
+ "vcti-plugin-catalog>=1.0.1",
16
+ "vcti-lookup>=1.0.1",
17
+ ]
18
+
19
+ [project.optional-dependencies]
20
+ test = ["pytest", "pytest-cov"]
21
+ lint = ["ruff"]
22
+
23
+ [tool.setuptools.packages.find]
24
+ where = ["src"]
25
+ include = ["vcti.pathformat", "vcti.pathformat.*"]
26
+
27
+ [tool.setuptools.package-data]
28
+ "vcti.pathformat" = ["py.typed"]
29
+
30
+ [tool.setuptools]
31
+ zip-safe = true
32
+
33
+ [tool.pytest.ini_options]
34
+ addopts = "--cov=vcti.pathformat --cov-report=term-missing --cov-fail-under=95"
35
+
36
+ [tool.ruff]
37
+ target-version = "py312"
38
+ line-length = 99
39
+
40
+ [tool.ruff.lint]
41
+ select = ["E", "F", "W", "I", "UP"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,29 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """vcti.pathformat — File format identification framework with heuristic evaluators."""
4
+
5
+ from importlib.metadata import version
6
+
7
+ from .descriptor import FormatDescriptor
8
+ from .evaluator.base import EvaluationReport, Evaluator, MatchConfidence
9
+ from .evaluator.heuristic import PathAccessError
10
+ from .feature_validator.base import ValidationResult, ValidationTier
11
+ from .identifier import FormatIdentifier, IdentificationResult, identify_file_format
12
+ from .registry import FormatRegistry
13
+
14
+ __version__ = version("vcti-path-format")
15
+
16
+ __all__ = [
17
+ "__version__",
18
+ "EvaluationReport",
19
+ "Evaluator",
20
+ "FormatDescriptor",
21
+ "FormatIdentifier",
22
+ "FormatRegistry",
23
+ "IdentificationResult",
24
+ "MatchConfidence",
25
+ "PathAccessError",
26
+ "ValidationResult",
27
+ "ValidationTier",
28
+ "identify_file_format",
29
+ ]
@@ -0,0 +1,94 @@
1
+ # Copyright Visual Collaboration Technologies Inc. All Rights Reserved.
2
+ # See LICENSE for details.
3
+ """Format descriptor for supported file or folder formats.
4
+
5
+ Defines the FormatDescriptor class, which encapsulates metadata, validation logic,
6
+ and attributes for a specific data format.
7
+ """
8
+
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ from vcti.plugincatalog import Descriptor
13
+
14
+ from .evaluator.base import EvaluationReport, Evaluator, MatchConfidence
15
+ from .feature_validator.base import ValidationTier
16
+
17
+
18
class FormatDescriptor(Descriptor[Evaluator]):
    """Catalog entry for one supported data format (file or folder).

    Couples an Evaluator with the format's metadata by handing the evaluator
    to the generic Descriptor base class as its ``instance``.

    Args:
        id: Unique identifier for the format (e.g., 'csv', 'hdf5-file').
        name: Human-readable name for the format.
        evaluator: Evaluator instance used to determine match confidence.
        description: Optional description of the format.
        attributes: Optional format-specific attributes as key-value pairs.

    Raises:
        TypeError: If ``evaluator`` is not an Evaluator instance.
    """

    def __init__(
        self,
        id: str,
        name: str,
        evaluator: Evaluator,
        description: str | None = None,
        attributes: dict[str, Any] | None = None,
    ):
        # Reject anything that is not an Evaluator before touching the base class.
        if not isinstance(evaluator, Evaluator):
            received = type(evaluator).__name__
            raise TypeError(f"evaluator must be an Evaluator instance, got {received}")

        super().__init__(
            id=id,
            name=name,
            instance=evaluator,
            description=description,
            attributes=attributes,
        )

    @property
    def evaluator(self) -> Evaluator:
        """Return the evaluator held as this descriptor's ``instance``."""
        return self.instance

    def evaluate(
        self,
        path: Path,
        max_tier: ValidationTier = ValidationTier.SEMANTIC,
        use_cache: bool = True,
    ) -> EvaluationReport:
        """Run this format's evaluator against ``path``.

        Args:
            path: Path to the file or folder to evaluate.
            max_tier: Maximum validation tier to execute (inclusive).
            use_cache: Forwarded to the evaluator as the ``use_cache`` keyword.

        Returns:
            The evaluation report produced by the evaluator.
        """
        delegate = self.evaluator
        return delegate.evaluate(path, max_tier, use_cache=use_cache)

    def evaluate_confidence(
        self,
        path: Path,
        max_tier: ValidationTier = ValidationTier.SEMANTIC,
        use_cache: bool = True,
    ) -> MatchConfidence:
        """Evaluate ``path`` and return only the report's confidence.

        Args:
            path: Path to the file or folder to evaluate.
            max_tier: Maximum validation tier to execute (inclusive).
            use_cache: Passed through to :meth:`evaluate`.

        Returns:
            The ``confidence`` attribute of the evaluation report.
        """
        report = self.evaluate(path, max_tier, use_cache)
        return report.confidence

    def __repr__(self) -> str:
        ident, label = self.id, self.name
        return f"FormatDescriptor(id={ident!r}, name={label!r})"