truthound-dashboard 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/deps.py +28 -0
- truthound_dashboard/api/drift.py +1 -0
- truthound_dashboard/api/mask.py +164 -0
- truthound_dashboard/api/profile.py +11 -3
- truthound_dashboard/api/router.py +22 -0
- truthound_dashboard/api/scan.py +168 -0
- truthound_dashboard/api/schemas.py +13 -4
- truthound_dashboard/api/validations.py +33 -1
- truthound_dashboard/api/validators.py +85 -0
- truthound_dashboard/core/__init__.py +8 -0
- truthound_dashboard/core/phase5/activity.py +1 -1
- truthound_dashboard/core/services.py +457 -7
- truthound_dashboard/core/truthound_adapter.py +441 -26
- truthound_dashboard/db/__init__.py +6 -0
- truthound_dashboard/db/models.py +250 -1
- truthound_dashboard/schemas/__init__.py +52 -1
- truthound_dashboard/schemas/collaboration.py +1 -1
- truthound_dashboard/schemas/drift.py +118 -3
- truthound_dashboard/schemas/mask.py +209 -0
- truthound_dashboard/schemas/profile.py +45 -2
- truthound_dashboard/schemas/scan.py +312 -0
- truthound_dashboard/schemas/schema.py +30 -2
- truthound_dashboard/schemas/validation.py +60 -3
- truthound_dashboard/schemas/validators/__init__.py +59 -0
- truthound_dashboard/schemas/validators/aggregate_validators.py +238 -0
- truthound_dashboard/schemas/validators/anomaly_validators.py +723 -0
- truthound_dashboard/schemas/validators/base.py +263 -0
- truthound_dashboard/schemas/validators/completeness_validators.py +269 -0
- truthound_dashboard/schemas/validators/cross_table_validators.py +375 -0
- truthound_dashboard/schemas/validators/datetime_validators.py +253 -0
- truthound_dashboard/schemas/validators/distribution_validators.py +422 -0
- truthound_dashboard/schemas/validators/drift_validators.py +615 -0
- truthound_dashboard/schemas/validators/geospatial_validators.py +486 -0
- truthound_dashboard/schemas/validators/multi_column_validators.py +706 -0
- truthound_dashboard/schemas/validators/privacy_validators.py +531 -0
- truthound_dashboard/schemas/validators/query_validators.py +510 -0
- truthound_dashboard/schemas/validators/registry.py +318 -0
- truthound_dashboard/schemas/validators/schema_validators.py +408 -0
- truthound_dashboard/schemas/validators/string_validators.py +396 -0
- truthound_dashboard/schemas/validators/table_validators.py +412 -0
- truthound_dashboard/schemas/validators/uniqueness_validators.py +355 -0
- truthound_dashboard/schemas/validators.py +59 -0
- truthound_dashboard/static/assets/{index-BqXVFyqj.js → index-BCA8H1hO.js} +95 -95
- truthound_dashboard/static/assets/index-BNsSQ2fN.css +1 -0
- truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +1 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/METADATA +46 -11
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/RECORD +51 -27
- truthound_dashboard/static/assets/index-o8qHVDte.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-n_T3wZTf.js +0 -1
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/licenses/LICENSE +0 -0
truthound_dashboard/api/deps.py
CHANGED
|
@@ -22,6 +22,8 @@ from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
22
22
|
from truthound_dashboard.core import (
|
|
23
23
|
DriftService,
|
|
24
24
|
HistoryService,
|
|
25
|
+
MaskService,
|
|
26
|
+
PIIScanService,
|
|
25
27
|
ProfileService,
|
|
26
28
|
RuleService,
|
|
27
29
|
ScheduleService,
|
|
@@ -142,6 +144,30 @@ async def get_schedule_service(session: SessionDep) -> ScheduleService:
|
|
|
142
144
|
return ScheduleService(session)
|
|
143
145
|
|
|
144
146
|
|
|
147
|
+
async def get_pii_scan_service(session: SessionDep) -> PIIScanService:
|
|
148
|
+
"""Get PII scan service dependency.
|
|
149
|
+
|
|
150
|
+
Args:
|
|
151
|
+
session: Database session.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
PIIScanService instance.
|
|
155
|
+
"""
|
|
156
|
+
return PIIScanService(session)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
async def get_mask_service(session: SessionDep) -> MaskService:
|
|
160
|
+
"""Get mask service dependency.
|
|
161
|
+
|
|
162
|
+
Args:
|
|
163
|
+
session: Database session.
|
|
164
|
+
|
|
165
|
+
Returns:
|
|
166
|
+
MaskService instance.
|
|
167
|
+
"""
|
|
168
|
+
return MaskService(session)
|
|
169
|
+
|
|
170
|
+
|
|
145
171
|
# Type aliases for service dependencies
|
|
146
172
|
SourceServiceDep = Annotated[SourceService, Depends(get_source_service)]
|
|
147
173
|
ValidationServiceDep = Annotated[ValidationService, Depends(get_validation_service)]
|
|
@@ -151,3 +177,5 @@ RuleServiceDep = Annotated[RuleService, Depends(get_rule_service)]
|
|
|
151
177
|
HistoryServiceDep = Annotated[HistoryService, Depends(get_history_service)]
|
|
152
178
|
DriftServiceDep = Annotated[DriftService, Depends(get_drift_service)]
|
|
153
179
|
ScheduleServiceDep = Annotated[ScheduleService, Depends(get_schedule_service)]
|
|
180
|
+
PIIScanServiceDep = Annotated[PIIScanService, Depends(get_pii_scan_service)]
|
|
181
|
+
MaskServiceDep = Annotated[MaskService, Depends(get_mask_service)]
|
truthound_dashboard/api/drift.py
CHANGED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Data masking API endpoints.
|
|
2
|
+
|
|
3
|
+
Provides endpoints for running th.mask() operations with three strategies:
|
|
4
|
+
- redact: Replace values with asterisks
|
|
5
|
+
- hash: Replace values with SHA256 hash
|
|
6
|
+
- fake: Replace values with realistic fake data
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from fastapi import APIRouter, HTTPException, status
|
|
12
|
+
|
|
13
|
+
from truthound_dashboard.schemas import (
|
|
14
|
+
MaskListItem,
|
|
15
|
+
MaskListResponse,
|
|
16
|
+
MaskRequest,
|
|
17
|
+
MaskResponse,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .deps import MaskServiceDep
|
|
21
|
+
|
|
22
|
+
router = APIRouter(prefix="/masks", tags=["masks"])
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@router.post(
|
|
26
|
+
"/sources/{source_id}/mask",
|
|
27
|
+
response_model=MaskResponse,
|
|
28
|
+
status_code=status.HTTP_201_CREATED,
|
|
29
|
+
summary="Run data masking",
|
|
30
|
+
description="""
|
|
31
|
+
Run data masking on a source using th.mask().
|
|
32
|
+
|
|
33
|
+
Supports three masking strategies:
|
|
34
|
+
- **redact**: Replace values with asterisks (default)
|
|
35
|
+
- **hash**: Replace values with SHA256 hash (deterministic, can be used for joins)
|
|
36
|
+
- **fake**: Replace values with realistic fake data
|
|
37
|
+
|
|
38
|
+
If `columns` is not specified, PII columns are auto-detected.
|
|
39
|
+
""",
|
|
40
|
+
)
|
|
41
|
+
async def run_mask(
|
|
42
|
+
source_id: str,
|
|
43
|
+
request: MaskRequest,
|
|
44
|
+
service: MaskServiceDep,
|
|
45
|
+
) -> MaskResponse:
|
|
46
|
+
"""Run data masking on a source.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
source_id: Source ID to mask.
|
|
50
|
+
request: Masking options.
|
|
51
|
+
service: Mask service dependency.
|
|
52
|
+
|
|
53
|
+
Returns:
|
|
54
|
+
MaskResponse with operation details.
|
|
55
|
+
|
|
56
|
+
Raises:
|
|
57
|
+
HTTPException: If source not found or masking fails.
|
|
58
|
+
"""
|
|
59
|
+
try:
|
|
60
|
+
mask = await service.run_mask(
|
|
61
|
+
source_id,
|
|
62
|
+
columns=request.columns,
|
|
63
|
+
strategy=request.strategy,
|
|
64
|
+
output_format=request.output_format,
|
|
65
|
+
)
|
|
66
|
+
return MaskResponse.from_db(mask)
|
|
67
|
+
except ValueError as e:
|
|
68
|
+
raise HTTPException(
|
|
69
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
70
|
+
detail=str(e),
|
|
71
|
+
) from e
|
|
72
|
+
except Exception as e:
|
|
73
|
+
raise HTTPException(
|
|
74
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
75
|
+
detail=f"Masking failed: {e}",
|
|
76
|
+
) from e
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@router.get(
|
|
80
|
+
"/{mask_id}",
|
|
81
|
+
response_model=MaskResponse,
|
|
82
|
+
summary="Get mask operation by ID",
|
|
83
|
+
)
|
|
84
|
+
async def get_mask(
|
|
85
|
+
mask_id: str,
|
|
86
|
+
service: MaskServiceDep,
|
|
87
|
+
) -> MaskResponse:
|
|
88
|
+
"""Get a masking operation by ID.
|
|
89
|
+
|
|
90
|
+
Args:
|
|
91
|
+
mask_id: Mask operation ID.
|
|
92
|
+
service: Mask service dependency.
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
MaskResponse with operation details.
|
|
96
|
+
|
|
97
|
+
Raises:
|
|
98
|
+
HTTPException: If mask operation not found.
|
|
99
|
+
"""
|
|
100
|
+
mask = await service.get_mask(mask_id)
|
|
101
|
+
if mask is None:
|
|
102
|
+
raise HTTPException(
|
|
103
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
104
|
+
detail=f"Mask operation '{mask_id}' not found",
|
|
105
|
+
)
|
|
106
|
+
return MaskResponse.from_db(mask)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@router.get(
|
|
110
|
+
"/sources/{source_id}/masks",
|
|
111
|
+
response_model=MaskListResponse,
|
|
112
|
+
summary="List mask operations for a source",
|
|
113
|
+
)
|
|
114
|
+
async def list_masks(
|
|
115
|
+
source_id: str,
|
|
116
|
+
service: MaskServiceDep,
|
|
117
|
+
limit: int = 20,
|
|
118
|
+
) -> MaskListResponse:
|
|
119
|
+
"""List masking operations for a source.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
source_id: Source ID.
|
|
123
|
+
service: Mask service dependency.
|
|
124
|
+
limit: Maximum number of results (default: 20).
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
MaskListResponse with list of operations.
|
|
128
|
+
"""
|
|
129
|
+
masks = await service.list_for_source(source_id, limit=limit)
|
|
130
|
+
return MaskListResponse(
|
|
131
|
+
data=[MaskListItem.from_db(m) for m in masks],
|
|
132
|
+
total=len(masks),
|
|
133
|
+
limit=limit,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
@router.get(
|
|
138
|
+
"/sources/{source_id}/masks/latest",
|
|
139
|
+
response_model=MaskResponse,
|
|
140
|
+
summary="Get latest mask operation for a source",
|
|
141
|
+
)
|
|
142
|
+
async def get_latest_mask(
|
|
143
|
+
source_id: str,
|
|
144
|
+
service: MaskServiceDep,
|
|
145
|
+
) -> MaskResponse:
|
|
146
|
+
"""Get the most recent masking operation for a source.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
source_id: Source ID.
|
|
150
|
+
service: Mask service dependency.
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
MaskResponse with operation details.
|
|
154
|
+
|
|
155
|
+
Raises:
|
|
156
|
+
HTTPException: If no mask operations found for source.
|
|
157
|
+
"""
|
|
158
|
+
mask = await service.get_latest_for_source(source_id)
|
|
159
|
+
if mask is None:
|
|
160
|
+
raise HTTPException(
|
|
161
|
+
status_code=status.HTTP_404_NOT_FOUND,
|
|
162
|
+
detail=f"No mask operations found for source '{source_id}'",
|
|
163
|
+
)
|
|
164
|
+
return MaskResponse.from_db(mask)
|
|
@@ -9,7 +9,7 @@ from typing import Annotated
|
|
|
9
9
|
|
|
10
10
|
from fastapi import APIRouter, HTTPException, Path
|
|
11
11
|
|
|
12
|
-
from truthound_dashboard.schemas import ProfileResponse
|
|
12
|
+
from truthound_dashboard.schemas import ProfileRequest, ProfileResponse
|
|
13
13
|
|
|
14
14
|
from .deps import ProfileServiceDep, SourceServiceDep
|
|
15
15
|
|
|
@@ -20,12 +20,13 @@ router = APIRouter()
|
|
|
20
20
|
"/sources/{source_id}/profile",
|
|
21
21
|
response_model=ProfileResponse,
|
|
22
22
|
summary="Profile source",
|
|
23
|
-
description="Run data profiling on a source",
|
|
23
|
+
description="Run data profiling on a source with optional sampling",
|
|
24
24
|
)
|
|
25
25
|
async def profile_source(
|
|
26
26
|
service: ProfileServiceDep,
|
|
27
27
|
source_service: SourceServiceDep,
|
|
28
28
|
source_id: Annotated[str, Path(description="Source ID to profile")],
|
|
29
|
+
request: ProfileRequest | None = None,
|
|
29
30
|
) -> ProfileResponse:
|
|
30
31
|
"""Run data profiling on a source.
|
|
31
32
|
|
|
@@ -33,6 +34,7 @@ async def profile_source(
|
|
|
33
34
|
service: Injected profile service.
|
|
34
35
|
source_service: Injected source service.
|
|
35
36
|
source_id: Source to profile.
|
|
37
|
+
request: Optional profiling configuration with sample_size.
|
|
36
38
|
|
|
37
39
|
Returns:
|
|
38
40
|
Profiling result with column statistics.
|
|
@@ -45,8 +47,14 @@ async def profile_source(
|
|
|
45
47
|
if source is None:
|
|
46
48
|
raise HTTPException(status_code=404, detail="Source not found")
|
|
47
49
|
|
|
50
|
+
# Extract sample_size from request if provided
|
|
51
|
+
sample_size = request.sample_size if request else None
|
|
52
|
+
|
|
48
53
|
try:
|
|
49
|
-
result = await service.profile_source(
|
|
54
|
+
result = await service.profile_source(
|
|
55
|
+
source_id,
|
|
56
|
+
sample_size=sample_size,
|
|
57
|
+
)
|
|
50
58
|
return ProfileResponse.from_result(result)
|
|
51
59
|
except Exception as e:
|
|
52
60
|
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -10,13 +10,16 @@ from . import (
|
|
|
10
10
|
drift,
|
|
11
11
|
health,
|
|
12
12
|
history,
|
|
13
|
+
mask,
|
|
13
14
|
notifications,
|
|
14
15
|
profile,
|
|
15
16
|
rules,
|
|
17
|
+
scan,
|
|
16
18
|
schedules,
|
|
17
19
|
schemas,
|
|
18
20
|
sources,
|
|
19
21
|
validations,
|
|
22
|
+
validators,
|
|
20
23
|
# Phase 5
|
|
21
24
|
catalog,
|
|
22
25
|
collaboration,
|
|
@@ -57,6 +60,12 @@ api_router.include_router(
|
|
|
57
60
|
tags=["validations"],
|
|
58
61
|
)
|
|
59
62
|
|
|
63
|
+
# Validator registry endpoints
|
|
64
|
+
api_router.include_router(
|
|
65
|
+
validators.router,
|
|
66
|
+
tags=["validators"],
|
|
67
|
+
)
|
|
68
|
+
|
|
60
69
|
# Profiling endpoints
|
|
61
70
|
api_router.include_router(
|
|
62
71
|
profile.router,
|
|
@@ -75,6 +84,19 @@ api_router.include_router(
|
|
|
75
84
|
tags=["drift"],
|
|
76
85
|
)
|
|
77
86
|
|
|
87
|
+
# PII scan endpoints
|
|
88
|
+
api_router.include_router(
|
|
89
|
+
scan.router,
|
|
90
|
+
prefix="/scans",
|
|
91
|
+
tags=["pii-scan"],
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
# Data masking endpoints
|
|
95
|
+
api_router.include_router(
|
|
96
|
+
mask.router,
|
|
97
|
+
tags=["masks"],
|
|
98
|
+
)
|
|
99
|
+
|
|
78
100
|
# Schedule management endpoints (Phase 2)
|
|
79
101
|
api_router.include_router(
|
|
80
102
|
schedules.router,
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""PII scan API endpoints.
|
|
2
|
+
|
|
3
|
+
This module provides endpoints for running PII scans using th.scan().
|
|
4
|
+
Detects personally identifiable information and checks compliance with
|
|
5
|
+
privacy regulations (GDPR, CCPA, LGPD).
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Annotated
|
|
11
|
+
|
|
12
|
+
from fastapi import APIRouter, HTTPException, Path, Query
|
|
13
|
+
|
|
14
|
+
from truthound_dashboard.schemas import (
|
|
15
|
+
PIIScanListItem,
|
|
16
|
+
PIIScanListResponse,
|
|
17
|
+
PIIScanRequest,
|
|
18
|
+
PIIScanResponse,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
from .deps import PIIScanServiceDep, SourceServiceDep
|
|
22
|
+
|
|
23
|
+
router = APIRouter()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@router.post(
|
|
27
|
+
"/sources/{source_id}/scan",
|
|
28
|
+
response_model=PIIScanResponse,
|
|
29
|
+
summary="Run PII scan",
|
|
30
|
+
description="Scan data source for personally identifiable information (PII)",
|
|
31
|
+
)
|
|
32
|
+
async def run_pii_scan(
|
|
33
|
+
service: PIIScanServiceDep,
|
|
34
|
+
source_id: Annotated[str, Path(description="Source ID to scan")],
|
|
35
|
+
request: PIIScanRequest,
|
|
36
|
+
) -> PIIScanResponse:
|
|
37
|
+
"""Run PII scan on a data source.
|
|
38
|
+
|
|
39
|
+
Supports all th.scan() parameters for maximum flexibility:
|
|
40
|
+
- columns: Specific columns to scan
|
|
41
|
+
- regulations: Privacy regulations to check (gdpr, ccpa, lgpd)
|
|
42
|
+
- min_confidence: Confidence threshold for PII detection
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
service: Injected PII scan service.
|
|
46
|
+
source_id: Source to scan.
|
|
47
|
+
request: Scan options.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
PII scan result with findings and violations.
|
|
51
|
+
|
|
52
|
+
Raises:
|
|
53
|
+
HTTPException: 404 if source not found.
|
|
54
|
+
"""
|
|
55
|
+
try:
|
|
56
|
+
scan = await service.run_scan(
|
|
57
|
+
source_id,
|
|
58
|
+
columns=request.columns,
|
|
59
|
+
regulations=request.regulations,
|
|
60
|
+
min_confidence=request.min_confidence,
|
|
61
|
+
)
|
|
62
|
+
return PIIScanResponse.from_model(scan)
|
|
63
|
+
except ValueError as e:
|
|
64
|
+
raise HTTPException(status_code=404, detail=str(e))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@router.get(
|
|
68
|
+
"/{scan_id}",
|
|
69
|
+
response_model=PIIScanResponse,
|
|
70
|
+
summary="Get PII scan",
|
|
71
|
+
description="Get a specific PII scan result by ID",
|
|
72
|
+
)
|
|
73
|
+
async def get_pii_scan(
|
|
74
|
+
service: PIIScanServiceDep,
|
|
75
|
+
scan_id: Annotated[str, Path(description="Scan ID")],
|
|
76
|
+
) -> PIIScanResponse:
|
|
77
|
+
"""Get a specific PII scan result.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
service: Injected PII scan service.
|
|
81
|
+
scan_id: Scan unique identifier.
|
|
82
|
+
|
|
83
|
+
Returns:
|
|
84
|
+
PII scan details with findings and violations.
|
|
85
|
+
|
|
86
|
+
Raises:
|
|
87
|
+
HTTPException: 404 if scan not found.
|
|
88
|
+
"""
|
|
89
|
+
scan = await service.get_scan(scan_id)
|
|
90
|
+
if scan is None:
|
|
91
|
+
raise HTTPException(status_code=404, detail="PII scan not found")
|
|
92
|
+
return PIIScanResponse.from_model(scan)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@router.get(
|
|
96
|
+
"/sources/{source_id}/scans",
|
|
97
|
+
response_model=PIIScanListResponse,
|
|
98
|
+
summary="List source PII scans",
|
|
99
|
+
description="Get PII scan history for a source",
|
|
100
|
+
)
|
|
101
|
+
async def list_source_pii_scans(
|
|
102
|
+
service: PIIScanServiceDep,
|
|
103
|
+
source_service: SourceServiceDep,
|
|
104
|
+
source_id: Annotated[str, Path(description="Source ID")],
|
|
105
|
+
limit: Annotated[int, Query(ge=1, le=100, description="Maximum items")] = 20,
|
|
106
|
+
) -> PIIScanListResponse:
|
|
107
|
+
"""List PII scan history for a source.
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
service: Injected PII scan service.
|
|
111
|
+
source_service: Injected source service.
|
|
112
|
+
source_id: Source to get scans for.
|
|
113
|
+
limit: Maximum scans to return.
|
|
114
|
+
|
|
115
|
+
Returns:
|
|
116
|
+
List of PII scan summaries.
|
|
117
|
+
|
|
118
|
+
Raises:
|
|
119
|
+
HTTPException: 404 if source not found.
|
|
120
|
+
"""
|
|
121
|
+
# Verify source exists
|
|
122
|
+
source = await source_service.get_by_id(source_id)
|
|
123
|
+
if source is None:
|
|
124
|
+
raise HTTPException(status_code=404, detail="Source not found")
|
|
125
|
+
|
|
126
|
+
scans = await service.list_for_source(source_id, limit=limit)
|
|
127
|
+
|
|
128
|
+
return PIIScanListResponse(
|
|
129
|
+
data=[PIIScanListItem.from_model(s) for s in scans],
|
|
130
|
+
total=len(scans),
|
|
131
|
+
limit=limit,
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@router.get(
|
|
136
|
+
"/sources/{source_id}/scans/latest",
|
|
137
|
+
response_model=PIIScanResponse,
|
|
138
|
+
summary="Get latest PII scan",
|
|
139
|
+
description="Get the most recent PII scan for a source",
|
|
140
|
+
)
|
|
141
|
+
async def get_latest_pii_scan(
|
|
142
|
+
service: PIIScanServiceDep,
|
|
143
|
+
source_service: SourceServiceDep,
|
|
144
|
+
source_id: Annotated[str, Path(description="Source ID")],
|
|
145
|
+
) -> PIIScanResponse:
|
|
146
|
+
"""Get the most recent PII scan for a source.
|
|
147
|
+
|
|
148
|
+
Args:
|
|
149
|
+
service: Injected PII scan service.
|
|
150
|
+
source_service: Injected source service.
|
|
151
|
+
source_id: Source to get latest scan for.
|
|
152
|
+
|
|
153
|
+
Returns:
|
|
154
|
+
Latest PII scan result.
|
|
155
|
+
|
|
156
|
+
Raises:
|
|
157
|
+
HTTPException: 404 if source or scan not found.
|
|
158
|
+
"""
|
|
159
|
+
# Verify source exists
|
|
160
|
+
source = await source_service.get_by_id(source_id)
|
|
161
|
+
if source is None:
|
|
162
|
+
raise HTTPException(status_code=404, detail="Source not found")
|
|
163
|
+
|
|
164
|
+
scan = await service.get_latest_for_source(source_id)
|
|
165
|
+
if scan is None:
|
|
166
|
+
raise HTTPException(status_code=404, detail="No PII scans found for source")
|
|
167
|
+
|
|
168
|
+
return PIIScanResponse.from_model(scan)
|
|
@@ -59,7 +59,7 @@ async def get_schema(
|
|
|
59
59
|
"/sources/{source_id}/learn",
|
|
60
60
|
response_model=SchemaResponse,
|
|
61
61
|
summary="Learn schema",
|
|
62
|
-
description="Auto-learn schema from data source",
|
|
62
|
+
description="Auto-learn schema from data source with customizable inference options",
|
|
63
63
|
)
|
|
64
64
|
async def learn_schema(
|
|
65
65
|
service: SchemaServiceDep,
|
|
@@ -69,17 +69,24 @@ async def learn_schema(
|
|
|
69
69
|
) -> SchemaResponse:
|
|
70
70
|
"""Learn schema from a data source.
|
|
71
71
|
|
|
72
|
+
Analyzes the data source and generates a schema with inferred types
|
|
73
|
+
and optional constraints. Supports customization of categorical detection
|
|
74
|
+
and sampling for large datasets.
|
|
75
|
+
|
|
72
76
|
Args:
|
|
73
77
|
service: Injected schema service.
|
|
74
78
|
source_service: Injected source service.
|
|
75
79
|
source_id: Source to learn schema from.
|
|
76
|
-
request: Learning options
|
|
80
|
+
request: Learning options including:
|
|
81
|
+
- infer_constraints: Whether to infer min/max and allowed values
|
|
82
|
+
- categorical_threshold: Max unique values for categorical detection
|
|
83
|
+
- sample_size: Number of rows to sample for large datasets
|
|
77
84
|
|
|
78
85
|
Returns:
|
|
79
|
-
Learned schema.
|
|
86
|
+
Learned schema with column types and constraints.
|
|
80
87
|
|
|
81
88
|
Raises:
|
|
82
|
-
HTTPException: 404 if source not found.
|
|
89
|
+
HTTPException: 404 if source not found, 500 on learning error.
|
|
83
90
|
"""
|
|
84
91
|
# Verify source exists
|
|
85
92
|
source = await source_service.get_by_id(source_id)
|
|
@@ -90,6 +97,8 @@ async def learn_schema(
|
|
|
90
97
|
schema = await service.learn_schema(
|
|
91
98
|
source_id,
|
|
92
99
|
infer_constraints=request.infer_constraints,
|
|
100
|
+
categorical_threshold=request.categorical_threshold,
|
|
101
|
+
sample_size=request.sample_size,
|
|
93
102
|
)
|
|
94
103
|
return SchemaResponse.from_model(schema)
|
|
95
104
|
except Exception as e:
|
|
@@ -15,6 +15,7 @@ from truthound_dashboard.schemas import (
|
|
|
15
15
|
ValidationResponse,
|
|
16
16
|
ValidationRunRequest,
|
|
17
17
|
)
|
|
18
|
+
from truthound_dashboard.schemas.validators import configs_to_truthound_format
|
|
18
19
|
|
|
19
20
|
from .deps import SourceServiceDep, ValidationServiceDep
|
|
20
21
|
|
|
@@ -34,6 +35,17 @@ async def run_validation(
|
|
|
34
35
|
) -> ValidationResponse:
|
|
35
36
|
"""Run validation on a data source.
|
|
36
37
|
|
|
38
|
+
Supports all th.check() parameters for maximum flexibility:
|
|
39
|
+
- validators: Specific validators to run
|
|
40
|
+
- schema_path: Path to schema YAML file
|
|
41
|
+
- auto_schema: Auto-learn and cache schema
|
|
42
|
+
- columns: Specific columns to validate
|
|
43
|
+
- min_severity: Minimum severity to report
|
|
44
|
+
- strict: Raise exception on failures
|
|
45
|
+
- parallel: Use parallel execution
|
|
46
|
+
- max_workers: Max threads for parallel
|
|
47
|
+
- pushdown: Enable query pushdown for SQL
|
|
48
|
+
|
|
37
49
|
Args:
|
|
38
50
|
service: Injected validation service.
|
|
39
51
|
source_id: Source to validate.
|
|
@@ -46,11 +58,31 @@ async def run_validation(
|
|
|
46
58
|
HTTPException: 404 if source not found.
|
|
47
59
|
"""
|
|
48
60
|
try:
|
|
61
|
+
# Determine validators and params based on request mode
|
|
62
|
+
validators: list[str] | None = None
|
|
63
|
+
validator_params: dict | None = None
|
|
64
|
+
|
|
65
|
+
if request.validator_configs:
|
|
66
|
+
# Advanced mode: use validator_configs (takes precedence)
|
|
67
|
+
validators, validator_params = configs_to_truthound_format(
|
|
68
|
+
request.validator_configs
|
|
69
|
+
)
|
|
70
|
+
elif request.validators:
|
|
71
|
+
# Simple mode: use validator names list (backward compatible)
|
|
72
|
+
validators = request.validators
|
|
73
|
+
|
|
49
74
|
validation = await service.run_validation(
|
|
50
75
|
source_id,
|
|
51
|
-
validators=
|
|
76
|
+
validators=validators,
|
|
77
|
+
validator_params=validator_params,
|
|
52
78
|
schema_path=request.schema_path,
|
|
53
79
|
auto_schema=request.auto_schema,
|
|
80
|
+
columns=request.columns,
|
|
81
|
+
min_severity=request.min_severity,
|
|
82
|
+
strict=request.strict,
|
|
83
|
+
parallel=request.parallel,
|
|
84
|
+
max_workers=request.max_workers,
|
|
85
|
+
pushdown=request.pushdown,
|
|
54
86
|
)
|
|
55
87
|
return ValidationResponse.from_model(validation)
|
|
56
88
|
except ValueError as e:
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Validators API endpoints.
|
|
2
|
+
|
|
3
|
+
This module provides API endpoints for validator discovery and configuration.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from fastapi import APIRouter, Query
|
|
9
|
+
|
|
10
|
+
from ..schemas.validators import (
|
|
11
|
+
VALIDATOR_REGISTRY,
|
|
12
|
+
ValidatorCategory,
|
|
13
|
+
ValidatorDefinition,
|
|
14
|
+
get_validator_by_name,
|
|
15
|
+
get_validators_by_category,
|
|
16
|
+
search_validators,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
router = APIRouter()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@router.get(
|
|
23
|
+
"/validators",
|
|
24
|
+
response_model=list[ValidatorDefinition],
|
|
25
|
+
summary="List all validators",
|
|
26
|
+
description="Returns all available validators with their parameter definitions.",
|
|
27
|
+
)
|
|
28
|
+
async def list_validators(
|
|
29
|
+
category: ValidatorCategory | None = Query(
|
|
30
|
+
default=None, description="Filter by category"
|
|
31
|
+
),
|
|
32
|
+
search: str | None = Query(
|
|
33
|
+
default=None, description="Search by name, description, or tags"
|
|
34
|
+
),
|
|
35
|
+
) -> list[ValidatorDefinition]:
|
|
36
|
+
"""List all validators, optionally filtered.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
category: Optional category filter.
|
|
40
|
+
search: Optional search query.
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
List of validator definitions.
|
|
44
|
+
"""
|
|
45
|
+
if search:
|
|
46
|
+
return search_validators(search)
|
|
47
|
+
if category:
|
|
48
|
+
return get_validators_by_category(category)
|
|
49
|
+
return VALIDATOR_REGISTRY
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@router.get(
|
|
53
|
+
"/validators/categories",
|
|
54
|
+
response_model=list[dict[str, str]],
|
|
55
|
+
summary="List validator categories",
|
|
56
|
+
description="Returns all validator categories with their labels.",
|
|
57
|
+
)
|
|
58
|
+
async def list_categories() -> list[dict[str, str]]:
|
|
59
|
+
"""List all validator categories.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
List of category objects with value and label.
|
|
63
|
+
"""
|
|
64
|
+
return [
|
|
65
|
+
{"value": c.value, "label": c.value.replace("_", " ").title()}
|
|
66
|
+
for c in ValidatorCategory
|
|
67
|
+
]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@router.get(
|
|
71
|
+
"/validators/{name}",
|
|
72
|
+
response_model=ValidatorDefinition | None,
|
|
73
|
+
summary="Get validator by name",
|
|
74
|
+
description="Returns a single validator definition by its name.",
|
|
75
|
+
)
|
|
76
|
+
async def get_validator(name: str) -> ValidatorDefinition | None:
|
|
77
|
+
"""Get a validator by name.
|
|
78
|
+
|
|
79
|
+
Args:
|
|
80
|
+
name: Validator name.
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
Validator definition if found.
|
|
84
|
+
"""
|
|
85
|
+
return get_validator_by_name(name)
|