unitysvc-services 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. unitysvc_services/__init__.py +4 -0
  2. unitysvc_services/api.py +421 -0
  3. unitysvc_services/cli.py +23 -0
  4. unitysvc_services/format_data.py +140 -0
  5. unitysvc_services/interactive_prompt.py +1132 -0
  6. unitysvc_services/list.py +216 -0
  7. unitysvc_services/models/__init__.py +71 -0
  8. unitysvc_services/models/base.py +1375 -0
  9. unitysvc_services/models/listing_data.py +118 -0
  10. unitysvc_services/models/listing_v1.py +56 -0
  11. unitysvc_services/models/provider_data.py +79 -0
  12. unitysvc_services/models/provider_v1.py +54 -0
  13. unitysvc_services/models/seller_data.py +120 -0
  14. unitysvc_services/models/seller_v1.py +42 -0
  15. unitysvc_services/models/service_data.py +114 -0
  16. unitysvc_services/models/service_v1.py +81 -0
  17. unitysvc_services/populate.py +207 -0
  18. unitysvc_services/publisher.py +1628 -0
  19. unitysvc_services/py.typed +0 -0
  20. unitysvc_services/query.py +688 -0
  21. unitysvc_services/scaffold.py +1103 -0
  22. unitysvc_services/schema/base.json +777 -0
  23. unitysvc_services/schema/listing_v1.json +1286 -0
  24. unitysvc_services/schema/provider_v1.json +952 -0
  25. unitysvc_services/schema/seller_v1.json +379 -0
  26. unitysvc_services/schema/service_v1.json +1306 -0
  27. unitysvc_services/test.py +965 -0
  28. unitysvc_services/unpublisher.py +505 -0
  29. unitysvc_services/update.py +287 -0
  30. unitysvc_services/utils.py +533 -0
  31. unitysvc_services/validator.py +731 -0
  32. unitysvc_services-0.1.24.dist-info/METADATA +184 -0
  33. unitysvc_services-0.1.24.dist-info/RECORD +37 -0
  34. unitysvc_services-0.1.24.dist-info/WHEEL +5 -0
  35. unitysvc_services-0.1.24.dist-info/entry_points.txt +3 -0
  36. unitysvc_services-0.1.24.dist-info/licenses/LICENSE +21 -0
  37. unitysvc_services-0.1.24.dist-info/top_level.txt +1 -0
@@ -0,0 +1,731 @@
1
+ """Data validation module for unitysvc_services."""
2
+
3
+ import json
4
+ import re
5
+ from pathlib import Path
6
+ from typing import Any
7
+ from urllib.parse import urlparse
8
+
9
+ import typer
10
+ from jinja2 import Environment, TemplateSyntaxError
11
+ from jsonschema.validators import Draft7Validator
12
+ from rich.console import Console
13
+
14
+ import unitysvc_services
15
+
16
+ from .utils import load_data_file as load_data_file_with_override
17
+
18
+
19
class DataValidationError(Exception):
    """Raised when a data file or directory fails validation."""
23
+
24
+
25
+ class DataValidator:
26
+ """Validates data files against JSON schemas."""
27
+
28
    def __init__(self, data_dir: Path, schema_dir: Path):
        """Initialize the validator.

        Args:
            data_dir: Root directory containing the data files to validate.
            schema_dir: Directory containing the JSON schema files.
        """
        self.data_dir = data_dir
        self.schema_dir = schema_dir
        # Populated by load_schemas(): schema file stem -> parsed schema document.
        self.schemas: dict[str, dict[str, Any]] = {}
        self.load_schemas()
33
+
34
+ def load_schemas(self) -> None:
35
+ """Load all JSON schemas from the schema directory."""
36
+ if not self.schema_dir.exists():
37
+ raise DataValidationError(
38
+ f"Schema directory not found: {self.schema_dir}\n"
39
+ f"This may indicate the package was not installed correctly. "
40
+ f"Please reinstall with: pip install --force-reinstall unitysvc-services"
41
+ )
42
+
43
+ schema_files = list(self.schema_dir.glob("*.json"))
44
+ if not schema_files:
45
+ raise DataValidationError(
46
+ f"No schema files (*.json) found in schema directory: {self.schema_dir}\n"
47
+ f"This may indicate the package was not installed correctly. "
48
+ f"Please reinstall with: pip install --force-reinstall unitysvc-services"
49
+ )
50
+
51
+ for schema_file in schema_files:
52
+ schema_name = schema_file.stem
53
+ try:
54
+ with open(schema_file, encoding="utf-8") as f:
55
+ schema = json.load(f)
56
+ self.schemas[schema_name] = schema
57
+ except Exception as e:
58
+ print(f"Error loading schema {schema_file}: {e}")
59
+
60
+ def is_url(self, value: str) -> bool:
61
+ """Check if a string is a valid URL."""
62
+ try:
63
+ result = urlparse(value)
64
+ return all([result.scheme, result.netloc])
65
+ except Exception:
66
+ return False
67
+
68
+ def find_union_fields(self, schema: dict[str, Any]) -> set[str]:
69
+ """Find fields that are Union[str, HttpUrl] types in the schema."""
70
+ union_fields: set[str] = set()
71
+
72
+ def traverse_schema(obj: Any, path: str = "") -> None:
73
+ if isinstance(obj, dict):
74
+ # Check for Union type with string and URL format
75
+ if "anyOf" in obj:
76
+ any_of = obj["anyOf"]
77
+ # Count non-null items for the check
78
+ non_null_items = [item for item in any_of if item.get("type") != "null"]
79
+ has_plain_string = any(
80
+ item.get("type") == "string" and "format" not in item for item in non_null_items
81
+ )
82
+ has_uri_string = any(
83
+ item.get("type") == "string" and item.get("format") == "uri" for item in non_null_items
84
+ )
85
+
86
+ # Check for Union[str, HttpUrl] or Union[str, HttpUrl, None]
87
+ if len(non_null_items) == 2 and has_plain_string and has_uri_string:
88
+ union_fields.add(path)
89
+
90
+ # Recursively check properties
91
+ if "properties" in obj:
92
+ for prop_name, prop_schema in obj["properties"].items():
93
+ new_path = f"{path}.{prop_name}" if path else prop_name
94
+ traverse_schema(prop_schema, new_path)
95
+
96
+ # Check other schema structures
97
+ for key, value in obj.items():
98
+ if key not in ["properties", "anyOf"] and isinstance(value, dict | list):
99
+ traverse_schema(value, path)
100
+
101
+ elif isinstance(obj, list):
102
+ for item in obj:
103
+ traverse_schema(item, path)
104
+
105
+ traverse_schema(schema)
106
+ return union_fields
107
+
108
    def validate_file_references(self, data: dict[str, Any], file_path: Path, union_fields: set[str]) -> list[str]:
        """
        Validate that file references in Union[str, HttpUrl] fields exist.

        Also validates that all file_path fields use relative paths.

        Args:
            data: Parsed content of the data file being validated.
            file_path: Path of the data file; references are resolved
                relative to its parent directory.
            union_fields: Dotted field paths (from find_union_fields) whose
                string values may be file references.

        Returns:
            List of human-readable error messages (empty when valid).
        """
        errors: list[str] = []

        # NOTE(review): `field_path` is threaded through recursion but never
        # read; kept as-is to avoid changing the call contract.
        def check_field(obj: Any, field_path: str, current_path: str = "") -> None:
            if isinstance(obj, dict):
                for key, value in obj.items():
                    # Build the dotted path used both for union_fields lookup
                    # and for error messages.
                    new_path = f"{current_path}.{key}" if current_path else key

                    # Check if this field is a Union[str, HttpUrl] field
                    # (string values that are not URLs are treated as local
                    # file references).
                    if (
                        new_path in union_fields
                        and value is not None
                        and isinstance(value, str)
                        and not self.is_url(value)
                    ):
                        # Empty string is not a valid file reference
                        if value == "":
                            errors.append(f"Empty string in field '{new_path}' is not a valid file reference or URL")
                        # It's a file reference, must be relative path
                        elif Path(value).is_absolute():
                            errors.append(
                                f"File reference '{value}' in field '{new_path}' "
                                f"must be a relative path, not an absolute path"
                            )
                        else:
                            # Resolve relative to the data file's directory.
                            referenced_file = file_path.parent / value
                            if not referenced_file.exists():
                                errors.append(
                                    f"File reference '{value}' in field '{new_path}' "
                                    f"does not exist at {referenced_file}"
                                )

                    # Check if this is a file_path field (regardless of schema type)
                    if key == "file_path" and isinstance(value, str):
                        # file_path fields must not be URLs (use external_url instead)
                        if self.is_url(value):
                            errors.append(
                                f"File path '{value}' in field '{new_path}' "
                                f"must not be a URL. Use 'external_url' field for URLs instead."
                            )
                        # All file_path fields must use relative paths
                        elif Path(value).is_absolute():
                            errors.append(
                                f"File path '{value}' in field '{new_path}' "
                                f"must be a relative path, not an absolute path"
                            )
                        # Check that the file exists
                        else:
                            referenced_file = file_path.parent / value
                            if not referenced_file.exists():
                                errors.append(
                                    f"File reference '{value}' in field '{new_path}' "
                                    f"does not exist at {referenced_file}"
                                )

                    # Recurse into nested objects
                    if isinstance(value, dict | list):
                        check_field(value, field_path, new_path)

            elif isinstance(obj, list):
                # List elements keep the parent path with an index suffix.
                for i, item in enumerate(obj):
                    if isinstance(item, dict | list):
                        check_field(item, field_path, f"{current_path}[{i}]")

        check_field(data, str(file_path))
        return errors
179
+
180
+ def validate_name_consistency(self, data: dict[str, Any], file_path: Path, schema_name: str) -> list[str]:
181
+ """Validate that the name field matches the directory name."""
182
+ errors: list[str] = []
183
+
184
+ # Only validate files with a 'name' field
185
+ if "name" not in data:
186
+ return errors
187
+
188
+ name_value = data["name"]
189
+ if not isinstance(name_value, str):
190
+ return errors
191
+
192
+ # Determine expected directory name based on file type
193
+ if file_path.name in ["provider.json", "provider.toml"]:
194
+ # For provider.json, the directory should match the provider name
195
+ directory_name = file_path.parent.name
196
+ if self._normalize_name(name_value) != self._normalize_name(directory_name):
197
+ errors.append(
198
+ f"Provider name '{name_value}' does not match directory name '{directory_name}'. "
199
+ f"Expected directory name to match normalized provider name: '{self._normalize_name(name_value)}'"
200
+ )
201
+
202
+ elif file_path.name in ["service.json", "service.toml"]:
203
+ # For service.json, the service directory should match the service name
204
+ service_directory_name = file_path.parent.name
205
+ if self._normalize_name(name_value) != self._normalize_name(service_directory_name):
206
+ normalized_name = self._normalize_name(name_value)
207
+ errors.append(
208
+ f"Service name '{name_value}' does not match "
209
+ f"service directory name '{service_directory_name}'. "
210
+ f"Expected service directory name to match "
211
+ f"normalized service name: '{normalized_name}'"
212
+ )
213
+
214
+ return errors
215
+
216
+ def _normalize_name(self, name: str) -> str:
217
+ """Normalize a name for directory comparison."""
218
+ # Convert to lowercase and replace spaces/special chars with hyphens
219
+ normalized = re.sub(r"[^a-zA-Z0-9]+", "-", name.lower())
220
+ # Remove leading/trailing hyphens
221
+ normalized = normalized.strip("-")
222
+ return normalized
223
+
224
+ def validate_with_pydantic_model(self, data: dict[str, Any], schema_name: str) -> list[str]:
225
+ """
226
+ Validate data using Pydantic models for additional validation rules.
227
+
228
+ This complements JSON schema validation with Pydantic field validators
229
+ like name format validation.
230
+
231
+ Args:
232
+ data: The data to validate
233
+ schema_name: The schema name (e.g., 'provider_v1', 'seller_v1')
234
+
235
+ Returns:
236
+ List of validation error messages
237
+ """
238
+ from pydantic import BaseModel
239
+
240
+ from unitysvc_services.models import ListingV1, ProviderV1, SellerV1, ServiceV1
241
+
242
+ errors: list[str] = []
243
+
244
+ # Map schema names to Pydantic model classes
245
+ model_map: dict[str, type[BaseModel]] = {
246
+ "provider_v1": ProviderV1,
247
+ "seller_v1": SellerV1,
248
+ "service_v1": ServiceV1,
249
+ "listing_v1": ListingV1,
250
+ }
251
+
252
+ if schema_name not in model_map:
253
+ return errors # No Pydantic model for this schema
254
+
255
+ model_class = model_map[schema_name]
256
+
257
+ try:
258
+ # Validate using the Pydantic model
259
+ model_class.model_validate(data)
260
+
261
+ except Exception as e:
262
+ # Extract meaningful error message from Pydantic ValidationError
263
+ error_msg = str(e)
264
+ # Pydantic errors can be verbose, try to extract just the relevant part
265
+ if "validation error" in error_msg.lower():
266
+ errors.append(f"Pydantic validation error: {error_msg}")
267
+ else:
268
+ errors.append(error_msg)
269
+
270
+ return errors
271
+
272
+ def load_data_file(self, file_path: Path) -> tuple[dict[str, Any] | None, list[str]]:
273
+ """Load data from JSON or TOML file, automatically merging override files.
274
+
275
+ Uses load_data_file from utils which includes override file merging.
276
+ """
277
+ errors: list[str] = []
278
+
279
+ try:
280
+ data, _file_format = load_data_file_with_override(file_path)
281
+ return data, errors
282
+ except Exception as e:
283
+ format_name = {".json": "JSON", ".toml": "TOML"}.get(file_path.suffix, "data")
284
+ return None, [f"Failed to parse {format_name}: {e}"]
285
+
286
+ def validate_data_file(self, file_path: Path) -> tuple[bool, list[str]]:
287
+ """Validate a single data file (JSON or TOML)."""
288
+ errors: list[str] = []
289
+
290
+ data, load_errors = self.load_data_file(file_path)
291
+ if load_errors:
292
+ return False, load_errors
293
+
294
+ # data could be None if loading failed
295
+ if data is None:
296
+ return False, ["Failed to load data file"]
297
+
298
+ # Check for schema field
299
+ if "schema" not in data:
300
+ return False, ["Missing 'schema' field in data file"]
301
+
302
+ schema_name = data["schema"]
303
+
304
+ # Check if schema exists
305
+ if schema_name not in self.schemas:
306
+ return False, [f"Schema '{schema_name}' not found in schema directory"]
307
+
308
+ schema = self.schemas[schema_name]
309
+
310
+ # Validate against schema with format checking enabled
311
+ try:
312
+ validator = Draft7Validator(schema, format_checker=Draft7Validator.FORMAT_CHECKER)
313
+ validator.check_schema(schema) # Validate the schema itself
314
+ validation_errors = list(validator.iter_errors(data))
315
+ for error in validation_errors:
316
+ errors.append(f"Schema validation error: {error.message}")
317
+ if error.absolute_path:
318
+ errors.append(f" Path: {'.'.join(str(p) for p in error.absolute_path)}")
319
+ except Exception as e:
320
+ errors.append(f"Validation error: {e}")
321
+
322
+ # Also validate using Pydantic models for additional validation rules
323
+ pydantic_errors = self.validate_with_pydantic_model(data, schema_name)
324
+ errors.extend(pydantic_errors)
325
+
326
+ # Find Union[str, HttpUrl] fields and validate file references
327
+ union_fields = self.find_union_fields(schema)
328
+ file_ref_errors = self.validate_file_references(data, file_path, union_fields)
329
+ errors.extend(file_ref_errors)
330
+
331
+ # Validate name consistency with directory name
332
+ name_errors = self.validate_name_consistency(data, file_path, schema_name)
333
+ errors.extend(name_errors)
334
+
335
+ return len(errors) == 0, errors
336
+
337
+ def validate_jinja2_file(self, file_path: Path) -> tuple[bool, list[str]]:
338
+ """Validate a file with Jinja2 template syntax.
339
+
340
+ This validates any file ending with .j2 extension, including:
341
+ - .md.j2 (Jinja2 markdown templates)
342
+ - .py.j2 (Jinja2 Python code example templates)
343
+ - .js.j2 (Jinja2 JavaScript code example templates)
344
+ - .sh.j2 (Jinja2 shell script templates)
345
+ """
346
+ errors: list[str] = []
347
+
348
+ try:
349
+ with open(file_path, encoding="utf-8") as f:
350
+ content = f.read()
351
+
352
+ if not content.strip():
353
+ return True, []
354
+
355
+ # Validate Jinja2 syntax
356
+ try:
357
+ env = Environment()
358
+ env.parse(content)
359
+ except TemplateSyntaxError as e:
360
+ errors.append(f"Jinja2 syntax error: {e.message} at line {e.lineno}")
361
+ except Exception as e:
362
+ errors.append(f"Jinja2 validation error: {e}")
363
+
364
+ return len(errors) == 0, errors
365
+ except Exception as e:
366
+ return False, [f"Failed to read template file: {e}"]
367
+
368
+ def validate_seller_uniqueness(self) -> tuple[bool, list[str]]:
369
+ """
370
+ Validate that there is exactly one seller_v1 file in the data directory.
371
+
372
+ Each repository should have one and only one seller.json file using the seller_v1 schema.
373
+ """
374
+ errors: list[str] = []
375
+ seller_files: list[Path] = []
376
+
377
+ if not self.data_dir.exists():
378
+ return True, []
379
+
380
+ # Find all data files with seller_v1 schema
381
+ for file_path in self.data_dir.rglob("*"):
382
+ # Skip hidden directories (those starting with .)
383
+ if any(part.startswith(".") for part in file_path.parts):
384
+ continue
385
+
386
+ if file_path.is_file() and file_path.suffix in [".json", ".toml"]:
387
+ try:
388
+ data, load_errors = self.load_data_file(file_path)
389
+ if data and "schema" in data and data["schema"] == "seller_v1":
390
+ seller_files.append(file_path.relative_to(self.data_dir))
391
+ except Exception:
392
+ # Skip files that can't be loaded (they'll be caught by other validation)
393
+ continue
394
+
395
+ # Check count
396
+ if len(seller_files) == 0:
397
+ errors.append(
398
+ "No seller file found. Each repository must have exactly one data file using the 'seller_v1' schema."
399
+ )
400
+ elif len(seller_files) > 1:
401
+ errors.append(f"Found {len(seller_files)} seller files, but only one is allowed per repository:")
402
+ for seller_file in seller_files:
403
+ errors.append(f" - {seller_file}")
404
+
405
+ return len(errors) == 0, errors
406
+
407
+ def validate_provider_status(self) -> tuple[bool, list[str]]:
408
+ """
409
+ Validate provider status and warn about services under disabled/draft providers.
410
+
411
+ Returns tuple of (is_valid, warnings) where warnings indicate services
412
+ that will be affected by provider status.
413
+ """
414
+ from unitysvc_services.models.base import ProviderStatusEnum
415
+ from unitysvc_services.models.provider_v1 import ProviderV1
416
+
417
+ warnings: list[str] = []
418
+
419
+ # Find all provider files (skip hidden directories)
420
+ provider_files = [
421
+ f for f in self.data_dir.glob("*/provider.*") if not any(part.startswith(".") for part in f.parts)
422
+ ]
423
+
424
+ for provider_file in provider_files:
425
+ try:
426
+ # Load provider data using existing helper method
427
+ data, load_errors = self.load_data_file(provider_file)
428
+ if load_errors or data is None:
429
+ warnings.append(f"Failed to load provider file {provider_file}: {load_errors}")
430
+ continue
431
+
432
+ # Parse as ProviderV1
433
+ provider = ProviderV1.model_validate(data)
434
+ provider_dir = provider_file.parent
435
+ provider_name = provider.name
436
+
437
+ # Check if provider is not active
438
+ if provider.status != ProviderStatusEnum.active:
439
+ # Find all services under this provider
440
+ services_dir = provider_dir / "services"
441
+ if services_dir.exists():
442
+ service_count = len(list(services_dir.iterdir()))
443
+ if service_count > 0:
444
+ warnings.append(
445
+ f"Provider '{provider_name}' has status '{provider.status}' but has {service_count} "
446
+ f"service(s). All services under this provider will be affected."
447
+ )
448
+
449
+ except Exception as e:
450
+ warnings.append(f"Error checking provider status in {provider_file}: {e}")
451
+
452
+ # Return True (valid) but with warnings
453
+ return True, warnings
454
+
455
+ def validate_seller_status(self) -> tuple[bool, list[str]]:
456
+ """
457
+ Validate seller status and warn if seller is disabled/draft.
458
+
459
+ Returns tuple of (is_valid, warnings) where warnings indicate seller issues.
460
+ """
461
+ from unitysvc_services.models.base import SellerStatusEnum
462
+ from unitysvc_services.models.seller_v1 import SellerV1
463
+
464
+ warnings: list[str] = []
465
+
466
+ # Find all seller files (skip hidden files)
467
+ seller_files = [f for f in self.data_dir.glob("seller.*") if not f.name.startswith(".")]
468
+
469
+ for seller_file in seller_files:
470
+ try:
471
+ # Load seller data using existing helper method
472
+ data, load_errors = self.load_data_file(seller_file)
473
+ if load_errors or data is None:
474
+ warnings.append(f"Failed to load seller file {seller_file}: {load_errors}")
475
+ continue
476
+
477
+ # Parse as SellerV1
478
+ seller = SellerV1.model_validate(data)
479
+ seller_name = seller.name
480
+
481
+ # Check if seller is not active
482
+ if seller.status != SellerStatusEnum.active:
483
+ warnings.append(
484
+ f"Seller '{seller_name}' has status '{seller.status}'. Seller will not be published to backend."
485
+ )
486
+
487
+ except Exception as e:
488
+ warnings.append(f"Error checking seller status in {seller_file}: {e}")
489
+
490
+ # Return True (valid) but with warnings
491
+ return True, warnings
492
+
493
+ def validate_all(self) -> dict[str, tuple[bool, list[str]]]:
494
+ """Validate all files in the data directory."""
495
+ results: dict[str, tuple[bool, list[str]]] = {}
496
+
497
+ if not self.data_dir.exists():
498
+ return results
499
+
500
+ # First, validate seller uniqueness (repository-level validation)
501
+ seller_valid, seller_errors = self.validate_seller_uniqueness()
502
+ if not seller_valid:
503
+ results["_seller_uniqueness"] = (False, seller_errors)
504
+
505
+ # Validate seller status
506
+ seller_status_valid, seller_warnings = self.validate_seller_status()
507
+ if seller_warnings:
508
+ results["_seller_status"] = (True, seller_warnings) # Warnings, not errors
509
+
510
+ # Validate provider status and check for affected services
511
+ provider_status_valid, provider_warnings = self.validate_provider_status()
512
+ if provider_warnings:
513
+ results["_provider_status"] = (
514
+ True,
515
+ provider_warnings,
516
+ ) # Warnings, not errors
517
+
518
+ # Find all data and MD files recursively, skipping hidden directories
519
+ for file_path in self.data_dir.rglob("*"):
520
+ # Skip hidden directories (those starting with .)
521
+ if any(part.startswith(".") for part in file_path.parts):
522
+ continue
523
+
524
+ # Skip schema directory and pyproject.toml (not data files)
525
+ if "schema" in file_path.parts or file_path.name == "pyproject.toml":
526
+ continue
527
+
528
+ # Check if file should be validated
529
+ # Only .j2 files (Jinja2 templates) are validated for Jinja2 syntax
530
+ is_template = file_path.name.endswith(".j2")
531
+ is_data_file = file_path.suffix in [".json", ".toml"]
532
+
533
+ # Skip override files - they don't need schema validation
534
+ # Override files are automatically merged with base files by load_data_file()
535
+ is_override_file = ".override." in file_path.name
536
+
537
+ if file_path.is_file() and (is_data_file or is_template) and not is_override_file:
538
+ relative_path = file_path.relative_to(self.data_dir)
539
+
540
+ if is_data_file:
541
+ is_valid, errors = self.validate_data_file(file_path)
542
+ elif is_template:
543
+ is_valid, errors = self.validate_jinja2_file(file_path)
544
+ else:
545
+ continue
546
+
547
+ results[str(relative_path)] = (is_valid, errors)
548
+
549
+ return results
550
+
551
    def validate_directory_data(self, directory: Path) -> None:
        """Validate data files in a directory for consistency.

        Validation rules:
        1. All service_v1 files in same directory must have unique names
        2. All listing_v1 files must reference a service name that exists in the same directory
        3. If service_name is defined in listing_v1, it must match a service in the directory

        Args:
            directory: Directory containing data files to validate

        Raises:
            DataValidationError: If validation fails
        """
        # Find all JSON and TOML files in the directory (not recursive)
        data_files: list[Path] = []
        for pattern in ["*.json", "*.toml"]:
            data_files.extend(directory.glob(pattern))

        # Load all files and categorize by schema
        services: dict[str, Path] = {}  # name -> file_path
        listings: list[tuple[Path, dict[str, Any]]] = []  # list of (file_path, data)

        for file_path in data_files:
            try:
                data, load_errors = self.load_data_file(file_path)
                if load_errors or data is None:
                    continue

                schema = data.get("schema")

                if schema == "service_v1":
                    service_name = data.get("name")
                    if not service_name:
                        raise DataValidationError(f"Service file {file_path} missing 'name' field")

                    # Check for duplicate service names in same directory
                    if service_name in services:
                        raise DataValidationError(
                            f"Duplicate service name '{service_name}' found in directory {directory}:\n"
                            f"  - {services[service_name]}\n"
                            f"  - {file_path}"
                        )

                    services[service_name] = file_path

                elif schema == "listing_v1":
                    listings.append((file_path, data))

            except Exception as e:
                # Skip files that can't be loaded or don't have schema.
                # DataValidationError raised above must propagate, so it is
                # re-raised here rather than being swallowed with the rest.
                if isinstance(e, DataValidationError):
                    raise
                continue

        # Validate listings reference valid services
        for listing_file, listing_data in listings:
            service_name = listing_data.get("service_name")

            if service_name:
                # If service_name is explicitly defined, it must match a service in the directory
                if service_name not in services:
                    available_services = ", ".join(services.keys()) if services else "none"
                    raise DataValidationError(
                        f"Listing file {listing_file} references service_name '{service_name}' "
                        f"which does not exist in the same directory.\n"
                        f"Available services: {available_services}"
                    )
            else:
                # If service_name not defined, there should be exactly one service in the directory
                if len(services) == 0:
                    raise DataValidationError(
                        f"Listing file {listing_file} does not specify 'service_name' "
                        f"and no service files found in the same directory."
                    )
                elif len(services) > 1:
                    available_services = ", ".join(services.keys())
                    raise DataValidationError(
                        f"Listing file {listing_file} does not specify 'service_name' "
                        f"but multiple services exist in the same directory: {available_services}. "
                        f"Please add 'service_name' field to the listing to specify which service it belongs to."
                    )
633
+
634
+ def validate_all_service_directories(self, data_dir: Path) -> list[str]:
635
+ """
636
+ Validate all service directories in a directory tree.
637
+
638
+ Returns a list of validation error messages (empty if all valid).
639
+ """
640
+ errors = []
641
+
642
+ # Find all directories containing service or listing files
643
+ directories_to_validate = set()
644
+
645
+ for pattern in ["*.json", "*.toml"]:
646
+ for file_path in data_dir.rglob(pattern):
647
+ # Skip hidden directories (those starting with .)
648
+ if any(part.startswith(".") for part in file_path.parts):
649
+ continue
650
+
651
+ try:
652
+ data, load_errors = self.load_data_file(file_path)
653
+ if load_errors or data is None:
654
+ continue
655
+
656
+ schema = data.get("schema")
657
+ if schema in ["service_v1", "listing_v1"]:
658
+ directories_to_validate.add(file_path.parent)
659
+ except Exception:
660
+ continue
661
+
662
+ # Validate each directory
663
+ for directory in sorted(directories_to_validate):
664
+ try:
665
+ self.validate_directory_data(directory)
666
+ except DataValidationError as e:
667
+ errors.append(str(e))
668
+
669
+ return errors
670
+
671
+
672
# CLI command
# Typer sub-application exposing the `validate` command; Console renders
# rich (colored) terminal output.
app = typer.Typer(help="Validate data files")
console = Console()
675
+
676
+
677
@app.command()
def validate(
    data_dir: Path | None = typer.Argument(
        None,
        help="Directory containing data files to validate (default: current directory)",
    ),
):
    """
    Validate data consistency in service and listing files.

    Checks:
    1. Service names are unique within each directory
    2. Listing files reference valid service names
    3. Multiple services in a directory require explicit service_name in listings

    Exits with code 1 when any validation error is found; warnings (e.g.
    non-active seller/provider status) are printed but do not fail the run.
    """
    # Determine data directory
    if data_dir is None:
        data_dir = Path.cwd()

    if not data_dir.exists():
        console.print(f"[red]✗[/red] Data directory not found: {data_dir}")
        raise typer.Exit(1)

    console.print(f"[cyan]Validating data files in:[/cyan] {data_dir}")
    console.print()

    # Get schema directory from installed package
    schema_dir = Path(unitysvc_services.__file__).parent / "schema"

    # Create validator and run validation
    validator = DataValidator(data_dir, schema_dir)

    # Run comprehensive validation (schema, file references, etc.)
    all_results = validator.validate_all()
    validation_errors = []
    validation_warnings = []

    for file_path, (is_valid, messages) in all_results.items():
        if not is_valid and messages:
            for message in messages:
                validation_errors.append(f"{file_path}: {message}")
        elif is_valid and messages:
            # Fix: validate_all() reports advisory results (e.g. _seller_status,
            # _provider_status) as (True, messages); previously these were
            # silently dropped and the user never saw them.
            for message in messages:
                validation_warnings.append(f"{file_path}: {message}")

    # Also run service directory validation (service/listing relationships)
    directory_errors = validator.validate_all_service_directories(data_dir)
    validation_errors.extend(directory_errors)

    # Show warnings first; they never fail the command.
    if validation_warnings:
        console.print(f"[yellow]⚠ {len(validation_warnings)} warning(s):[/yellow]")
        for i, warning in enumerate(validation_warnings, 1):
            console.print(f"[yellow]{i}.[/yellow] {warning}")
        console.print()

    if validation_errors:
        console.print(f"[red]✗ Validation failed with {len(validation_errors)} error(s):[/red]")
        console.print()
        for i, error in enumerate(validation_errors, 1):
            console.print(f"[red]{i}.[/red] {error}")
        console.print()
        raise typer.Exit(1)
    else:
        console.print("[green]✓ All data files are valid![/green]")