verifily-1.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. verifily-1.2.0/LICENSE +76 -0
  2. verifily-1.2.0/MANIFEST.in +2 -0
  3. verifily-1.2.0/PKG-INFO +222 -0
  4. verifily-1.2.0/README.md +155 -0
  5. verifily-1.2.0/pyproject.toml +97 -0
  6. verifily-1.2.0/setup.cfg +4 -0
  7. verifily-1.2.0/setup.py +36 -0
  8. verifily-1.2.0/verifily.egg-info/PKG-INFO +222 -0
  9. verifily-1.2.0/verifily.egg-info/SOURCES.txt +203 -0
  10. verifily-1.2.0/verifily.egg-info/dependency_links.txt +1 -0
  11. verifily-1.2.0/verifily.egg-info/entry_points.txt +2 -0
  12. verifily-1.2.0/verifily.egg-info/requires.txt +52 -0
  13. verifily-1.2.0/verifily.egg-info/top_level.txt +2 -0
  14. verifily-1.2.0/verifily_cli_v1/__init__.py +3 -0
  15. verifily-1.2.0/verifily_cli_v1/__main__.py +11 -0
  16. verifily-1.2.0/verifily_cli_v1/cli.py +4183 -0
  17. verifily-1.2.0/verifily_cli_v1/commands/__init__.py +0 -0
  18. verifily-1.2.0/verifily_cli_v1/commands/badge.py +72 -0
  19. verifily-1.2.0/verifily_cli_v1/commands/bundle.py +123 -0
  20. verifily-1.2.0/verifily_cli_v1/commands/ci_init.py +103 -0
  21. verifily-1.2.0/verifily_cli_v1/commands/compare.py +221 -0
  22. verifily-1.2.0/verifily_cli_v1/commands/contamination.py +299 -0
  23. verifily-1.2.0/verifily_cli_v1/commands/contract_check.py +234 -0
  24. verifily-1.2.0/verifily_cli_v1/commands/doctor.py +106 -0
  25. verifily-1.2.0/verifily_cli_v1/commands/eval.py +130 -0
  26. verifily-1.2.0/verifily_cli_v1/commands/fingerprint_cmd.py +100 -0
  27. verifily-1.2.0/verifily_cli_v1/commands/history.py +182 -0
  28. verifily-1.2.0/verifily_cli_v1/commands/ingest.py +581 -0
  29. verifily-1.2.0/verifily_cli_v1/commands/init.py +123 -0
  30. verifily-1.2.0/verifily_cli_v1/commands/lineage.py +117 -0
  31. verifily-1.2.0/verifily_cli_v1/commands/nl2sql.py +503 -0
  32. verifily-1.2.0/verifily_cli_v1/commands/pipeline.py +669 -0
  33. verifily-1.2.0/verifily_cli_v1/commands/quickstart.py +306 -0
  34. verifily-1.2.0/verifily_cli_v1/commands/registry_cmd.py +377 -0
  35. verifily-1.2.0/verifily_cli_v1/commands/report.py +143 -0
  36. verifily-1.2.0/verifily_cli_v1/commands/reproduce.py +155 -0
  37. verifily-1.2.0/verifily_cli_v1/commands/score.py +214 -0
  38. verifily-1.2.0/verifily_cli_v1/commands/train.py +138 -0
  39. verifily-1.2.0/verifily_cli_v1/commands/transform.py +355 -0
  40. verifily-1.2.0/verifily_cli_v1/core/__init__.py +0 -0
  41. verifily-1.2.0/verifily_cli_v1/core/api/__init__.py +1 -0
  42. verifily-1.2.0/verifily_cli_v1/core/api/auth.py +645 -0
  43. verifily-1.2.0/verifily_cli_v1/core/api/auth_registry.py +299 -0
  44. verifily-1.2.0/verifily_cli_v1/core/api/billing_enforce.py +132 -0
  45. verifily-1.2.0/verifily_cli_v1/core/api/budget_middleware.py +208 -0
  46. verifily-1.2.0/verifily_cli_v1/core/api/errors.py +96 -0
  47. verifily-1.2.0/verifily_cli_v1/core/api/identity.py +189 -0
  48. verifily-1.2.0/verifily_cli_v1/core/api/jobs.py +387 -0
  49. verifily-1.2.0/verifily_cli_v1/core/api/metrics.py +330 -0
  50. verifily-1.2.0/verifily_cli_v1/core/api/middleware.py +99 -0
  51. verifily-1.2.0/verifily_cli_v1/core/api/models.py +776 -0
  52. verifily-1.2.0/verifily_cli_v1/core/api/monitor_store.py +324 -0
  53. verifily-1.2.0/verifily_cli_v1/core/api/notifier.py +442 -0
  54. verifily-1.2.0/verifily_cli_v1/core/api/org_store.py +453 -0
  55. verifily-1.2.0/verifily_cli_v1/core/api/orgs.py +256 -0
  56. verifily-1.2.0/verifily_cli_v1/core/api/quotas.py +113 -0
  57. verifily-1.2.0/verifily_cli_v1/core/api/rate_limit.py +98 -0
  58. verifily-1.2.0/verifily_cli_v1/core/api/rbac.py +275 -0
  59. verifily-1.2.0/verifily_cli_v1/core/api/retrain.py +305 -0
  60. verifily-1.2.0/verifily_cli_v1/core/api/runners.py +295 -0
  61. verifily-1.2.0/verifily_cli_v1/core/api/server.py +2382 -0
  62. verifily-1.2.0/verifily_cli_v1/core/api/settings.py +391 -0
  63. verifily-1.2.0/verifily_cli_v1/core/api/startup_checks.py +266 -0
  64. verifily-1.2.0/verifily_cli_v1/core/api/usage_store.py +322 -0
  65. verifily-1.2.0/verifily_cli_v1/core/api/workspace.py +293 -0
  66. verifily-1.2.0/verifily_cli_v1/core/audit.py +171 -0
  67. verifily-1.2.0/verifily_cli_v1/core/backup_restore.py +308 -0
  68. verifily-1.2.0/verifily_cli_v1/core/benchmark.py +455 -0
  69. verifily-1.2.0/verifily_cli_v1/core/billing/__init__.py +5 -0
  70. verifily-1.2.0/verifily_cli_v1/core/billing/enforcement.py +78 -0
  71. verifily-1.2.0/verifily_cli_v1/core/billing/export.py +104 -0
  72. verifily-1.2.0/verifily_cli_v1/core/billing/invoice.py +137 -0
  73. verifily-1.2.0/verifily_cli_v1/core/billing/metering.py +100 -0
  74. verifily-1.2.0/verifily_cli_v1/core/billing/models.py +94 -0
  75. verifily-1.2.0/verifily_cli_v1/core/billing/periods.py +41 -0
  76. verifily-1.2.0/verifily_cli_v1/core/billing/pricing.py +120 -0
  77. verifily-1.2.0/verifily_cli_v1/core/billing/store.py +228 -0
  78. verifily-1.2.0/verifily_cli_v1/core/billing/stripe.py +191 -0
  79. verifily-1.2.0/verifily_cli_v1/core/billing/subscriptions.py +175 -0
  80. verifily-1.2.0/verifily_cli_v1/core/budget.py +398 -0
  81. verifily-1.2.0/verifily_cli_v1/core/classify.py +415 -0
  82. verifily-1.2.0/verifily_cli_v1/core/config_overrides.py +121 -0
  83. verifily-1.2.0/verifily_cli_v1/core/deploy_config.py +332 -0
  84. verifily-1.2.0/verifily_cli_v1/core/drift.py +463 -0
  85. verifily-1.2.0/verifily_cli_v1/core/env.py +279 -0
  86. verifily-1.2.0/verifily_cli_v1/core/fingerprint.py +327 -0
  87. verifily-1.2.0/verifily_cli_v1/core/flatten.py +176 -0
  88. verifily-1.2.0/verifily_cli_v1/core/hashing.py +83 -0
  89. verifily-1.2.0/verifily_cli_v1/core/integrity.py +402 -0
  90. verifily-1.2.0/verifily_cli_v1/core/io.py +109 -0
  91. verifily-1.2.0/verifily_cli_v1/core/licensing.py +463 -0
  92. verifily-1.2.0/verifily_cli_v1/core/lineage_graph.py +591 -0
  93. verifily-1.2.0/verifily_cli_v1/core/manifest.py +128 -0
  94. verifily-1.2.0/verifily_cli_v1/core/minhash_lsh.py +147 -0
  95. verifily-1.2.0/verifily_cli_v1/core/nl2sql.py +163 -0
  96. verifily-1.2.0/verifily_cli_v1/core/pii.py +206 -0
  97. verifily-1.2.0/verifily_cli_v1/core/readers.py +323 -0
  98. verifily-1.2.0/verifily_cli_v1/core/readiness.py +525 -0
  99. verifily-1.2.0/verifily_cli_v1/core/redaction_audit.py +345 -0
  100. verifily-1.2.0/verifily_cli_v1/core/registry.py +578 -0
  101. verifily-1.2.0/verifily_cli_v1/core/remote.py +125 -0
  102. verifily-1.2.0/verifily_cli_v1/core/runtime_paths.py +213 -0
  103. verifily-1.2.0/verifily_cli_v1/core/schemas.py +233 -0
  104. verifily-1.2.0/verifily_cli_v1/core/scoring.py +657 -0
  105. verifily-1.2.0/verifily_cli_v1/core/secrets.py +280 -0
  106. verifily-1.2.0/verifily_cli_v1/core/security/__init__.py +1 -0
  107. verifily-1.2.0/verifily_cli_v1/core/security/audit_store.py +63 -0
  108. verifily-1.2.0/verifily_cli_v1/core/security/policies.py +85 -0
  109. verifily-1.2.0/verifily_cli_v1/core/security/rbac.py +77 -0
  110. verifily-1.2.0/verifily_cli_v1/core/security/tokens.py +104 -0
  111. verifily-1.2.0/verifily_cli_v1/core/signed_decisions.py +338 -0
  112. verifily-1.2.0/verifily_cli_v1/core/subprocess.py +68 -0
  113. verifily-1.2.0/verifily_cli_v1/core/teams/__init__.py +5 -0
  114. verifily-1.2.0/verifily_cli_v1/core/teams/models.py +104 -0
  115. verifily-1.2.0/verifily_cli_v1/core/teams/scopes.py +54 -0
  116. verifily-1.2.0/verifily_cli_v1/core/teams/store.py +238 -0
  117. verifily-1.2.0/verifily_cli_v1/core/usage.py +133 -0
  118. verifily-1.2.0/verifily_cli_v1/core/workspaces/__init__.py +1 -0
  119. verifily-1.2.0/verifily_cli_v1/core/workspaces/models.py +62 -0
  120. verifily-1.2.0/verifily_cli_v1/core/workspaces/store.py +250 -0
  121. verifily-1.2.0/verifily_cli_v1/integrations/__init__.py +1 -0
  122. verifily-1.2.0/verifily_cli_v1/integrations/huggingface.py +160 -0
  123. verifily-1.2.0/verifily_cli_v1/integrations/mlflow.py +118 -0
  124. verifily-1.2.0/verifily_cli_v1/integrations/wandb.py +124 -0
  125. verifily-1.2.0/verifily_cli_v1/tests/__init__.py +0 -0
  126. verifily-1.2.0/verifily_cli_v1/tests/conftest.py +27 -0
  127. verifily-1.2.0/verifily_cli_v1/tests/test_adoption_hooks.py +277 -0
  128. verifily-1.2.0/verifily_cli_v1/tests/test_api_auth.py +174 -0
  129. verifily-1.2.0/verifily_cli_v1/tests/test_api_hardening.py +371 -0
  130. verifily-1.2.0/verifily_cli_v1/tests/test_api_jobs.py +368 -0
  131. verifily-1.2.0/verifily_cli_v1/tests/test_api_jobs_classify.py +260 -0
  132. verifily-1.2.0/verifily_cli_v1/tests/test_api_jobs_retrain.py +283 -0
  133. verifily-1.2.0/verifily_cli_v1/tests/test_api_monitor.py +208 -0
  134. verifily-1.2.0/verifily_cli_v1/tests/test_api_org_mode.py +346 -0
  135. verifily-1.2.0/verifily_cli_v1/tests/test_api_production.py +188 -0
  136. verifily-1.2.0/verifily_cli_v1/tests/test_api_server.py +287 -0
  137. verifily-1.2.0/verifily_cli_v1/tests/test_audit_usage.py +378 -0
  138. verifily-1.2.0/verifily_cli_v1/tests/test_benchmark.py +277 -0
  139. verifily-1.2.0/verifily_cli_v1/tests/test_billing.py +593 -0
  140. verifily-1.2.0/verifily_cli_v1/tests/test_billing_metering.py +431 -0
  141. verifily-1.2.0/verifily_cli_v1/tests/test_budget.py +473 -0
  142. verifily-1.2.0/verifily_cli_v1/tests/test_ci_action_templates.py +137 -0
  143. verifily-1.2.0/verifily_cli_v1/tests/test_ci_init.py +66 -0
  144. verifily-1.2.0/verifily_cli_v1/tests/test_classify_exports.py +286 -0
  145. verifily-1.2.0/verifily_cli_v1/tests/test_cli_error_messages.py +38 -0
  146. verifily-1.2.0/verifily_cli_v1/tests/test_customer_drill.py +266 -0
  147. verifily-1.2.0/verifily_cli_v1/tests/test_decision_gate.py +295 -0
  148. verifily-1.2.0/verifily_cli_v1/tests/test_deploy_enterprise.py +426 -0
  149. verifily-1.2.0/verifily_cli_v1/tests/test_drift.py +370 -0
  150. verifily-1.2.0/verifily_cli_v1/tests/test_enterprise_audit_export.py +176 -0
  151. verifily-1.2.0/verifily_cli_v1/tests/test_enterprise_policies.py +105 -0
  152. verifily-1.2.0/verifily_cli_v1/tests/test_enterprise_rbac.py +91 -0
  153. verifily-1.2.0/verifily_cli_v1/tests/test_enterprise_tokens.py +109 -0
  154. verifily-1.2.0/verifily_cli_v1/tests/test_enterprise_trust.py +494 -0
  155. verifily-1.2.0/verifily_cli_v1/tests/test_fingerprint.py +214 -0
  156. verifily-1.2.0/verifily_cli_v1/tests/test_hosted_readiness.py +120 -0
  157. verifily-1.2.0/verifily_cli_v1/tests/test_ingest.py +363 -0
  158. verifily-1.2.0/verifily_cli_v1/tests/test_integrations.py +330 -0
  159. verifily-1.2.0/verifily_cli_v1/tests/test_integrity.py +225 -0
  160. verifily-1.2.0/verifily_cli_v1/tests/test_licensing.py +410 -0
  161. verifily-1.2.0/verifily_cli_v1/tests/test_lineage_graph.py +449 -0
  162. verifily-1.2.0/verifily_cli_v1/tests/test_metrics_percentiles.py +229 -0
  163. verifily-1.2.0/verifily_cli_v1/tests/test_nl2sql.py +353 -0
  164. verifily-1.2.0/verifily_cli_v1/tests/test_notifier.py +412 -0
  165. verifily-1.2.0/verifily_cli_v1/tests/test_orgs_projects.py +262 -0
  166. verifily-1.2.0/verifily_cli_v1/tests/test_perf_guard.py +148 -0
  167. verifily-1.2.0/verifily_cli_v1/tests/test_quickstart.py +91 -0
  168. verifily-1.2.0/verifily_cli_v1/tests/test_readiness.py +384 -0
  169. verifily-1.2.0/verifily_cli_v1/tests/test_real_conditions_fixtures.py +339 -0
  170. verifily-1.2.0/verifily_cli_v1/tests/test_redaction_audit.py +250 -0
  171. verifily-1.2.0/verifily_cli_v1/tests/test_registry.py +487 -0
  172. verifily-1.2.0/verifily_cli_v1/tests/test_scoring.py +482 -0
  173. verifily-1.2.0/verifily_cli_v1/tests/test_secrets_hardening.py +254 -0
  174. verifily-1.2.0/verifily_cli_v1/tests/test_startup_checks.py +333 -0
  175. verifily-1.2.0/verifily_cli_v1/tests/test_stripe_stub.py +373 -0
  176. verifily-1.2.0/verifily_cli_v1/tests/test_teams_rbac.py +550 -0
  177. verifily-1.2.0/verifily_cli_v1/tests/test_trust_signals.py +142 -0
  178. verifily-1.2.0/verifily_cli_v1/tests/test_usage_aggregation.py +269 -0
  179. verifily-1.2.0/verifily_cli_v1/tests/test_v11_widening.py +409 -0
  180. verifily-1.2.0/verifily_cli_v1/tests/test_v1_package.py +254 -0
  181. verifily-1.2.0/verifily_cli_v1/tests/test_verify_run_cli.py +149 -0
  182. verifily-1.2.0/verifily_cli_v1/tests/test_version_command.py +51 -0
  183. verifily-1.2.0/verifily_cli_v1/tests/test_workspace_security.py +300 -0
  184. verifily-1.2.0/verifily_cli_v1/tests/test_workspaces.py +408 -0
  185. verifily-1.2.0/verifily_sdk/tests/__init__.py +0 -0
  186. verifily-1.2.0/verifily_sdk/tests/conftest.py +16 -0
  187. verifily-1.2.0/verifily_sdk/tests/test_sdk_async.py +156 -0
  188. verifily-1.2.0/verifily_sdk/tests/test_sdk_client.py +170 -0
  189. verifily-1.2.0/verifily_sdk/tests/test_sdk_effective_config.py +58 -0
  190. verifily-1.2.0/verifily_sdk/tests/test_sdk_jobs.py +133 -0
  191. verifily-1.2.0/verifily_sdk/tests/test_sdk_jobs_classify.py +73 -0
  192. verifily-1.2.0/verifily_sdk/tests/test_sdk_jobs_retrain.py +96 -0
  193. verifily-1.2.0/verifily_sdk/tests/test_sdk_monitor.py +131 -0
  194. verifily-1.2.0/verifily_sdk/tests/test_sdk_org_mode.py +166 -0
  195. verifily-1.2.0/verifily_sdk/tests/test_sdk_orgs_projects.py +97 -0
  196. verifily-1.2.0/verifily_sdk/tests/test_sdk_retries.py +159 -0
  197. verifily-1.2.0/verifily_sdk/tests/test_sdk_stripe.py +98 -0
  198. verifily-1.2.0/verifily_sdk/tests/test_sdk_usage.py +172 -0
  199. verifily-1.2.0/verifily_sdk/verifily_sdk/__init__.py +29 -0
  200. verifily-1.2.0/verifily_sdk/verifily_sdk/async_client.py +450 -0
  201. verifily-1.2.0/verifily_sdk/verifily_sdk/auth.py +18 -0
  202. verifily-1.2.0/verifily_sdk/verifily_sdk/client.py +1133 -0
  203. verifily-1.2.0/verifily_sdk/verifily_sdk/errors.py +57 -0
  204. verifily-1.2.0/verifily_sdk/verifily_sdk/models.py +504 -0
  205. verifily-1.2.0/verifily_sdk/verifily_sdk/utils.py +43 -0
verifily-1.2.0/LICENSE ADDED
@@ -0,0 +1,76 @@
1
+ Business Source License 1.1
2
+
3
+ Parameters
4
+
5
+ Licensor: Verifily / Arsenis Papachristos
6
+ Licensed Work: Verifily 1.2.0
7
+ The Licensed Work is (c) 2026 Verifily.
8
+ Additional Use Grant: You may use the Licensed Work for any purpose
9
+ except offering it as a commercial data quality
10
+ or ML pipeline gating service to third parties.
11
+ Change Date: 2030-02-16
12
+ Change License: Apache License, Version 2.0
13
+
14
+ Terms
15
+
16
+ The Licensor hereby grants you the right to copy, modify, create derivative
17
+ works, redistribute, and make non-production use of the Licensed Work. The
18
+ Licensor may make an Additional Use Grant, above, permitting limited
19
+ production use.
20
+
21
+ Effective on the Change Date, or the fourth anniversary of the first publicly
22
+ available distribution of a specific version of the Licensed Work under this
23
+ License, whichever comes first, the Licensor hereby grants you rights under
24
+ the terms of the Change License, and the rights granted in the paragraph
25
+ above terminate.
26
+
27
+ If your use of the Licensed Work does not comply with the requirements
28
+ currently in effect as described in this License, you must purchase a
29
+ commercial license from the Licensor, its affiliated entities, or authorized
30
+ resellers, or you must refrain from using the Licensed Work.
31
+
32
+ All copies of the original and modified Licensed Work, and derivative works
33
+ of the Licensed Work, are subject to this License. This License applies
34
+ separately for each version of the Licensed Work and the Change Date may vary
35
+ for each version of the Licensed Work released by Licensor.
36
+
37
+ You must conspicuously display this License on each original or modified copy
38
+ of the Licensed Work. If you receive the Licensed Work in original or
39
+ modified form from a third party, the terms and conditions set forth in this
40
+ License apply to your use of that work.
41
+
42
+ Any use of the Licensed Work in violation of this License will automatically
43
+ terminate your rights under this License for the current and all other
44
+ versions of the Licensed Work.
45
+
46
+ This License does not grant you any right in any trademark or logo of
47
+ Licensor or its affiliates (provided that you may use a trademark or logo of
48
+ Licensor as expressly required by this License).
49
+
50
+ TO THE EXTENT PERMITTED BY APPLICABLE LAW, THE LICENSED WORK IS PROVIDED ON
51
+ AN "AS IS" BASIS. LICENSOR HEREBY DISCLAIMS ALL WARRANTIES AND CONDITIONS,
52
+ EXPRESS OR IMPLIED, INCLUDING (WITHOUT LIMITATION) WARRANTIES OF
53
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT, AND
54
+ TITLE.
55
+
56
+ MariaDB hereby grants you permission to use this License's text to license
57
+ your works, and to refer to it using the trademark "Business Source License",
58
+ as long as you comply with the Covenants of Licensor below.
59
+
60
+ Covenants of Licensor
61
+
62
+ In consideration of the right to use this License's text and the "Business
63
+ Source License" name and trademark, Licensor covenants to MariaDB, and to all
64
+ other recipients of the licensed work to be provided by Licensor:
65
+
66
+ 1. To specify as the Change License the GPL Version 2.0 or any later version,
67
+ or a license that is compatible with GPL Version 2.0 or a later version,
68
+ where "compatible" means that software provided under the Change License
69
+ can be included in a program with software provided under GPL Version 2.0
70
+ or a later version. Licensor may specify additional Change Licenses without
71
+ limitation.
72
+
73
+ 2. To either: (a) specify an additional grant of rights to use that does not
74
+ impose any additional restriction on the right granted in this License, as
75
+ the Additional Use Grant; or (b) insert the text "None".
76
+ 3. To specify a Change Date. 4. Not to modify this License in any other way.
verifily-1.2.0/MANIFEST.in ADDED
@@ -0,0 +1,2 @@
1
+ exclude scripts/keygen.py
2
+ prune scripts/.keys
verifily-1.2.0/PKG-INFO ADDED
@@ -0,0 +1,222 @@
1
+ Metadata-Version: 2.4
2
+ Name: verifily
3
+ Version: 1.2.0
4
+ Summary: ML data quality gate — ingest, validate, and ship datasets with confidence.
5
+ Author: Verifily Team
6
+ License: BSL-1.1
7
+ Project-URL: Homepage, https://verifily.io
8
+ Project-URL: Documentation, https://verifily.io/docs
9
+ Project-URL: Repository, https://github.com/verifily/verifily
10
+ Keywords: ml,data-quality,dataset,validation,ci,pipeline
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: typer>=0.9.0
24
+ Requires-Dist: rich>=13.0.0
25
+ Requires-Dist: pyyaml>=6.0
26
+ Requires-Dist: cryptography>=41.0.0
27
+ Provides-Extra: api
28
+ Requires-Dist: fastapi>=0.100.0; extra == "api"
29
+ Requires-Dist: uvicorn>=0.23.0; extra == "api"
30
+ Requires-Dist: pydantic>=2.0.0; extra == "api"
31
+ Provides-Extra: sdk
32
+ Requires-Dist: httpx>=0.24.0; extra == "sdk"
33
+ Requires-Dist: pydantic>=2.0.0; extra == "sdk"
34
+ Provides-Extra: arrow
35
+ Requires-Dist: pyarrow>=14.0.0; extra == "arrow"
36
+ Provides-Extra: pii
37
+ Requires-Dist: spacy>=3.5.0; extra == "pii"
38
+ Provides-Extra: huggingface
39
+ Requires-Dist: datasets>=2.14.0; extra == "huggingface"
40
+ Provides-Extra: wandb
41
+ Requires-Dist: wandb>=0.16.0; extra == "wandb"
42
+ Provides-Extra: mlflow
43
+ Requires-Dist: mlflow>=2.8.0; extra == "mlflow"
44
+ Provides-Extra: integrations
45
+ Requires-Dist: datasets>=2.14.0; extra == "integrations"
46
+ Requires-Dist: wandb>=0.16.0; extra == "integrations"
47
+ Requires-Dist: mlflow>=2.8.0; extra == "integrations"
48
+ Provides-Extra: dev
49
+ Requires-Dist: pytest>=7.0; extra == "dev"
50
+ Requires-Dist: fastapi>=0.100.0; extra == "dev"
51
+ Requires-Dist: uvicorn>=0.23.0; extra == "dev"
52
+ Requires-Dist: pydantic>=2.0.0; extra == "dev"
53
+ Requires-Dist: httpx>=0.24.0; extra == "dev"
54
+ Provides-Extra: all
55
+ Requires-Dist: fastapi>=0.100.0; extra == "all"
56
+ Requires-Dist: uvicorn>=0.23.0; extra == "all"
57
+ Requires-Dist: pydantic>=2.0.0; extra == "all"
58
+ Requires-Dist: httpx>=0.24.0; extra == "all"
59
+ Requires-Dist: pytest>=7.0; extra == "all"
60
+ Requires-Dist: pyarrow>=14.0.0; extra == "all"
61
+ Requires-Dist: spacy>=3.5.0; extra == "all"
62
+ Requires-Dist: datasets>=2.14.0; extra == "all"
63
+ Requires-Dist: wandb>=0.16.0; extra == "all"
64
+ Requires-Dist: mlflow>=2.8.0; extra == "all"
65
+ Dynamic: license-file
66
+ Dynamic: requires-python
67
+
68
+ # Verifily
69
+
70
+ ML data quality gate. Ingest, validate, and ship datasets with confidence.
71
+
72
+ Verifily catches contamination, PII leaks, SQL template leakage, contract violations, and metric regressions before they reach production. It runs locally — no network, no GPU, no external services.
73
+
74
+ One command gates your CI pipeline. Exit 0 means ship.
75
+
76
+ ## Install
77
+
78
+ ```bash
79
+ pip install -e .
80
+ ```
81
+
82
+ For the integrations (HuggingFace, W&B, MLflow) and the API server:
83
+
84
+ ```bash
85
+ pip install -e ".[all]"
86
+ ```
87
+
88
+ ## 60-Second Quick Start
89
+
90
+ ```bash
91
+ # 1. Scaffold a project
92
+ verifily quickstart my_project
93
+
94
+ # 2. Ingest raw data (JSONL, CSV, Parquet, or HuggingFace)
95
+ verifily ingest --in my_project/data/raw/sample.csv \
96
+ --out my_project/data/artifact \
97
+ --schema sft
98
+
99
+ # 3. Run the CI gate
100
+ verifily pipeline --config my_project/verifily.yaml --ci
101
+ # Exit 0 = SHIP, 1 = DONT_SHIP, 2 = INVESTIGATE, 3 = CONTRACT_FAIL, 4 = TOOL_ERROR
102
+ ```
103
+
104
+ Or run the full demo end-to-end:
105
+
106
+ ```bash
107
+ bash scripts/demo_quickstart_ci.sh
108
+ ```
109
+
110
+ ## What Verifily Prevents
111
+
112
+ | Risk | How Verifily catches it |
113
+ |------|------------------------|
114
+ | Train/eval data leakage | Exact-match + Jaccard contamination detection via MinHash LSH |
115
+ | SQL template leakage | Three-tier NL2SQL gate: exact SQL, template fingerprint, question near-dup |
116
+ | PII in training data | Regex-based PII scan with configurable thresholds and redaction |
117
+ | Missing or corrupt artifacts | Run contract validation (hashes, configs, eval results) |
118
+ | Metric regressions | Threshold checks against baselines with delta tracking |
119
+ | Ambiguous ship decisions | Deterministic gate: blockers always block, no silent passes |
120
+ | Dataset drift | Privacy-safe fingerprinting and diff without raw data exposure |
121
+
122
+ ## Supported Schemas
123
+
124
+ 8 canonical dataset types, auto-detected from field names:
125
+
126
+ | Schema | Required fields | Use case |
127
+ |--------|----------------|----------|
128
+ | `sft` | instruction, output | Supervised fine-tuning |
129
+ | `qa` | question, answer | Question answering |
130
+ | `classification` | text, label | Text classification |
131
+ | `chat` | messages | Multi-turn conversations |
132
+ | `summarization` | document, summary | Summarization tasks |
133
+ | `translation` | source, target | Translation pairs |
134
+ | `rm_pairwise` | prompt, chosen, rejected | Reward model training |
135
+ | `nl2sql` | question, sql, schema | Natural language to SQL |
136
+
137
+ ## CLI Commands
138
+
139
+ | Command | Purpose |
140
+ |---------|---------|
141
+ | `verifily quickstart <path>` | Scaffold a working project |
142
+ | `verifily ingest` | Normalize raw data to artifact format (JSONL, CSV, Parquet, hf://) |
143
+ | `verifily pipeline --ci` | Run full quality gate (CI mode) |
144
+ | `verifily report` | Dataset quality report with PII scan |
145
+ | `verifily contamination` | Detect train/eval overlap |
146
+ | `verifily contract-check` | Validate run artifacts |
147
+ | `verifily fingerprint` | Privacy-safe dataset summary |
148
+ | `verifily diff-datasets` | Compare two datasets |
149
+ | `verifily ci-init` | Generate GitHub/GitLab CI config |
150
+ | `verifily serve` | Start API server |
151
+ | `verifily version` | Show version, Python, platform |
152
+
153
+ ### NL2SQL Commands
154
+
155
+ | Command | Purpose |
156
+ |---------|---------|
157
+ | `verifily nl2sql validate` | Validate NL2SQL dataset structure |
158
+ | `verifily nl2sql fingerprint` | SQL normalization + template fingerprinting |
159
+ | `verifily nl2sql split` | Leakage-resistant train/eval splitting |
160
+ | `verifily nl2sql gate` | Three-tier contamination gate for NL2SQL |
161
+
162
+ ## Integrations
163
+
164
+ All integrations are opt-in and use lazy imports, so none of them is a hard dependency.
165
+
166
+ | Integration | What it does |
167
+ |-------------|-------------|
168
+ | **HuggingFace Datasets** | Load datasets via `hf://` URIs |
169
+ | **Weights & Biases** | Log decisions, metrics, and artifacts |
170
+ | **MLflow** | Track runs with model registry integration |
171
+ | **GitHub Actions** | Pre-built action + CI workflow generator |
172
+
173
+ ```bash
174
+ # HuggingFace
175
+ verifily ingest --in "hf://squad" --out datasets/squad --schema qa
176
+
177
+ # W&B + MLflow
178
+ verifily pipeline --config pipeline.yaml --wandb --mlflow
179
+ ```
180
+
181
+ ## CI Exit Codes
182
+
183
+ | Code | Label | Meaning |
184
+ |------|-------|---------|
185
+ | `0` | SHIP | All quality gates passed |
186
+ | `1` | DONT_SHIP | One or more blockers failed |
187
+ | `2` | INVESTIGATE | Risk flags present, no hard blockers |
188
+ | `3` | CONTRACT_FAIL | Run contract invalid |
189
+ | `4` | TOOL_ERROR | Invalid config or unexpected error |
190
+
191
+ ## Documentation
192
+
193
+ - [Product Overview](docs/product-overview.md)
194
+ - [Quick Install](docs/quick_install.md)
195
+ - [3-Minute Quickstart](docs/3_minute_quickstart.md)
196
+ - [Decision Gate](docs/decision_gate.md)
197
+ - [Dataset Fingerprints](docs/fingerprints.md)
198
+ - [CI Init](docs/ci/quick_ci_init.md)
199
+ - [API & Jobs](docs/api_jobs.md)
200
+ - [Monitor](docs/monitor.md)
201
+ - [Versioning & Stability](VERSIONING.md)
202
+ - [Changelog](CHANGELOG.md)
203
+
204
+ ## Versioning
205
+
206
+ Verifily follows [Semantic Versioning](https://semver.org/). See [VERSIONING.md](VERSIONING.md).
207
+
208
+ Current version: `1.2.0`
209
+
210
+ ## Stability Guarantees
211
+
212
+ - **Deterministic outputs** — fixed seed produces identical results across runs
213
+ - **Stable contracts** — `run_contract_v1` schema is frozen within the v1.x line
214
+ - **Stable exit codes** — 0/1/2/3/4 semantics are frozen
215
+ - **Backward compatibility** within MAJOR version — artifacts from any v1.x release are accepted
216
+ - **1,300+ tests** — all deterministic, no network, no GPU
217
+
218
+ ## License
219
+
220
+ Business Source License 1.1 (BSL-1.1). See [LICENSE](LICENSE) for details.
221
+
222
+ You may use Verifily for any purpose except offering it as a commercial data quality or ML pipeline gating service to third parties. On 2030-02-16 — or on the fourth anniversary of this version's first public release, if that comes sooner — the license converts to Apache 2.0.
verifily-1.2.0/README.md ADDED
@@ -0,0 +1,155 @@
1
+ # Verifily
2
+
3
+ ML data quality gate. Ingest, validate, and ship datasets with confidence.
4
+
5
+ Verifily catches contamination, PII leaks, SQL template leakage, contract violations, and metric regressions before they reach production. It runs locally — no network, no GPU, no external services.
6
+
7
+ One command gates your CI pipeline. Exit 0 means ship.
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install -e .
13
+ ```
14
+
15
+ For the integrations (HuggingFace, W&B, MLflow) and the API server:
16
+
17
+ ```bash
18
+ pip install -e ".[all]"
19
+ ```
20
+
21
+ ## 60-Second Quick Start
22
+
23
+ ```bash
24
+ # 1. Scaffold a project
25
+ verifily quickstart my_project
26
+
27
+ # 2. Ingest raw data (JSONL, CSV, Parquet, or HuggingFace)
28
+ verifily ingest --in my_project/data/raw/sample.csv \
29
+ --out my_project/data/artifact \
30
+ --schema sft
31
+
32
+ # 3. Run the CI gate
33
+ verifily pipeline --config my_project/verifily.yaml --ci
34
+ # Exit 0 = SHIP, 1 = DONT_SHIP, 2 = INVESTIGATE, 3 = CONTRACT_FAIL, 4 = TOOL_ERROR
35
+ ```
36
+
37
+ Or run the full demo end-to-end:
38
+
39
+ ```bash
40
+ bash scripts/demo_quickstart_ci.sh
41
+ ```
42
+
43
+ ## What Verifily Prevents
44
+
45
+ | Risk | How Verifily catches it |
46
+ |------|------------------------|
47
+ | Train/eval data leakage | Exact-match + Jaccard contamination detection via MinHash LSH |
48
+ | SQL template leakage | Three-tier NL2SQL gate: exact SQL, template fingerprint, question near-dup |
49
+ | PII in training data | Regex-based PII scan with configurable thresholds and redaction |
50
+ | Missing or corrupt artifacts | Run contract validation (hashes, configs, eval results) |
51
+ | Metric regressions | Threshold checks against baselines with delta tracking |
52
+ | Ambiguous ship decisions | Deterministic gate: blockers always block, no silent passes |
53
+ | Dataset drift | Privacy-safe fingerprinting and diff without raw data exposure |
54
+
55
+ ## Supported Schemas
56
+
57
+ 8 canonical dataset types, auto-detected from field names:
58
+
59
+ | Schema | Required fields | Use case |
60
+ |--------|----------------|----------|
61
+ | `sft` | instruction, output | Supervised fine-tuning |
62
+ | `qa` | question, answer | Question answering |
63
+ | `classification` | text, label | Text classification |
64
+ | `chat` | messages | Multi-turn conversations |
65
+ | `summarization` | document, summary | Summarization tasks |
66
+ | `translation` | source, target | Translation pairs |
67
+ | `rm_pairwise` | prompt, chosen, rejected | Reward model training |
68
+ | `nl2sql` | question, sql, schema | Natural language to SQL |
69
+
70
+ ## CLI Commands
71
+
72
+ | Command | Purpose |
73
+ |---------|---------|
74
+ | `verifily quickstart <path>` | Scaffold a working project |
75
+ | `verifily ingest` | Normalize raw data to artifact format (JSONL, CSV, Parquet, hf://) |
76
+ | `verifily pipeline --ci` | Run full quality gate (CI mode) |
77
+ | `verifily report` | Dataset quality report with PII scan |
78
+ | `verifily contamination` | Detect train/eval overlap |
79
+ | `verifily contract-check` | Validate run artifacts |
80
+ | `verifily fingerprint` | Privacy-safe dataset summary |
81
+ | `verifily diff-datasets` | Compare two datasets |
82
+ | `verifily ci-init` | Generate GitHub/GitLab CI config |
83
+ | `verifily serve` | Start API server |
84
+ | `verifily version` | Show version, Python, platform |
85
+
86
+ ### NL2SQL Commands
87
+
88
+ | Command | Purpose |
89
+ |---------|---------|
90
+ | `verifily nl2sql validate` | Validate NL2SQL dataset structure |
91
+ | `verifily nl2sql fingerprint` | SQL normalization + template fingerprinting |
92
+ | `verifily nl2sql split` | Leakage-resistant train/eval splitting |
93
+ | `verifily nl2sql gate` | Three-tier contamination gate for NL2SQL |
94
+
95
+ ## Integrations
96
+
97
+ All integrations are opt-in and use lazy imports, so none of them is a hard dependency.
98
+
99
+ | Integration | What it does |
100
+ |-------------|-------------|
101
+ | **HuggingFace Datasets** | Load datasets via `hf://` URIs |
102
+ | **Weights & Biases** | Log decisions, metrics, and artifacts |
103
+ | **MLflow** | Track runs with model registry integration |
104
+ | **GitHub Actions** | Pre-built action + CI workflow generator |
105
+
106
+ ```bash
107
+ # HuggingFace
108
+ verifily ingest --in "hf://squad" --out datasets/squad --schema qa
109
+
110
+ # W&B + MLflow
111
+ verifily pipeline --config pipeline.yaml --wandb --mlflow
112
+ ```
113
+
114
+ ## CI Exit Codes
115
+
116
+ | Code | Label | Meaning |
117
+ |------|-------|---------|
118
+ | `0` | SHIP | All quality gates passed |
119
+ | `1` | DONT_SHIP | One or more blockers failed |
120
+ | `2` | INVESTIGATE | Risk flags present, no hard blockers |
121
+ | `3` | CONTRACT_FAIL | Run contract invalid |
122
+ | `4` | TOOL_ERROR | Invalid config or unexpected error |
123
+
124
+ ## Documentation
125
+
126
+ - [Product Overview](docs/product-overview.md)
127
+ - [Quick Install](docs/quick_install.md)
128
+ - [3-Minute Quickstart](docs/3_minute_quickstart.md)
129
+ - [Decision Gate](docs/decision_gate.md)
130
+ - [Dataset Fingerprints](docs/fingerprints.md)
131
+ - [CI Init](docs/ci/quick_ci_init.md)
132
+ - [API & Jobs](docs/api_jobs.md)
133
+ - [Monitor](docs/monitor.md)
134
+ - [Versioning & Stability](VERSIONING.md)
135
+ - [Changelog](CHANGELOG.md)
136
+
137
+ ## Versioning
138
+
139
+ Verifily follows [Semantic Versioning](https://semver.org/). See [VERSIONING.md](VERSIONING.md).
140
+
141
+ Current version: `1.2.0`
142
+
143
+ ## Stability Guarantees
144
+
145
+ - **Deterministic outputs** — fixed seed produces identical results across runs
146
+ - **Stable contracts** — `run_contract_v1` schema is frozen within the v1.x line
147
+ - **Stable exit codes** — 0/1/2/3/4 semantics are frozen
148
+ - **Backward compatibility** within MAJOR version — artifacts from any v1.x release are accepted
149
+ - **1,300+ tests** — all deterministic, no network, no GPU
150
+
151
+ ## License
152
+
153
+ Business Source License 1.1 (BSL-1.1). See [LICENSE](LICENSE) for details.
154
+
155
+ You may use Verifily for any purpose except offering it as a commercial data quality or ML pipeline gating service to third parties. On 2030-02-16 — or on the fourth anniversary of this version's first public release, if that comes sooner — the license converts to Apache 2.0.
@@ -0,0 +1,97 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "verifily"
7
+ version = "1.2.0"
8
+ description = "ML data quality gate — ingest, validate, and ship datasets with confidence."
9
+ readme = "README.md"
10
+ license = {text = "BSL-1.1"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "Verifily Team"},
14
+ ]
15
+ keywords = ["ml", "data-quality", "dataset", "validation", "ci", "pipeline"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Developers",
19
+ "Intended Audience :: Science/Research",
20
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
21
+ "Programming Language :: Python :: 3",
22
+ "Programming Language :: Python :: 3.9",
23
+ "Programming Language :: Python :: 3.10",
24
+ "Programming Language :: Python :: 3.11",
25
+ "Programming Language :: Python :: 3.12",
26
+ ]
27
+
28
+ dependencies = [
29
+ "typer>=0.9.0",
30
+ "rich>=13.0.0",
31
+ "pyyaml>=6.0",
32
+ "cryptography>=41.0.0",
33
+ ]
34
+
35
+ [project.urls]
36
+ Homepage = "https://verifily.io"
37
+ Documentation = "https://verifily.io/docs"
38
+ Repository = "https://github.com/verifily/verifily"
39
+
40
+ [project.optional-dependencies]
41
+ api = [
42
+ "fastapi>=0.100.0",
43
+ "uvicorn>=0.23.0",
44
+ "pydantic>=2.0.0",
45
+ ]
46
+ sdk = [
47
+ "httpx>=0.24.0",
48
+ "pydantic>=2.0.0",
49
+ ]
50
+ arrow = [
51
+ "pyarrow>=14.0.0",
52
+ ]
53
+ pii = [
54
+ "spacy>=3.5.0",
55
+ ]
56
+ huggingface = [
57
+ "datasets>=2.14.0",
58
+ ]
59
+ wandb = [
60
+ "wandb>=0.16.0",
61
+ ]
62
+ mlflow = [
63
+ "mlflow>=2.8.0",
64
+ ]
65
+ integrations = [
66
+ "datasets>=2.14.0",
67
+ "wandb>=0.16.0",
68
+ "mlflow>=2.8.0",
69
+ ]
70
+ dev = [
71
+ "pytest>=7.0",
72
+ "fastapi>=0.100.0",
73
+ "uvicorn>=0.23.0",
74
+ "pydantic>=2.0.0",
75
+ "httpx>=0.24.0",
76
+ ]
77
+ all = [
78
+ "fastapi>=0.100.0",
79
+ "uvicorn>=0.23.0",
80
+ "pydantic>=2.0.0",
81
+ "httpx>=0.24.0",
82
+ "pytest>=7.0",
83
+ "pyarrow>=14.0.0",
84
+ "spacy>=3.5.0",
85
+ "datasets>=2.14.0",
86
+ "wandb>=0.16.0",
87
+ "mlflow>=2.8.0",
88
+ ]
89
+
90
+ [project.scripts]
91
+ verifily = "verifily_cli_v1.cli:app"
92
+
93
+ [tool.setuptools.packages.find]
94
+ include = ["verifily_cli_v1*", "verifily_sdk*"]
95
+
96
+ [tool.pytest.ini_options]
97
+ testpaths = ["verifily_cli_v1/tests", "verifily_sdk/tests"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,36 @@
"""Setuptools shim for the ``verifily`` distribution.

All package metadata -- name, version, description, dependencies, extras,
console scripts and package discovery -- is declared in ``pyproject.toml``.

This file used to carry a hand-written copy of the metadata of a different
distribution (``verifily-train`` 1.1.0).  That copy named a ``verifily_train``
package which ``pyproject.toml``'s package discovery does not include, pulled
in a heavy training dependency set, and registered a ``verifily`` console
script pointing at ``verifily_train.cli:main`` -- conflicting with the
``verifily = "verifily_cli_v1.cli:app"`` script declared in
``pyproject.toml``.  Keeping two contradictory definitions in one source
distribution is misleading, so the duplicated arguments were removed.
"""

from setuptools import setup

# Intentionally called without arguments: pyproject.toml is the single source
# of truth for this project's metadata.
setup()