specsmith 0.10.1.dev287__tar.gz → 0.10.1.dev292__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (229) hide show
  1. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/PKG-INFO +1 -1
  2. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/pyproject.toml +1 -1
  3. specsmith-0.10.1.dev292/src/specsmith/agent/hf_sync.py +181 -0
  4. specsmith-0.10.1.dev292/src/specsmith/agent/spawner.py +92 -0
  5. specsmith-0.10.1.dev292/src/specsmith/agent/teams.py +103 -0
  6. specsmith-0.10.1.dev292/src/specsmith/eval/__init__.py +111 -0
  7. specsmith-0.10.1.dev292/src/specsmith/eval/builtins.py +95 -0
  8. specsmith-0.10.1.dev292/src/specsmith/eval/runner.py +102 -0
  9. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/governance_logic.py +58 -0
  10. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/PKG-INFO +1 -1
  11. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/SOURCES.txt +7 -0
  12. specsmith-0.10.1.dev292/tests/test_new_modules.py +260 -0
  13. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/LICENSE +0 -0
  14. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/README.md +0 -0
  15. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/setup.cfg +0 -0
  16. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/__init__.py +0 -0
  17. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/belief.py +0 -0
  18. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/certainty.py +0 -0
  19. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/failure_graph.py +0 -0
  20. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/py.typed +0 -0
  21. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/recovery.py +0 -0
  22. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/session.py +0 -0
  23. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/stress_tester.py +0 -0
  24. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/epistemic/trace.py +0 -0
  25. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/__init__.py +0 -0
  26. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/__main__.py +0 -0
  27. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/__init__.py +0 -0
  28. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/broker.py +0 -0
  29. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/chat_runner.py +0 -0
  30. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/cleanup.py +0 -0
  31. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/core.py +0 -0
  32. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/endpoints.py +0 -0
  33. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/events.py +0 -0
  34. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/execution_profiles.py +0 -0
  35. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/fallback.py +0 -0
  36. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/indexer.py +0 -0
  37. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/mcp.py +0 -0
  38. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/memory.py +0 -0
  39. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/model_intelligence.py +0 -0
  40. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/orchestrator.py +0 -0
  41. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/permissions.py +0 -0
  42. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/profiles.py +0 -0
  43. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/provider_registry.py +0 -0
  44. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/repl.py +0 -0
  45. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/router.py +0 -0
  46. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/rules.py +0 -0
  47. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/runner.py +0 -0
  48. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/safety.py +0 -0
  49. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/suggester.py +0 -0
  50. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/tools.py +0 -0
  51. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/verifier.py +0 -0
  52. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/agent/voice.py +0 -0
  53. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/architect.py +0 -0
  54. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/auditor.py +0 -0
  55. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/auth.py +0 -0
  56. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/block_export.py +0 -0
  57. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/cli.py +0 -0
  58. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/commands/__init__.py +0 -0
  59. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/commands/intelligence.py +0 -0
  60. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/compliance.py +0 -0
  61. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/compressor.py +0 -0
  62. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/config.py +0 -0
  63. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/console_utils.py +0 -0
  64. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/context_window.py +0 -0
  65. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/credit_analyzer.py +0 -0
  66. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/credits.py +0 -0
  67. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/__init__.py +0 -0
  68. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/base.py +0 -0
  69. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/citations.py +0 -0
  70. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/fpd.py +0 -0
  71. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/odp.py +0 -0
  72. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/patentsview.py +0 -0
  73. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/pfw.py +0 -0
  74. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/ppubs.py +0 -0
  75. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/datasources/ptab.py +0 -0
  76. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/differ.py +0 -0
  77. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/doctor.py +0 -0
  78. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/drive.py +0 -0
  79. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/editor.py +0 -0
  80. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/__init__.py +0 -0
  81. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/belief.py +0 -0
  82. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/certainty.py +0 -0
  83. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/failure_graph.py +0 -0
  84. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/recovery.py +0 -0
  85. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/epistemic/stress_tester.py +0 -0
  86. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/executor.py +0 -0
  87. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/exporter.py +0 -0
  88. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/__init__.py +0 -0
  89. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/app.py +0 -0
  90. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/main_window.py +0 -0
  91. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/session_tab.py +0 -0
  92. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/theme.py +0 -0
  93. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/__init__.py +0 -0
  94. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/chat_view.py +0 -0
  95. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/input_bar.py +0 -0
  96. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/provider_bar.py +0 -0
  97. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/token_meter.py +0 -0
  98. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/tool_panel.py +0 -0
  99. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/widgets/update_checker.py +0 -0
  100. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/gui/worker.py +0 -0
  101. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/history_search.py +0 -0
  102. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/importer.py +0 -0
  103. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/instinct.py +0 -0
  104. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/__init__.py +0 -0
  105. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/agent_skill.py +0 -0
  106. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/aider.py +0 -0
  107. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/base.py +0 -0
  108. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/claude_code.py +0 -0
  109. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/copilot.py +0 -0
  110. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/cursor.py +0 -0
  111. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/gemini.py +0 -0
  112. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/integrations/windsurf.py +0 -0
  113. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/languages.py +0 -0
  114. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/ledger.py +0 -0
  115. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/patent.py +0 -0
  116. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/paths.py +0 -0
  117. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/phase.py +0 -0
  118. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/plugins.py +0 -0
  119. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/profiles.py +0 -0
  120. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/rate_limits.py +0 -0
  121. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/releaser.py +0 -0
  122. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/requirements.py +0 -0
  123. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/requirements_parser.py +0 -0
  124. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/retrieval.py +0 -0
  125. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/safe_write.py +0 -0
  126. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/scaffolder.py +0 -0
  127. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/serve.py +0 -0
  128. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/session.py +0 -0
  129. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/session_init.py +0 -0
  130. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/skills.py +0 -0
  131. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/sync.py +0 -0
  132. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/agents.md.j2 +0 -0
  133. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/bug_report.md.j2 +0 -0
  134. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/code_of_conduct.md.j2 +0 -0
  135. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/contributing.md.j2 +0 -0
  136. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/feature_request.md.j2 +0 -0
  137. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/license-Apache-2.0.j2 +0 -0
  138. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/license-MIT.j2 +0 -0
  139. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/pull_request_template.md.j2 +0 -0
  140. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/community/security.md.j2 +0 -0
  141. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/docs/architecture.md.j2 +0 -0
  142. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/docs/mkdocs.yml.j2 +0 -0
  143. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/docs/readthedocs.yaml.j2 +0 -0
  144. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/docs/requirements.md.j2 +0 -0
  145. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/docs/test-spec.md.j2 +0 -0
  146. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/editorconfig.j2 +0 -0
  147. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/gitattributes.j2 +0 -0
  148. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/gitignore.j2 +0 -0
  149. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/go/go.mod.j2 +0 -0
  150. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/go/main.go.j2 +0 -0
  151. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/belief-registry.md.j2 +0 -0
  152. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/context-budget.md.j2 +0 -0
  153. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/drift-metrics.md.j2 +0 -0
  154. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/epistemic-axioms.md.j2 +0 -0
  155. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/failure-modes.md.j2 +0 -0
  156. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/lifecycle.md.j2 +0 -0
  157. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/roles.md.j2 +0 -0
  158. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/rules.md.j2 +0 -0
  159. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/session-protocol.md.j2 +0 -0
  160. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/uncertainty-map.md.j2 +0 -0
  161. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/governance/verification.md.j2 +0 -0
  162. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/js/package.json.j2 +0 -0
  163. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/ledger.md.j2 +0 -0
  164. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/python/cli.py.j2 +0 -0
  165. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/python/init.py.j2 +0 -0
  166. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/python/pyproject.toml.j2 +0 -0
  167. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/readme.md.j2 +0 -0
  168. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/rust/Cargo.toml.j2 +0 -0
  169. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/rust/main.rs.j2 +0 -0
  170. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/exec.cmd.j2 +0 -0
  171. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/exec.sh.j2 +0 -0
  172. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/run.cmd.j2 +0 -0
  173. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/run.sh.j2 +0 -0
  174. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/setup.cmd.j2 +0 -0
  175. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/scripts/setup.sh.j2 +0 -0
  176. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/templates/workflows/release.yml.j2 +0 -0
  177. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/tool_installer.py +0 -0
  178. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/toolrules.py +0 -0
  179. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/tools.py +0 -0
  180. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/trace.py +0 -0
  181. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/updater.py +0 -0
  182. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/upgrader.py +0 -0
  183. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/validator.py +0 -0
  184. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs/__init__.py +0 -0
  185. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs/base.py +0 -0
  186. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs/bitbucket.py +0 -0
  187. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs/github.py +0 -0
  188. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs/gitlab.py +0 -0
  189. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/vcs_commands.py +0 -0
  190. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/wireframes.py +0 -0
  191. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith/workspace.py +0 -0
  192. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/dependency_links.txt +0 -0
  193. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/entry_points.txt +0 -0
  194. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/requires.txt +0 -0
  195. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/src/specsmith.egg-info/top_level.txt +0 -0
  196. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_CMD_001.py +0 -0
  197. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_agent_profiles.py +0 -0
  198. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_agent_runner_ready.py +0 -0
  199. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_auditor.py +0 -0
  200. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_chat_diff_decision.py +0 -0
  201. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_chat_runner_openai_compat.py +0 -0
  202. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_chat_stdin_protocol.py +0 -0
  203. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_cli.py +0 -0
  204. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_cli_workflows_history_drive.py +0 -0
  205. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_compliance.py +0 -0
  206. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_compressor.py +0 -0
  207. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_e2e_nexus.py +0 -0
  208. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_endpoints_cli.py +0 -0
  209. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_endpoints_store.py +0 -0
  210. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_epistemic.py +0 -0
  211. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_fallback_chain.py +0 -0
  212. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_importer.py +0 -0
  213. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_integrations.py +0 -0
  214. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_intelligence.py +0 -0
  215. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_mcp_client.py +0 -0
  216. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_nexus.py +0 -0
  217. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_permissions.py +0 -0
  218. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_phase1_4_new.py +0 -0
  219. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_phase34_completion.py +0 -0
  220. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_rate_limits.py +0 -0
  221. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_scaffolder.py +0 -0
  222. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_skill_marketplace.py +0 -0
  223. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_smoke.py +0 -0
  224. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_suggester.py +0 -0
  225. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_tools.py +0 -0
  226. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_validator.py +0 -0
  227. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_vcs.py +0 -0
  228. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_warp_parity.py +0 -0
  229. {specsmith-0.10.1.dev287 → specsmith-0.10.1.dev292}/tests/test_warp_parity_followup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: specsmith
3
- Version: 0.10.1.dev287
3
+ Version: 0.10.1.dev292
4
4
  Summary: Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands.
5
5
  Author: BitConcepts
6
6
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "specsmith"
7
- version = "0.10.1.dev287"
7
+ version = "0.10.1.dev292"
8
8
  description = "Applied Epistemic Engineering toolkit — AEE agent sessions, execution profiles, FPGA/HDL governance, tool installer, 50+ CLI commands."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -0,0 +1,181 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """HuggingFace Open LLM Leaderboard sync for model intelligence (REQ-223).
4
+
5
+ Fetches benchmark scores from the HuggingFace API and populates
6
+ `.specsmith/model_scores.json` so that `rank_models_for_role()` uses
7
+ real data instead of hardcoded baselines.
8
+
9
+ Usage:
10
+ from specsmith.agent.hf_sync import sync_scores
11
+ results = sync_scores() # returns dict of model_id -> {benchmark: score}
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import time
18
+ import urllib.request
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ # Base URL of the Hugging Face Hub REST API (model metadata is served under /models/<model_id>)
23
+ HF_API_BASE = "https://huggingface.co/api"
24
+
25
+ # Models we track (subset of popular models with known benchmark data)
26
+ TRACKED_MODELS: list[str] = [
27
+ "gpt-4.1",
28
+ "gpt-4.1-mini",
29
+ "gpt-4o",
30
+ "gpt-4o-mini",
31
+ "claude-sonnet-4-20250514",
32
+ "claude-3.5-sonnet",
33
+ "gemini-2.5-pro",
34
+ "gemini-2.5-flash",
35
+ "Qwen/Qwen2.5-Coder-32B-Instruct",
36
+ "Qwen/Qwen2.5-Coder-7B-Instruct",
37
+ "mistralai/Mistral-Large-Latest",
38
+ "meta-llama/Llama-3.3-70B-Instruct",
39
+ "deepseek-ai/DeepSeek-V3",
40
+ ]
41
+
42
# File name of the scores cache; it is stored in the project's ".specsmith" directory.
SCORES_FILENAME = "model_scores.json"


def _scores_path(project_dir: str | Path = ".") -> Path:
    """Return the absolute path of the scores cache file for *project_dir*."""
    return Path(project_dir).resolve() / ".specsmith" / SCORES_FILENAME


def load_cached_scores(project_dir: str | Path = ".") -> dict[str, Any]:
    """Load the cached scores document from disk.

    Args:
        project_dir: Project root that contains the ``.specsmith`` directory.

    Returns:
        The parsed JSON object as written by ``save_scores`` (keys
        ``synced_at`` and ``models``). An empty dict is returned when the
        file is missing, unreadable, not valid JSON, or when its top-level
        value is not a JSON object.
    """
    path = _scores_path(project_dir)
    if not path.is_file():
        return {}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both JSON syntax errors and UTF-8 decode errors.
        return {}
    # A hand-edited or corrupted cache may hold a list/string/number; callers
    # rely on dict methods, so treat anything else as "no cache".
    return payload if isinstance(payload, dict) else {}


def save_scores(scores: dict[str, Any], project_dir: str | Path = ".") -> None:
    """Persist model scores to disk.

    The scores are wrapped in a document that also records the UTC time of
    the write under ``synced_at``; the ``.specsmith`` directory is created
    when it does not exist yet.

    Args:
        scores: Mapping of model id to its benchmark scores.
        project_dir: Project root that contains the ``.specsmith`` directory.
    """
    path = _scores_path(project_dir)
    path.parent.mkdir(parents=True, exist_ok=True)
    document = {
        "synced_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        "models": scores,
    }
    path.write_text(json.dumps(document, indent=2, ensure_ascii=False), encoding="utf-8")
71
+
72
+
73
def fetch_hf_model_info(model_id: str, timeout: int = 10) -> dict[str, Any]:
    """Fetch model metadata from the HuggingFace API.

    Args:
        model_id: Model identifier, e.g. ``"org/name"``.
        timeout: Socket timeout in seconds for the HTTP request.

    Returns:
        The decoded JSON object for the model. This is a best-effort lookup:
        any failure (network error, HTTP error, invalid JSON) or a response
        whose top-level value is not a JSON object yields an empty dict.
    """
    from urllib.parse import quote

    # Keep "/" so "org/name" maps to /models/org/name, but escape anything
    # else (spaces, "?", "#", ...) that would otherwise alter the request URL.
    url = f"{HF_API_BASE}/models/{quote(model_id, safe='/')}"
    try:
        request = urllib.request.Request(url, headers={"Accept": "application/json"})
        with urllib.request.urlopen(request, timeout=timeout) as resp:  # noqa: S310
            payload = json.loads(resp.read())
    except Exception:  # noqa: BLE001
        # Deliberately broad: callers fall back to baseline scores on any failure.
        return {}
    return payload if isinstance(payload, dict) else {}
86
+
87
+
88
def _extract_benchmark_scores(model_info: dict[str, Any]) -> dict[str, float]:
    """Extract benchmark scores from HF model card metadata.

    Evaluation entries are collected from ``cardData["eval_results"]`` and
    from the ``results`` lists under ``cardData["model-index"]``. Each entry
    is expected to carry a ``dataset`` and a list of ``metrics``; malformed
    entries are skipped instead of raising.

    Args:
        model_info: Decoded model metadata, as returned by the HF API.

    Returns:
        Mapping of ``"<dataset name>/<metric>"`` (or just ``"<metric>"`` when
        the entry has no dataset name) to the metric value as a float. The
        metric label is its ``name``, falling back to its ``type``.
    """
    card_data = model_info.get("cardData")
    if not isinstance(card_data, dict):
        return {}

    # Gather every evaluation entry, whichever key it was published under.
    entries: list[Any] = []
    eval_results = card_data.get("eval_results")
    if isinstance(eval_results, (list, tuple)):
        entries.extend(eval_results)
    model_index = card_data.get("model-index")
    if isinstance(model_index, (list, tuple)):
        for item in model_index:
            results = item.get("results") if isinstance(item, dict) else None
            if isinstance(results, (list, tuple)):
                entries.extend(results)

    scores: dict[str, float] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue

        dataset = entry.get("dataset")
        if isinstance(dataset, dict):
            dataset_name = dataset.get("name", "")
        elif dataset is None:
            # An explicit null must not turn into the literal prefix "None".
            dataset_name = ""
        else:
            dataset_name = str(dataset)

        metrics = entry.get("metrics")
        if not isinstance(metrics, (list, tuple)):
            continue
        for metric in metrics:
            if not isinstance(metric, dict):
                continue
            label = metric.get("name") or metric.get("type")
            value = metric.get("value")
            if not label or value is None:
                continue
            try:
                number = float(value)
            except (TypeError, ValueError):
                # Non-numeric values such as "n/a" are ignored.
                continue
            key = f"{dataset_name}/{label}" if dataset_name else label
            scores[key] = number
    return scores
115
+
116
+
117
def sync_scores(
    project_dir: str | Path = ".",
    models: list[str] | None = None,
    timeout: int = 10,
) -> dict[str, Any]:
    """Collect scores for the requested models and write them to the cache.

    Model ids containing ``"/"`` are looked up on HuggingFace and use the
    benchmarks extracted from the response. Every other id, and any lookup
    that yields no benchmarks, falls back to the entry in ``BASELINE_SCORES``
    (stored under the ``baseline_composite`` key). Models with neither are
    left out of the result.

    Args:
        project_dir: Project root whose ``.specsmith`` directory holds the cache.
        models: Model ids to sync; ``TRACKED_MODELS`` is used when this is
            ``None`` or empty.
        timeout: Per-request timeout in seconds for HuggingFace lookups.

    Returns:
        Mapping of model id to its scores, identical to what was saved.
    """
    from specsmith.agent.model_intelligence import BASELINE_SCORES

    collected: dict[str, Any] = {}

    for model_id in models or TRACKED_MODELS:
        benchmarks: dict[str, float] = {}
        if "/" in model_id:
            # Only ids of the form "org/name" are queried on HuggingFace.
            info = fetch_hf_model_info(model_id, timeout=timeout)
            if info:
                benchmarks = _extract_benchmark_scores(info)

        if benchmarks:
            collected[model_id] = benchmarks
            continue

        # No fetched data: use the baseline when one is defined.
        baseline = BASELINE_SCORES.get(model_id)
        if baseline:
            collected[model_id] = {"baseline_composite": baseline}

    save_scores(collected, project_dir)
    return collected
157
+
158
+
159
def is_stale(project_dir: str | Path = ".", max_age_hours: int = 24) -> bool:
    """Check if cached scores are older than max_age_hours.

    Args:
        project_dir: Project root whose ``.specsmith`` directory holds the cache.
        max_age_hours: Maximum accepted age of the cache, in hours.

    Returns:
        ``True`` when the cache is older than ``max_age_hours`` or when its
        ``synced_at`` timestamp is missing, not a string, unparseable, or
        lacks a UTC offset; ``False`` otherwise.
    """
    from datetime import datetime, timezone

    cached = load_cached_scores(project_dir)
    synced_at = cached.get("synced_at") if isinstance(cached, dict) else None
    # A non-string timestamp (number, list, null) cannot be parsed; calling
    # str methods on it would raise, so treat it like a missing one.
    if not isinstance(synced_at, str) or not synced_at:
        return True
    try:
        synced = datetime.fromisoformat(synced_at.replace("Z", "+00:00"))
        # Subtracting a naive datetime raises TypeError, handled below.
        age = datetime.now(timezone.utc) - synced
    except (ValueError, TypeError):
        return True
    return age.total_seconds() > max_age_hours * 3600
173
+
174
+
175
+ __all__ = [
176
+ "fetch_hf_model_info",
177
+ "is_stale",
178
+ "load_cached_scores",
179
+ "save_scores",
180
+ "sync_scores",
181
+ ]
@@ -0,0 +1,92 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """Sub-agent spawner — spawn isolated agent workers with tool subsets.
4
+
5
+ ARCHITECTURE.md §13 Phase 2: Multi-Agent Layer.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+
14
@dataclass
class SpawnedAgent:
    """Metadata for a spawned sub-agent."""

    id: str
    role: str
    tools: list[str]
    status: str = "idle"  # idle, running, completed, failed
    result: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return the agent's fields as a plain dict."""
        return {
            "id": self.id,
            "role": self.role,
            "tools": self.tools,
            "status": self.status,
            "result": self.result,
        }


# Tool subsets for different agent roles
ROLE_TOOLS: dict[str, list[str]] = {
    "coder": ["read_file", "write_file", "run_shell", "apply_diff"],
    "reviewer": ["read_file", "run_shell", "git_diff"],
    "tester": ["read_file", "run_shell", "run_tests"],
    "architect": ["read_file", "write_file"],
    "researcher": ["read_file", "search_web", "search_repo"],
}


class SubAgentSpawner:
    """Spawn and manage isolated agent workers.

    Each spawned agent gets a restricted tool subset based on its role,
    preventing accidental cross-domain actions (e.g., a reviewer can't
    write files).
    """

    def __init__(self) -> None:
        self._agents: dict[str, SpawnedAgent] = {}
        self._counter = 0

    def spawn(self, role: str, tools: list[str] | None = None) -> SpawnedAgent:
        """Spawn a new sub-agent with the given role and tool set.

        Args:
            role: Role name; it selects the default tools from ``ROLE_TOOLS``.
            tools: Explicit tool list. ``None`` means "use the role defaults"
                (no tools for an unknown role); an empty list means the agent
                gets no tools at all.

        Returns:
            The registered agent, with an id of the form
            ``agent-<role>-<NNN>`` where ``NNN`` is a per-spawner counter.
        """
        self._counter += 1
        agent_id = f"agent-{role}-{self._counter:03d}"
        granted = ROLE_TOOLS.get(role, []) if tools is None else tools
        # Copy the list so that editing one agent's tools can never change the
        # shared ROLE_TOOLS defaults or the caller's own list.
        agent = SpawnedAgent(id=agent_id, role=role, tools=list(granted))
        self._agents[agent_id] = agent
        return agent

    def get(self, agent_id: str) -> SpawnedAgent | None:
        """Get a spawned agent by ID, or ``None`` when the ID is unknown."""
        return self._agents.get(agent_id)

    def list_active(self) -> list[SpawnedAgent]:
        """List all agents that are not completed/failed."""
        return [a for a in self._agents.values() if a.status in ("idle", "running")]

    def list_all(self) -> list[SpawnedAgent]:
        """List all spawned agents."""
        return list(self._agents.values())

    def complete(self, agent_id: str, result: dict[str, Any]) -> None:
        """Mark an agent as completed with its result; unknown IDs are ignored."""
        agent = self._agents.get(agent_id)
        if agent:
            agent.status = "completed"
            agent.result = result

    def fail(self, agent_id: str, error: str) -> None:
        """Mark an agent as failed, storing the message under ``result["error"]``.

        Unknown IDs are ignored.
        """
        agent = self._agents.get(agent_id)
        if agent:
            agent.status = "failed"
            agent.result = {"error": error}
90
+
91
+
92
+ __all__ = ["ROLE_TOOLS", "SpawnedAgent", "SubAgentSpawner"]
@@ -0,0 +1,103 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """Team definitions for multi-agent coordination.
4
+
5
+ ARCHITECTURE.md §13 Phase 2: predefined agent team compositions.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any
12
+
13
+
14
@dataclass
class TeamMember:
    """One role position inside a team.

    Fields, in positional order:
        role: Name of the agent role occupying this slot.
        required: Flag marking the slot as required; ``True`` by default.
        tools_override: Optional tool list for this slot, ``None`` by
            default. Presumably replaces the role's default tools; the
            consumer is not visible in this module (TODO confirm).
    """

    role: str
    required: bool = True
    tools_override: list[str] | None = None
21
+
22
+
23
@dataclass
class TeamDefinition:
    """Named group of agent role slots that operate as one team."""

    id: str
    name: str
    description: str
    members: list[TeamMember] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict summary of the team.

        Each member is reduced to its ``role`` and ``required`` values;
        a member's ``tools_override`` is not part of the output.
        """
        member_rows = []
        for slot in self.members:
            member_rows.append({"role": slot.role, "required": slot.required})

        summary: dict[str, Any] = dict(
            id=self.id,
            name=self.name,
            description=self.description,
            members=member_rows,
        )
        return summary
39
+
40
+
41
# Built-in team compositions. Every slot uses the TeamMember defaults
# (required, no tool override); only the role name varies.
PAIR_REVIEW = TeamDefinition(
    id="pair-review",
    name="Pair Review",
    description="Coder + Reviewer pair for code changes with built-in review",
    members=[TeamMember(role=role) for role in ("coder", "reviewer")],
)

FULL_STACK = TeamDefinition(
    id="full-stack",
    name="Full Stack",
    description="Architect + Coder + Tester trio for complete feature development",
    members=[TeamMember(role=role) for role in ("architect", "coder", "tester")],
)

IP_ANALYSIS = TeamDefinition(
    id="ip-analysis",
    name="IP Analysis",
    description="IP Analyst + Researcher + Strategist for patent work",
    members=[TeamMember(role=role) for role in ("ip-analyst", "researcher", "strategist")],
)

SPEC_DRAFT = TeamDefinition(
    id="spec-draft",
    name="Specification Drafting",
    description="Architect + Drafter + Reviewer for specification writing",
    members=[TeamMember(role=role) for role in ("architect", "drafter", "reviewer")],
)

# Registry of the built-in teams, keyed by each team's own ``id``.
BUILTIN_TEAMS: dict[str, TeamDefinition] = {
    team.id: team for team in (PAIR_REVIEW, FULL_STACK, IP_ANALYSIS, SPEC_DRAFT)
}
91
+
92
+
93
def get_team(team_id: str) -> TeamDefinition | None:
    """Look up a built-in team by ID; return ``None`` when no team has it."""
    try:
        return BUILTIN_TEAMS[team_id]
    except KeyError:
        return None
96
+
97
+
98
def list_teams() -> list[TeamDefinition]:
    """Return all built-in teams as a fresh list, in registry order."""
    return [*BUILTIN_TEAMS.values()]
101
+
102
+
103
# Public names of this module (what ``import *`` exposes).
__all__ = [
    "BUILTIN_TEAMS",
    "TeamDefinition",
    "TeamMember",
    "get_team",
    "list_teams",
]
@@ -0,0 +1,111 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """Eval-Driven Development framework (ARCHITECTURE.md §13 Phase 1).
4
+
5
+ Provides structured eval suites that test AI model capabilities against
6
+ concrete tasks. Used for model intelligence scoring, regression testing,
7
+ and provider qualification.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass, field
13
+ from typing import Any
14
+
15
+
16
@dataclass
class EvalCase:
    """A single evaluation case — one prompt + expected behavior.

    Fields, in positional order:
        id: Identifier of the case.
        name: Human-readable name.
        role: Which agent role this case tests.
        prompt: Prompt text of the case.
        expected_keywords: Keywords tied to the expected behavior; presumably
            matched against the model output, but the scoring code is not
            visible in this module (TODO confirm).
        max_tokens: Token limit for the case; defaults to 1024.
        timeout_seconds: Timeout for the case; defaults to 30.
        tags: Free-form labels.
    """

    id: str
    name: str
    role: str  # which agent role this tests
    prompt: str
    expected_keywords: list[str] = field(default_factory=list)
    max_tokens: int = 1024
    timeout_seconds: int = 30
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return every field of the case as a plain dict.

        ``timeout_seconds`` is emitted alongside ``max_tokens`` so a
        serialized case carries its complete configuration. The list values
        are the instance's own list objects, not copies.
        """
        return {
            "id": self.id,
            "name": self.name,
            "role": self.role,
            "prompt": self.prompt,
            "expected_keywords": self.expected_keywords,
            "max_tokens": self.max_tokens,
            "timeout_seconds": self.timeout_seconds,
            "tags": self.tags,
        }
39
+
40
+
41
@dataclass
class EvalResult:
    """Outcome of executing one eval case."""

    case_id: str
    passed: bool
    score: float  # 0.0–1.0
    latency_ms: float
    model: str
    provider: str
    output_preview: str = ""
    error: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dict.

        The score is rounded to 3 decimals, the latency to 1 decimal, and
        the output preview is cut to its first 200 characters.
        """
        rounded_score = round(self.score, 3)
        rounded_latency = round(self.latency_ms, 1)
        short_preview = self.output_preview[:200]
        return dict(
            case_id=self.case_id,
            passed=self.passed,
            score=rounded_score,
            latency_ms=rounded_latency,
            model=self.model,
            provider=self.provider,
            output_preview=short_preview,
            error=self.error,
        )
65
+
66
+
67
@dataclass
class EvalSuite:
    """Named group of eval cases."""

    id: str
    name: str
    description: str
    cases: list[EvalCase] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a summary dict of the suite.

        The cases themselves are not included; only how many there are,
        under ``case_count``.
        """
        number_of_cases = len(self.cases)
        return dict(
            id=self.id,
            name=self.name,
            description=self.description,
            case_count=number_of_cases,
            tags=self.tags,
        )
85
+
86
+
87
@dataclass
class EvalReport:
    """Summary of one eval-suite run together with its per-case results.

    The counts and averages are stored exactly as supplied by the caller;
    this class does not compute them from ``results``.
    """

    suite_id: str
    total: int
    passed: int
    failed: int
    avg_score: float
    avg_latency_ms: float
    results: list[EvalResult] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a plain dict.

        The average score is rounded to 3 decimals and the average latency
        to 1 decimal; each result is serialized through its own
        ``to_dict()``.
        """
        serialized_results = []
        for entry in self.results:
            serialized_results.append(entry.to_dict())

        return dict(
            suite_id=self.suite_id,
            total=self.total,
            passed=self.passed,
            failed=self.failed,
            avg_score=round(self.avg_score, 3),
            avg_latency_ms=round(self.avg_latency_ms, 1),
            results=serialized_results,
        )
109
+
110
+
111
# Public names of this package (what ``import *`` exposes).
__all__ = [
    "EvalCase",
    "EvalReport",
    "EvalResult",
    "EvalSuite",
]
@@ -0,0 +1,95 @@
1
+ # SPDX-License-Identifier: MIT
2
+ # Copyright (c) 2026 BitConcepts, LLC. All rights reserved.
3
+ """Built-in eval suites for core agent capabilities."""
4
+
5
+ from __future__ import annotations
6
+
7
+ from specsmith.eval import EvalCase, EvalSuite
8
+
9
# Built-in eval cases, one per agent role. Prompts are assembled from
# adjacent string literals, so their text is a single line at runtime.
CODE_GEN = EvalCase(
    id="eval-code-gen-001",
    name="Python function generation",
    role="coder",
    tags=["code", "python"],
    expected_keywords=["def fibonacci", "list[int]", "return"],
    prompt=(
        "Write a Python function `fibonacci(n: int) -> list[int]` "
        "that returns the first n Fibonacci numbers. "
        "Include type hints and a docstring."
    ),
)

ARCHITECTURE_REVIEW = EvalCase(
    id="eval-arch-001",
    name="Architecture review",
    role="architect",
    tags=["architecture", "review"],
    expected_keywords=["scalability", "concurrent", "alternative"],
    prompt=(
        "Review this architecture decision: "
        "'We will use a monolithic SQLite database for a multi-tenant SaaS "
        "application serving 10,000 concurrent users.' "
        "Identify risks and suggest alternatives."
    ),
)

TEST_GEN = EvalCase(
    id="eval-test-gen-001",
    name="Test generation",
    role="tester",
    tags=["test", "pytest"],
    expected_keywords=["def test_", "assert", "add"],
    prompt=(
        "Write pytest tests for a function `add(a: int, b: int) -> int` "
        "that adds two integers. "
        "Cover edge cases: negative numbers, zero, large values."
    ),
)

PATENT_CLAIM = EvalCase(
    id="eval-patent-001",
    name="Patent claim analysis",
    role="ip-analyst",
    tags=["patent", "ip"],
    expected_keywords=["limitation", "claim", "design"],
    prompt=(
        "Analyze this patent claim: "
        "'A method for sorting data records comprising: "
        "receiving a dataset, applying a comparison function, "
        "and outputting sorted records in ascending order.' "
        "Identify the key limitations and suggest potential design-arounds."
    ),
)

INTENT_CLASSIFY = EvalCase(
    id="eval-classify-001",
    name="Intent classification",
    role="classifier",
    max_tokens=128,
    tags=["classify", "intent"],
    expected_keywords=["refactor"],
    prompt=(
        "Classify the following user request into one of these "
        "categories: [code_change, bug_fix, documentation, question, refactor]. "
        "Request: 'Can you rename the variable foo to bar in utils.py?'"
    ),
)

# Built-in suites.
CORE_SUITE = EvalSuite(
    id="core",
    name="Core Capabilities",
    description="Tests fundamental AI capabilities across 5 roles",
    tags=["core", "smoke"],
    cases=[
        CODE_GEN,
        ARCHITECTURE_REVIEW,
        TEST_GEN,
        PATENT_CLAIM,
        INTENT_CLASSIFY,
    ],
)

# Registry of built-in suites, keyed by each suite's own ``id``.
ALL_SUITES: dict[str, EvalSuite] = {
    CORE_SUITE.id: CORE_SUITE,
}
86
+
87
+
88
def get_suite(suite_id: str) -> EvalSuite | None:
    """Look up a built-in suite by ID; return ``None`` for an unknown ID."""
    if suite_id in ALL_SUITES:
        return ALL_SUITES[suite_id]
    return None
91
+
92
+
93
def list_suites() -> list[EvalSuite]:
    """Return all built-in suites as a fresh list, in registry order."""
    return [*ALL_SUITES.values()]