zettelforge 2.5.0__tar.gz → 2.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (312) hide show
  1. {zettelforge-2.5.0 → zettelforge-2.5.1}/CHANGELOG.md +23 -0
  2. {zettelforge-2.5.0 → zettelforge-2.5.1}/PKG-INFO +1 -1
  3. zettelforge-2.5.1/SECURITY.md +75 -0
  4. zettelforge-2.5.1/docs/THREAT_MODEL.md +248 -0
  5. {zettelforge-2.5.0 → zettelforge-2.5.1}/governance/controls.yaml +10 -2
  6. {zettelforge-2.5.0 → zettelforge-2.5.1}/pyproject.toml +1 -1
  7. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/__init__.py +1 -1
  8. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/knowledge_graph.py +69 -12
  9. zettelforge-2.5.1/tests/test_kg_edge_schema.py +135 -0
  10. zettelforge-2.5.0/SECURITY.md +0 -25
  11. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/CODEOWNERS +0 -0
  12. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/ISSUE_TEMPLATE/bug_report.md +0 -0
  13. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/ISSUE_TEMPLATE/feature_request.md +0 -0
  14. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/SECURITY.md +0 -0
  15. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/dependabot.yml +0 -0
  16. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/pull_request_template.md +0 -0
  17. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/workflows/ci.yml +0 -0
  18. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/workflows/docs.yml +0 -0
  19. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/workflows/publish.yml +0 -0
  20. {zettelforge-2.5.0 → zettelforge-2.5.1}/.github/workflows/snyk-security.yml +0 -0
  21. {zettelforge-2.5.0 → zettelforge-2.5.1}/.gitignore +0 -0
  22. {zettelforge-2.5.0 → zettelforge-2.5.1}/ARCHITECTURE.md +0 -0
  23. {zettelforge-2.5.0 → zettelforge-2.5.1}/CODEOWNERS +0 -0
  24. {zettelforge-2.5.0 → zettelforge-2.5.1}/CODE_OF_CONDUCT.md +0 -0
  25. {zettelforge-2.5.0 → zettelforge-2.5.1}/CONTRIBUTING.md +0 -0
  26. {zettelforge-2.5.0 → zettelforge-2.5.1}/Dockerfile +0 -0
  27. {zettelforge-2.5.0 → zettelforge-2.5.1}/GOVERNANCE.md +0 -0
  28. {zettelforge-2.5.0 → zettelforge-2.5.1}/LICENSE +0 -0
  29. {zettelforge-2.5.0 → zettelforge-2.5.1}/MANIFEST.in +0 -0
  30. {zettelforge-2.5.0 → zettelforge-2.5.1}/README.md +0 -0
  31. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/BENCHMARK_REPORT.md +0 -0
  32. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/LOCOMO_BENCHMARK_COMPARISON.md +0 -0
  33. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/auto_ralph.py +0 -0
  34. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/benchmark_harness.py +0 -0
  35. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/cti_benchmark_v2.py +0 -0
  36. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/cti_retrieval_benchmark.py +0 -0
  37. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/cti_retrieval_results.json +0 -0
  38. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/cti_v2_results.json +0 -0
  39. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/ctibench_benchmark.py +0 -0
  40. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/ctibench_results.json +0 -0
  41. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/dataset.json +0 -0
  42. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/enterprise-attack.json +0 -0
  43. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/evolve_benchmark.py +0 -0
  44. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/evolve_results.json +0 -0
  45. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/graph_test.py +0 -0
  46. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/locomo_benchmark.py +0 -0
  47. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/locomo_results.json +0 -0
  48. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/locomo_results_v1.3.0_baseline.json +0 -0
  49. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/memoryagentbench.py +0 -0
  50. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/memoryagentbench_results.json +0 -0
  51. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/mempalace_benchmark.py +0 -0
  52. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/mempalace_results.json +0 -0
  53. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/naive_memory.py +0 -0
  54. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/opencti_benchmark.py +0 -0
  55. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/ragas_benchmark.py +0 -0
  56. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/ragas_cti_results.json +0 -0
  57. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/ragas_results.json +0 -0
  58. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/results/benchmark_report.md +0 -0
  59. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/results/ralph_optimization_log.json +0 -0
  60. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/scale_benchmark.py +0 -0
  61. {zettelforge-2.5.0 → zettelforge-2.5.1}/benchmarks/scale_results.json +0 -0
  62. {zettelforge-2.5.0 → zettelforge-2.5.1}/config.default.yaml +0 -0
  63. {zettelforge-2.5.0 → zettelforge-2.5.1}/config.example.yaml +0 -0
  64. {zettelforge-2.5.0 → zettelforge-2.5.1}/docker/docker-compose.yml +0 -0
  65. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/.well-known/security.txt +0 -0
  66. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/CNAME +0 -0
  67. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/architecture-diagram.mmd +0 -0
  68. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/archive/PACKAGE_SUMMARY.md +0 -0
  69. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/archive/README.md +0 -0
  70. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/archive/SKILL.md +0 -0
  71. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/ZettelForge_Architecture.mmd +0 -0
  72. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/architecture-overview.mmd +0 -0
  73. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/architecture-read-path.mmd +0 -0
  74. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/architecture-write-path.mmd +0 -0
  75. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/cf-analytics.js +0 -0
  76. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/demo.gif +0 -0
  77. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-16.png +0 -0
  78. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-32.png +0 -0
  79. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-512.png +0 -0
  80. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-64.png +0 -0
  81. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-apple-touch.png +0 -0
  82. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon-old.svg +0 -0
  83. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/favicon.svg +0 -0
  84. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/logo.svg +0 -0
  85. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/social-preview.png +0 -0
  86. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-lockup-monogram.svg +0 -0
  87. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-lockup.svg +0 -0
  88. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-logo-flat.svg +0 -0
  89. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-logo-philosophy.md +0 -0
  90. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-logo.png +0 -0
  91. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/threatrecall-mark.png +0 -0
  92. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/zettelforge_architecture-light.svg +0 -0
  93. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/assets/zettelforge_architecture.svg +0 -0
  94. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/brand/brandIdentity.md +0 -0
  95. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/brand/colors_and_type.css +0 -0
  96. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/explanation/architecture.md +0 -0
  97. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/explanation/epistemic-tiers.md +0 -0
  98. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/explanation/stix-in-zettelforge.md +0 -0
  99. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/explanation/two-phase-pipeline.md +0 -0
  100. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/explanation/zettelkasten-philosophy.md +0 -0
  101. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/configure-lancedb.md +0 -0
  102. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/configure-opencti.md +0 -0
  103. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/configure-pii.md +0 -0
  104. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/configure-typedb.md +0 -0
  105. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/ingest-news-report.md +0 -0
  106. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/integrate-llm-agent.md +0 -0
  107. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/migrate-jsonl-to-sqlite.md +0 -0
  108. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/query-apt-tools.md +0 -0
  109. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/reproduce-benchmarks.md +0 -0
  110. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/resolve-aliases.md +0 -0
  111. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/run-temporal-query.md +0 -0
  112. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/store-threat-actor.md +0 -0
  113. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/troubleshoot.md +0 -0
  114. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/how-to/upgrade.md +0 -0
  115. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/human-evaluation-rubric.md +0 -0
  116. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/index.md +0 -0
  117. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/llms.txt +0 -0
  118. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/narrative/2026-04-16-the-memory-problem.md +0 -0
  119. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/overrides/main.html +0 -0
  120. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/architecture-deep-dive.md +0 -0
  121. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/configuration.md +0 -0
  122. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/governance-controls.md +0 -0
  123. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/memory-manager-api.md +0 -0
  124. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/module-inventory.md +0 -0
  125. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/retrieval-policies.md +0 -0
  126. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/reference/stix-schema.md +0 -0
  127. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-001-conversational-entity-extractor.md +0 -0
  128. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-002-universal-llm-provider.md +0 -0
  129. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-003-adversarial-review.md +0 -0
  130. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-003-read-path-depth-routing.md +0 -0
  131. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-007-operational-telemetry.md +0 -0
  132. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-009-enrichment-pipeline-v2.md +0 -0
  133. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-010-enrichment-hotfix.md +0 -0
  134. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-011-local-llm-backend-config.md +0 -0
  135. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-012-litellm-unified-provider.md +0 -0
  136. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/rfcs/RFC-013-presidio-pii-detection.md +0 -0
  137. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/stylesheets/brand-tokens.css +0 -0
  138. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/stylesheets/extra.css +0 -0
  139. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/stylesheets/fonts/Neuropol.otf +0 -0
  140. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-09-ctibench-ragas-benchmarks.md +0 -0
  141. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-09-fastembed-local-embeddings.md +0 -0
  142. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-09-hybrid-typedb-lancedb-architecture.md +0 -0
  143. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-09-local-llm-llama-cpp.md +0 -0
  144. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-anti-aversion-cleanup.md +0 -0
  145. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-causal-graph.md +0 -0
  146. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-ctibench-ate-fix.md +0 -0
  147. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-format-stability.md +0 -0
  148. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-memory-evolution.md +0 -0
  149. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-merge-consolidation.md +0 -0
  150. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-persistence-semantics.md +0 -0
  151. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-15-sqlite-migration.md +0 -0
  152. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-17-test-suite-audit.md +0 -0
  153. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-24-phase-0.5-attribution-prelim.md +0 -0
  154. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-25-graph-retriever-silence.md +0 -0
  155. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/2026-04-25-phase-0.5-attribution.md +0 -0
  156. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/research/README.md +0 -0
  157. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/superpowers/specs/2026-04-15-p1-features-prd.md +0 -0
  158. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/tutorials/01-quickstart.md +0 -0
  159. {zettelforge-2.5.0 → zettelforge-2.5.1}/docs/tutorials/02-first-cti-report.md +0 -0
  160. {zettelforge-2.5.0 → zettelforge-2.5.1}/examples/athf_bridge.py +0 -0
  161. {zettelforge-2.5.0 → zettelforge-2.5.1}/examples/mcp_claude_code.md +0 -0
  162. {zettelforge-2.5.0 → zettelforge-2.5.1}/examples/quickstart.py +0 -0
  163. {zettelforge-2.5.0 → zettelforge-2.5.1}/mkdocs.yml +0 -0
  164. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/migrate_jsonl_to_sqlite.py +0 -0
  165. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/rebuild_index.py +0 -0
  166. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/record-demo.sh +0 -0
  167. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/typedb-setup.sh +0 -0
  168. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/zettelforge-rebuild.service +0 -0
  169. {zettelforge-2.5.0 → zettelforge-2.5.1}/scripts/zettelforge-rebuild.timer +0 -0
  170. {zettelforge-2.5.0 → zettelforge-2.5.1}/server.json +0 -0
  171. {zettelforge-2.5.0 → zettelforge-2.5.1}/skills/claude-code-skill.md +0 -0
  172. {zettelforge-2.5.0 → zettelforge-2.5.1}/skills/openclaw-skill.md +0 -0
  173. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/__main__.py +0 -0
  174. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/alias_resolver.py +0 -0
  175. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/backend_factory.py +0 -0
  176. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/blended_retriever.py +0 -0
  177. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/cache.py +0 -0
  178. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/config.py +0 -0
  179. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/consolidation.py +0 -0
  180. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/demo.py +0 -0
  181. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/detection/__init__.py +0 -0
  182. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/detection/base.py +0 -0
  183. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/detection/consumers.py +0 -0
  184. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/detection/explainer.py +0 -0
  185. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/edition.py +0 -0
  186. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/entity_indexer.py +0 -0
  187. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/extensions.py +0 -0
  188. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/fact_extractor.py +0 -0
  189. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/governance_validator.py +0 -0
  190. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/graph_retriever.py +0 -0
  191. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/integrations/__init__.py +0 -0
  192. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/integrations/langchain_retriever.py +0 -0
  193. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/intent_classifier.py +0 -0
  194. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/json_parse.py +0 -0
  195. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/lance_maintenance.py +0 -0
  196. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_client.py +0 -0
  197. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/__init__.py +0 -0
  198. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/base.py +0 -0
  199. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/litellm_provider.py +0 -0
  200. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/local_provider.py +0 -0
  201. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/mock_provider.py +0 -0
  202. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/ollama_provider.py +0 -0
  203. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/llm_providers/registry.py +0 -0
  204. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/log.py +0 -0
  205. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/mcp/__init__.py +0 -0
  206. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/mcp/__main__.py +0 -0
  207. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/mcp/server.py +0 -0
  208. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/memory_evolver.py +0 -0
  209. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/memory_manager.py +0 -0
  210. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/memory_store.py +0 -0
  211. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/memory_updater.py +0 -0
  212. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/note_constructor.py +0 -0
  213. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/note_schema.py +0 -0
  214. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/observability.py +0 -0
  215. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/ocsf.py +0 -0
  216. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/ontology.py +0 -0
  217. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/pii_validator.py +0 -0
  218. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/retry.py +0 -0
  219. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/scripts/compact_lance.py +0 -0
  220. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/scripts/human_eval_sampler.py +0 -0
  221. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/scripts/telemetry_aggregator.py +0 -0
  222. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/scripts/telemetry_dashboard.py +0 -0
  223. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/__init__.py +0 -0
  224. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/cli.py +0 -0
  225. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/entities.py +0 -0
  226. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/ingest.py +0 -0
  227. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/parser.py +0 -0
  228. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/schemas/NOTICE.md +0 -0
  229. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/schemas/__init__.py +0 -0
  230. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/schemas/sigma-correlation-rules-schema.json +0 -0
  231. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/schemas/sigma-detection-rule-schema.json +0 -0
  232. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/schemas/sigma-filters-schema.json +0 -0
  233. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sigma/tags.py +0 -0
  234. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/sqlite_backend.py +0 -0
  235. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/storage_backend.py +0 -0
  236. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/synthesis_generator.py +0 -0
  237. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/synthesis_validator.py +0 -0
  238. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/telemetry.py +0 -0
  239. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/vector_memory.py +0 -0
  240. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/vector_retriever.py +0 -0
  241. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/__init__.py +0 -0
  242. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/cccs_metadata.py +0 -0
  243. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/cli.py +0 -0
  244. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/entities.py +0 -0
  245. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/ingest.py +0 -0
  246. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/parser.py +0 -0
  247. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/schemas/CCCS_YARA.yml +0 -0
  248. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/schemas/CCCS_YARA_values.yml +0 -0
  249. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/schemas/NOTICE.md +0 -0
  250. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/schemas/__init__.py +0 -0
  251. {zettelforge-2.5.0 → zettelforge-2.5.1}/src/zettelforge/yara/tags.py +0 -0
  252. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/__init__.py +0 -0
  253. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/benchmark_scale.py +0 -0
  254. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/conftest.py +0 -0
  255. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/sigma/cloud_example.yml +0 -0
  256. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/sigma/correlation_example.yml +0 -0
  257. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/sigma/process_creation_example.yml +0 -0
  258. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/sigma/tagged_example.yml +0 -0
  259. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/yara/malware_hash.yar +0 -0
  260. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/yara/technique_loader.yar +0 -0
  261. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/fixtures/yara/webshell.yar +0 -0
  262. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_basic.py +0 -0
  263. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_blended_retriever.py +0 -0
  264. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_causal_extraction.py +0 -0
  265. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_config.py +0 -0
  266. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_consolidation.py +0 -0
  267. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_conversational_entities.py +0 -0
  268. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_core.py +0 -0
  269. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_cti_integration.py +0 -0
  270. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_detection_explainer.py +0 -0
  271. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_detection_rule_entities.py +0 -0
  272. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_edition.py +0 -0
  273. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_embedding.py +0 -0
  274. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_entity_indexer_races.py +0 -0
  275. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_extensions.py +0 -0
  276. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_fact_extractor.py +0 -0
  277. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_governance.py +0 -0
  278. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_governance_spec_drift.py +0 -0
  279. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_graph_retriever.py +0 -0
  280. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_human_eval_sampler.py +0 -0
  281. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_intent_classifier.py +0 -0
  282. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_json_parse.py +0 -0
  283. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_lance_maintenance.py +0 -0
  284. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_langchain_retriever.py +0 -0
  285. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_llm_client.py +0 -0
  286. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_llm_providers.py +0 -0
  287. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_logging_compliance.py +0 -0
  288. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_mcp_server.py +0 -0
  289. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_memory_evolver.py +0 -0
  290. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_memory_updater.py +0 -0
  291. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_performance.py +0 -0
  292. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_pii_validator.py +0 -0
  293. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_recall_integration.py +0 -0
  294. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_sigma_entities.py +0 -0
  295. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_sigma_ingest.py +0 -0
  296. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_sigma_parser.py +0 -0
  297. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_sqlite_backend.py +0 -0
  298. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_sqlite_integration.py +0 -0
  299. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_storage_backend.py +0 -0
  300. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_telemetry_aggregator.py +0 -0
  301. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_telemetry_collector.py +0 -0
  302. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_telemetry_dashboard.py +0 -0
  303. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_telemetry_integration.py +0 -0
  304. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_temporal_graph.py +0 -0
  305. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_two_phase_e2e.py +0 -0
  306. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_typedb_client.py +0 -0
  307. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_yara_entities.py +0 -0
  308. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_yara_ingest.py +0 -0
  309. {zettelforge-2.5.0 → zettelforge-2.5.1}/tests/test_yara_parser.py +0 -0
  310. {zettelforge-2.5.0 → zettelforge-2.5.1}/web/app.py +0 -0
  311. {zettelforge-2.5.0 → zettelforge-2.5.1}/web/auth.py +0 -0
  312. {zettelforge-2.5.0 → zettelforge-2.5.1}/web/mcp_server.py +0 -0
@@ -6,6 +6,29 @@ Versioning follows [Semantic Versioning](https://semver.org/).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [2.5.1] - 2026-04-25
10
+
11
+ Hotfix release for a bug that surfaced during the v2.5.0 performance benchmark run.
12
+
13
+ ### Fixed
14
+
15
+ - **`KnowledgeGraph._cache_edge` crashed on legacy-schema edges**.
16
+ Long-running deployments accumulated `kg_edges.jsonl` entries written
17
+ by a now-removed pre-v2.5.x writer that used
18
+ `{source_id, target_id, relation_type}` instead of the canonical
19
+ `{from_node_id, to_node_id, relationship}` keys. The loader hard-failed
20
+ with `KeyError: 'from_node_id'` on the first such row, taking down
21
+ every `recall()` and `synthesize()` that touches the KG. Affects any
22
+ workspace with mixed-schema edge history; observed locally with 189k
23
+ edges where ~80k were the legacy shape.
24
+ `_normalize_edge_schema()` now remaps legacy keys to canonical on load
25
+ and drops entries that still cannot be normalized, with a
26
+ count logged at WARNING so operators can see the skip volume.
27
+ Six new regression tests in `tests/test_kg_edge_schema.py` cover
28
+ pass-through, remap, missing-fields, non-dict, mixed-batch, and
29
+ corrupt-JSON cases. The previously broken, environment-dependent
30
+ `test_basic.py::test_ingest_relationship` now passes deterministically.
31
+
9
32
  ## [2.5.0] - 2026-04-25
10
33
 
11
34
  Compliance-driven minor release. Closes every CRITICAL and HIGH audit
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zettelforge
3
- Version: 2.5.0
3
+ Version: 2.5.1
4
4
  Summary: ZettelForge: Agentic Memory System with vector search, knowledge graph, and synthesis
5
5
  Project-URL: Homepage, https://github.com/rolandpg/zettelforge
6
6
  Project-URL: Documentation, https://docs.threatrecall.ai
@@ -0,0 +1,75 @@
1
+ # Security Policy
2
+
3
+ ## Reporting a Vulnerability
4
+
5
+ This is a solo-maintainer project. For security-related issues:
6
+ - Open a GitHub Security Advisory in the repository
7
+ - Tag it with the `security` label
8
+ - Expect acknowledgement within 48 hours
9
+
10
+ ## Supported Versions
11
+
12
+ | Version | Supported |
13
+ |---------|-----------|
14
+ | latest release | ✅ |
15
+ | master branch | ✅ (CI gates) |
16
+ | older releases | ❌ |
17
+
18
+ ## Supply Chain Security
19
+
20
+ This project implements:
21
+ - SHA-pinned GitHub Actions (all third-party actions pinned by commit SHA)
22
+ - PyPI trusted publishing (OIDC, no long-lived tokens)
23
+ - pip-audit on every CI run (HIGH/CRITICAL must pass)
24
+ - Dependabot for weekly dependency updates
25
+ - Snyk SAST scanning on every push/PR
26
+
27
+ ## Known Security Architecture
28
+
29
+ See [THREAT_MODEL.md](docs/THREAT_MODEL.md) for the complete STRIDE threat model.
30
+
31
+ ### Data at Rest
32
+
33
+ - Notes, the knowledge graph, and the entity index are stored in a local SQLite database (WAL mode) under the configured data directory. No encryption at rest is applied by ZettelForge itself -- encrypt the filesystem or volume at the OS level for sensitive deployments.
34
+ - LanceDB vector index files live alongside the SQLite database and carry the same recommendation.
35
+
36
+ ### PII Protection
37
+
38
+ - As of v2.5.0 (RFC-013), optional PII detection via Microsoft Presidio scans content before `remember()` storage. Three modes: log (discovery), redact (compliance), block (strict). Disabled by default. Requires `pip install zettelforge[pii]` to activate.
39
+ - Raw PII text is never written to structured logs. Only entity type and detection score are recorded.
40
+
41
+ ### LLM Provider Security
42
+
43
+ - Four providers: `local` (in-process, no network), `ollama` (localhost HTTP), `litellm` (cloud APIs), `mock` (testing). Each is configurable via `llm.provider` in config.yaml.
44
+ - `local` provider is fully offline. `ollama` runs on localhost only. `litellm` makes outbound HTTPS calls to configured cloud APIs.
45
+ - API keys use `${ENV_VAR}` resolution -- never committed to YAML. Redacted from all log output via `LLMConfig.__repr__`.
46
+ - Provider timeout is configurable (default 60s). LiteLLM provider supports configurable retry count.
47
+
48
+ ### Injection Defenses
49
+
50
+ - As of v2.1.1, all LanceDB query expressions are parameterized. String-interpolated queries were present in v2.1.0 and earlier (see CVE advisory, if issued, or CHANGELOG v2.1.1 P0-3).
51
+
52
+ ### File Locking
53
+
54
+ - As of v2.1.1, all JSONL and entity index write paths use `fcntl.flock()` exclusive locks to prevent concurrent-write corruption.
55
+
56
+ ### Audit Logging
57
+
58
+ - All security-relevant operations emit OCSF v1.3 structured events via `structlog`. Authorization decisions, API activity, and file activity are auditable in any SIEM that ingests JSON logs.
59
+
60
+ ### Air-Gap Deployments
61
+
62
+ - ZettelForge supports fully offline operation (fastembed ONNX + llama-cpp-python). No telemetry or external calls are made in this configuration.
63
+
64
+ ## Disclosure Policy
65
+
66
+ ZettelForge follows a coordinated disclosure model:
67
+
68
+ 1. Reporter submits the vulnerability privately via a GitHub Security Advisory (see "Reporting a Vulnerability" above).
69
+ 2. We acknowledge within 48 hours and begin assessment.
70
+ 3. We develop and test a fix on a private branch.
71
+ 4. We notify the reporter when a fix is ready and agree on a disclosure date.
72
+ 5. We release the fix and publish a security advisory simultaneously.
73
+ 6. We credit the reporter in the advisory (unless they opt out).
74
+
75
+ We ask reporters to give us a reasonable time to fix issues before public disclosure. We will not take legal action against good-faith security researchers who follow this policy.
@@ -0,0 +1,248 @@
1
+ # ZettelForge Threat Model
2
+
3
+ > **Document ID:** THREAT-001
4
+ > **Classification:** Internal (Tier 2)
5
+ > **Last Updated:** 2026-04-25
6
+ > **Framework:** STRIDE (GOV-011 SSDL Requirement)
7
+ > **Scope:** Community Edition v2.5.x (MIT-licensed codebase)
8
+ > **Compliance Mapping:** FedRAMP SA-3, SA-8, SA-11, SA-15; NIST 800-171 3.11, 3.13, 3.14
9
+
10
+ ## 1. System Overview
11
+
12
+ ### 1.1 High-Level Architecture
13
+
14
+ ZettelForge is an agentic memory system for cyber threat intelligence (CTI). It ingests unstructured text (threat reports, analyst notes, agent observations) through `remember()`, stores it in a hybrid SQLite + LanceDB backend, and retrieves it via `recall()` and `synthesize()` with intent-classified, policy-weighted blended retrieval.
15
+
16
+ ```
17
+ ┌─────────────────────────────┐
18
+ │ External Actors │
19
+ │ (Analyst / AI Agent / MCP) │
20
+ └─────────────┬───────────────┘
21
+
22
+ ┌─────────────▼───────────────┐
23
+ │ MemoryManager │
24
+ │ remember() / recall() │
25
+ │ synthesize() │
26
+ └─────┬───────────────┬───────┘
27
+ │ │
28
+ ┌────────────▼───┐ ┌──────▼────────────┐
29
+ │ Governance │ │ LLM Providers │
30
+ │ Validator │ │ (local/ollama/ │
31
+ │ (PII, rules) │ │ litellm/mock) │
32
+ └────────┬───────┘ └──────┬────────────┘
33
+ │ │
34
+ ┌────────▼───────┐ ┌──────▼────────────┐
35
+ │ SQLite + │ │ Enrichment Queue │
36
+ │ LanceDB │ │ (causal / LLM │
37
+ │ (notes, vec) │ │ NER extraction)│
38
+ └────────────────┘ └───────────────────┘
39
+ ```
40
+
41
+ ### 1.2 Trust Boundaries
42
+
43
+ | Boundary # | Description | Type |
44
+ |------------|-------------|------|
45
+ | TB-1 | External → API surface (MCP, REST, direct Python API) | External network / process |
46
+ | TB-2 | Python API → MemoryManager | Internal process |
47
+ | TB-3 | MemoryManager → SQLite / LanceDB filesystem | Local filesystem |
48
+ | TB-4 | LLM Provider → External API (litellm, ollama) | Outbound network |
49
+ | TB-5 | Enrichment worker → LLM (fact extraction, NER) | Internal process |
50
+ | TB-6 | Configuration loader → env vars / YAML files | Local filesystem |
51
+
52
+ ### 1.3 Data Flow Diagram
53
+
54
+ ```
55
+ [C2] Analyst/AI Agent
56
+
57
+ │ remember(content) / recall(query)
58
+
59
+ TB-1 ──────────────────────────────────────────────────┐
60
+ │ │
61
+ ▼ │
62
+ [P1] MemoryManager._remember_inner() │
63
+ │ │
64
+ │ content │
65
+ ▼ │
66
+ [P2] GovernanceValidator.validate_remember() │
67
+ │ ┌──────────────────┐ │
68
+ │ │ (Optional) PII │ TB-5 (lazy) │
69
+ │ │ Validator │──→ presidio-analyzer │
70
+ │ │ (log/redact/block)│ (in-process spaCy) │
71
+ │ └──────────────────┘ │
72
+ │ │
73
+ │ redacted content (or original) │
74
+ ▼ │
75
+ [P3] NoteConstructor → construct MetadataNote │
76
+ │ │
77
+ ├──→ [DS1] EntityIndexer → extract entities │
78
+ ├──→ [DS2] AliasResolver → resolve APT28/Fancy Bear │
79
+ ├──→ [DS3] SQLite DB (notes, KG, entity index) │
80
+ ├──→ [DS4] LanceDB (vector index, IVF_PQ 768-dim) │
81
+ │ │
82
+ └──→ Enrichment Queue (async) │
83
+ ├──→ [P4] LLM Causal Triple Extraction │
84
+ └──→ [P5] LLM NER (background) │
85
+
86
+ [S1] LLM Provider Dispatch │
87
+ ├──→ local (in-process llama-cpp-python / onnx) │
88
+ ├──→ ollama (HTTP to localhost:11434) TB-4 │
89
+ └──→ litellm (HTTP to cloud APIs) TB-4 │
90
+
91
+ [C1] Configuration Loader │
92
+ ├── config.yaml / config.default.yaml TB-6 │
93
+ ├── Environment variables (ZETTELFORGE_*) │
94
+ └── ${ENV_VAR} resolution for secrets │
95
+ ```
96
+
97
+ ---
98
+
99
+ ## 2. STRIDE Threat Analysis
100
+
101
+ ### 2.1 Spoofing
102
+
103
+ | ID | Threat | Component | Risk | Mitigation |
104
+ |----|--------|-----------|------|------------|
105
+ | S-01 | Attacker spoofs a valid MCP client to call `remember()` / `recall()` with malicious content | MCP Server / REST API (TB-1) | **High** — unauthorized memory access | MCP server relies on transport-level auth (stdio transport for local agents; TLS client certs or API tokens for remote). No built-in authentication in Community edition. Enterprise edition adds JWT/OAuth. |
106
+ | S-02 | Attacker spoofs an LLM provider endpoint (e.g., fake Ollama server) to return malicious model output | LLM Provider (TB-4, ollama/litellm) | **Medium** — model output is treated as data, not executable; but could inject false threat intelligence | No TLS verification for localhost endpoints (default ollama). litellm uses HTTPS for cloud APIs. Local deployments are responsible for network isolation. |
107
+ | S-03 | Attacker spoofs configuration file to inject malicious settings | Config Loader (TB-6) | **High** — could set `provider: litellm` with attacker-controlled API key or disable governance | Config files are local filesystem; `config.yaml` is in `.gitignore` to prevent accidental commits. No integrity verification on config files. |
108
+
109
+ ### 2.2 Tampering
110
+
111
+ | ID | Threat | Component | Risk | Mitigation |
112
+ |----|--------|-----------|------|------------|
113
+ | T-01 | Attacker modifies SQLite database or LanceDB index files on disk | Storage (TB-3) | **Critical** — persistent memory corruption | SQLite WAL mode with no built-in integrity check on reads. No HMAC or signature on stored notes. Mitigation relies on OS-level filesystem permissions. Encrypt filesystem at OS level for sensitive deployments (noted in SECURITY.md). |
114
+ | T-02 | Attacker modifies config.yaml in-place to change LLM provider, disable PII validation, or alter governance settings | Config Loader (TB-6) | **High** — silent security downgrade | Config files are local. `config.default.yaml` is tracked in git. `config.yaml` is user-owned. No integrity verification. |
115
+ | T-03 | Attacker tampers with enrichment queue data in memory | Enrichment Queue (P4/P5) | **Low** — in-process queue, not network-accessible | The queue is an in-memory Python `queue.Queue` with `maxsize=500`. No external access path. |
116
+ | T-04 | Attacker modifies a note's embedding to bias recall results | LanceDB (DS4) | **Medium** — retrieval poisoning | LanceDB stores vectors as parquet files. OS-level file permissions are the only protection. |
117
+
118
+ ### 2.3 Repudiation
119
+
120
+ | ID | Threat | Component | Risk | Mitigation |
121
+ |----|--------|-----------|------|------------|
122
+ | R-01 | Attacker performs operations (remember, recall, synthesize) without audit trail | MemoryManager | **High** — compliance failure for FedRAMP AU-2/AU-3 | All operations emit OCSF structured events via `log_api_activity()` / `log_authorization()`. OCSF class 1001 (API Activity) and 3001/3003 (Authorization) are emitted for every operation. Events include `request_id`, `actor`, `resource`, `status_id`. |
123
+ | R-02 | Governance violation occurs without attribution | GovernanceValidator | **Medium** — violation logged but no actor identity | `log_authorization()` records `actor="system"` for automatic calls. MCP and REST API paths should include the authenticated actor. Currently the Community edition uses a hardcoded `"system"` actor. |
124
+ | R-03 | PII detection events without traceability | PIIValidator (RFC-013) | **Medium** — compliance requirement for data protection | `pii_detected` structured log event includes count, action, entity types, and scores. No raw PII text is logged (fixed in commit 5ac162c). |
125
+
126
+ ### 2.4 Information Disclosure
127
+
128
+ | ID | Threat | Component | Risk | Mitigation |
129
+ |----|--------|-----------|------|------------|
130
+ | I-01 | Stored threat intelligence (notes, entities, IOCs) leaked via filesystem access | SQLite / LanceDB (DS3/DS4) | **Critical** — all CTI data exposed | No encryption at rest in Community edition. SQLite WAL files and LanceDB parquet files contain plaintext. **Mitigation:** encrypt filesystem at OS level. Enterprise edition adds optional SQLite encryption. |
131
+ | I-02 | PII stored in notes leaks through recall/synthesize responses | Storage → Retrieval | **High** — PII compliance | RFC-013 PIIValidator with `action=redact` strips PII before storage. `action=block` prevents storage entirely. Disabled by default — user must opt in. |
132
+ | I-03 | API keys logged in structured logs | LLM Provider / Config Loader | **Critical** — credential exposure | `LLMConfig.__repr__` redacts `api_key` as `'***'`. `extra` dict fields matching sensitive key patterns (`key`, `token`, `secret`, `password`, `credential`, `auth`) are also redacted. Config resolution uses `${ENV_VAR}` references so raw keys never appear in YAML. |
133
+ | I-04 | Error messages leak internal paths, configuration, or stack traces | All components | **Medium** — information gathering | No global exception handler catches and sanitizes errors. structlog can redact PII from log messages if configured. |
134
+ | I-05 | Raw PII text previously logged in structured events | PIIValidator (fixed) | **Medium** — historical exposure | Fixed in 5ac162c: PII text removed from log entities. Only entity type and score are logged. Users on prior commits should rotate logs containing PII. |
135
+
136
+ ### 2.5 Denial of Service
137
+
138
+ | ID | Threat | Component | Risk | Mitigation |
139
+ |----|--------|-----------|------|------------|
140
+ | D-01 | Large content in `remember()` exhausts memory or blocks the enrichment queue | MemoryManager (P1) | **Medium** — degraded performance | `remember_report()` chunks long documents. No explicit size limit on `remember()` content. Enrichment queue has `maxsize=500` backpressure. |
141
+ | D-02 | LLM provider (ollama, litellm) hangs and blocks `remember()` | LLM Provider (TB-4) | **High** — operation blocks | OllamaProvider has timeout (RFC-010, default 60s). LitellmProvider has timeout + num_retries. `generate()` returns empty string on recoverable failure. Fallback provider (e.g., local -> ollama) gives alternative path. |
142
+ | D-03 | Malicious query triggers deep graph traversal exhausting time/resources | BlendedRetriever | **Medium** — slow recall | `max_graph_depth` config (default 2) limits BFS hops. `default_k` (default 10) limits results. No timeout on recall queries. |
143
+ | D-04 | spaCy model download blocks first `remember()` when PII is enabled | PIIValidator (lazy load) | **Low** — delayed first call (~2-3 seconds) | One-time download cost, matching the fastembed pattern. Can be pre-downloaded for air-gapped deployments. |
144
+
145
+ ### 2.6 Elevation of Privilege
146
+
147
+ | ID | Threat | Component | Risk | Mitigation |
148
+ |----|--------|-----------|------|------------|
149
+ | E-01 | MCP client accesses notes from a different domain/tenant than authorized | MemoryManager / MCP Server | **High** — cross-tenant data access | No domain-level access control in Community edition. Enterprise edition adds multi_tenant config. Domain is a metadata field on notes, not an access control boundary. |
150
+ | E-02 | Attacker bypasses governance validation (PII, rules) by calling storage backend directly | Direct filesystem / SQLite access | **Critical** — all governance controls bypassed | Governance runs in-memory in `_remember_inner()`. Direct SQLite or LanceDB access bypasses it entirely. Mitigation: OS-level filesystem permissions. |
151
+ | E-03 | Config change elevates provider from mock/local to cloud API without user knowledge | Config Loader | **Medium** — unexpected outbound calls | Config changes are neither signed nor validated. The user is responsible for config integrity. |
152
+
153
+ ---
154
+
155
+ ## 3. Risk Summary
156
+
157
+ | Risk Level | Count | Key Concerns |
158
+ |------------|-------|--------------|
159
+ | **Critical** | 4 | T-01 (storage tampering), I-01 (unencrypted data at rest), I-03 (API keys in logs), E-02 (governance bypass via filesystem) |
160
+ | **High** | 7 | S-01 (spoofed MCP client), S-03 (config tampering), T-02 (config security downgrade), R-01 (repudiation without audit), I-02 (PII in stored notes), D-02 (LLM provider hang), E-01 (cross-tenant data access) |
161
+ | **Medium** | 9 | S-02 (fake LLM provider), T-04 (retrieval poisoning), R-02, R-03, I-04 (error message leakage), I-05 (historical PII in logs), D-01, D-03, E-03 |
162
+ | **Low** | 2 | T-03 (in-memory queue tampering), D-04 (PII model download delay) |
163
+
164
+ ### Top 5 Mitigations (Priority Order)
165
+
166
+ 1. **Encryption at rest** — Encrypt the data directory filesystem (OS-level LUKS, BitLocker, or eCryptfs). ZettelForge does not apply at-rest encryption itself.
167
+ 2. **Filesystem permissions** — Restrict access to `~/.amem/` to the ZettelForge process user only. Prevents governance bypass (E-02) and storage tampering (T-01).
168
+ 3. **Network isolation** — Run Ollama and ZettelForge on a dedicated VLAN or firewall zone. Prevents unauthorized MCP clients (S-01) and fake provider attacks (S-02).
169
+ 4. **Enable PII redaction** — Set `governance.pii.enabled: true` and `action: redact` in production. Prevents PII persistence (I-02).
170
+ 5. **Audit log retention** — Ensure OCSF logs are shipped to a SIEM (via structlog JSON output). Satisfies FedRAMP AU-2/AU-3 (R-01).
171
+
172
+ ---
173
+
174
+ ## 4. Mitigation Details
175
+
176
+ ### 4.1 Existing Controls
177
+
178
+ | Control | Threat(s) | Mechanism | Verification |
179
+ |---------|-----------|-----------|--------------|
180
+ | OCSF audit logging | R-01, R-02 | `log_api_activity()`, `log_authorization()` emitted on every operation | CI test coverage, structlog configuration |
181
+ | API key redaction | I-03 | `LLMConfig.__repr__` redacts api_key and sensitive extra keys | Unit tests in `test_llm_providers.py` |
182
+ | PII detection + redaction | I-02 | PIIValidator (RFC-013): log/redact/block | Unit tests in `test_pii_validator.py` |
183
+ | LLM provider timeout | D-02 | `OllamaProvider` timeout=60s, `LiteLLMProvider` timeout + num_retries | Unit tests (RFC-010, RFC-012) |
184
+ | Config env-var resolution | I-03 | `${ENV_VAR}` syntax prevents raw secrets in YAML | Unit tests |
185
+ | Configurable model provider | S-02, E-03 | `provider` key selects backend; no implicit unauthenticated outbound calls | Config validation |
186
+ | Enrichment queue backpressure | D-01 | `maxsize=500` bounded queue | Code review |
187
+
188
+ ### 4.2 Recommended Additions (Not Yet Implemented)
189
+
190
+ | Recommendation | Threat(s) | Effort | Priority |
191
+ |---------------|-----------|--------|----------|
192
+ | Add content size limit to `remember()` | D-01 | Small | P3 |
193
+ | Add global exception handler that sanitizes error output | I-04 | Medium | P2 |
194
+ | Add TLS verification option for self-hosted LLM endpoints | S-02 | Small | P2 |
195
+ | Add config file integrity check (SHA-256 of default vs. loaded) | T-02, S-03 | Medium | P3 |
196
+ | Add recall timeout (configurable, default 30s) | D-03 | Medium | P3 |
197
+ | Domain-level access control for multi-tenant | E-01 | Large | Enterprise |
198
+
199
+ ---
200
+
201
+ ## 5. Threat Model Maintenance
202
+
203
+ | Activity | Frequency | Owner | Evidence |
204
+ |----------|-----------|-------|----------|
205
+ | Threat model review | Per quarter or per significant feature | CTO/CIO | Updated THREAT_MODEL.md |
206
+ | STRIDE assessment for new components | Per RFC (GOV-016 requirement) | RFC Author | Threats section in RFC |
207
+ | SAST scan | Every PR (CI) | Automated | CI pipeline logs |
208
+ | SCA scan | Every PR + daily scheduled | Automated | pip-audit, Snyk reports |
209
+ | Secret scan | Every PR (CI) | Automated | GitGuardian |
210
+ | Dependency vulnerability review | Per advisory (GOV-009 timelines) | Maintainer | GitHub Dependabot, Snyk |
211
+
212
+ ---
213
+
214
+ ## 6. Data Classification Mapping
215
+
216
+ Per GOV-021, the following data types exist in the system:
217
+
218
+ | Data | Classification | Storage | Handling |
219
+ |------|---------------|---------|----------|
220
+ | Threat intelligence notes (actor TTPs, IOCs, campaigns) | Internal (Tier 2) | SQLite + LanceDB, no encryption at rest | OS-level filesystem encryption recommended |
221
+ | PII (names, emails, phones — if not redacted) | Confidential (Tier 3) | SQLite (if PII passes through without redaction) | **Must** enable PII redaction (RFC-013) |
222
+ | API keys / credentials | Confidential (Tier 3) | Never committed; env vars only | Redacted from logs, resolved at runtime |
223
+ | Audit logs (OCSF events) | Internal (Tier 2) | Structured logs (GOV-012) | Logs must not contain Tier 3/4 data values |
224
+ | Configuration files | Internal (Tier 2) | config.yaml, config.default.yaml | `.gitignore` excludes user config; no secrets in YAML |
225
+ | Embedding vectors | Internal (Tier 2) | LanceDB parquet files | Derived from notes; same classification as source |
226
+ | CUI (federal contract data) | CUI (Tier 4) | **Not handled** in Community edition | Enterprise edition only, after FedRAMP authorization |
227
+
228
+ ---
229
+
230
+ ## 7. Recent Changes Affecting Threat Model
231
+
232
+ | Change | RFC/PR | Date | Threat Model Impact |
233
+ |--------|--------|------|---------------------|
234
+ | PII detection and redaction | RFC-013 (PR #118) | 2026-04-25 | New control for I-02; new attack surface (D-04); PII text logging fixed |
235
+ | LiteLLM unified provider | RFC-012 (PR #108) | 2026-04-25 | New provider that handles cloud API keys (I-03); new outbound traffic pattern (TB-4) |
236
+ | Local LLM backend selection | RFC-011 (PR #104) | 2026-04-25 | No new threat surface — extends existing local provider |
237
+ | Ollama provider timeout | RFC-010 | 2026-04-24 | Mitigation for D-02 |
238
+ | LLM provider registry | RFC-002 | 2026-04-16 | Foundation for S-02, E-03 via provider selection |
239
+ | SQLite backend default | v2.2.0 | 2026-04-14 | Migration path changes attack surface of legacy JSONL |
240
+ | Injection defenses | v2.1.1 | 2026-04-10 | Queries are now parameterized (fixes P0 SQL injection — see CHANGELOG) |
241
+
242
+ ---
243
+
244
+ ## 8. Threat Model Review Log
245
+
246
+ | Date | Reviewer | Changes | Next Review |
247
+ |------|----------|---------|-------------|
248
+ | 2026-04-25 | Hermes Agent (automated) | Initial threat model creation per GOV-011 | 2026-07-25 |
@@ -60,15 +60,23 @@ controls:
60
60
  - id: input_validation
61
61
  description: "Content must be str or have .content attribute"
62
62
  runtime_method: "GovernanceValidator.validate_operation"
63
+ - id: threat_model
64
+ description: "STRIDE threat model maintained and reviewed quarterly per GOV-011 SSDL"
65
+ artifact: "docs/THREAT_MODEL.md"
66
+ review_frequency: "quarterly"
67
+ last_reviewed: "2026-04-25"
68
+ test: "docs/THREAT_MODEL.md"
69
+ # test field satisfies the spec-drift validator's requirement that
70
+ # runtime-enforced rules have a test or runtime_method reference.
63
71
  # The 2026-04-25 compliance audit (C-2) found that a previously-declared
64
72
  # `no_hardcoded_secrets` rule pointed at GovernanceValidator.validate_operation
65
73
  # as its runtime_method, but that method contains no secret-detection
66
74
  # logic. Honest state: NOT IMPLEMENTED at runtime today. Static
67
75
  # enforcement is provided by GitGuardian (CI) and (once GOV-003-mandated
68
- # `S` rules are restored to ruff config audit H-1) Bandit S105/S106/S108.
76
+ # `S` rules are restored to ruff config -- audit H-1) Bandit S105/S106/S108.
69
77
  # Runtime detector (regex + entropy + detect-secrets) is tracked as
70
78
  # follow-up work; the rule will be re-declared here when implemented.
71
- # Removed rather than left fabricated see tasks/compliance-audit-2026-04-25.md.
79
+ # Removed rather than left fabricated -- see tasks/compliance-audit-2026-04-25.md.
72
80
 
73
81
  GOV-012:
74
82
  name: Audit Logging
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "zettelforge"
7
- version = "2.5.0"
7
+ version = "2.5.1"
8
8
  description = "ZettelForge: Agentic Memory System with vector search, knowledge graph, and synthesis"
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -57,7 +57,7 @@ from zettelforge.vector_retriever import VectorRetriever
57
57
  # importable for advanced use but are not part of the advertised public API
58
58
  # and are therefore excluded from __all__ below.
59
59
 
60
- __version__ = "2.4.3"
60
+ __version__ = "2.5.1"
61
61
  __all__ = [
62
62
  # Ontology reference tables (TypedEntityStore / OntologyValidator are
63
63
  # importable from zettelforge.ontology but are not part of the public API
@@ -22,6 +22,44 @@ from collections import deque
22
22
  from datetime import datetime
23
23
  from pathlib import Path
24
24
 
25
+ from zettelforge.log import get_logger
26
+
27
+ _logger = get_logger("zettelforge.knowledge_graph")
28
+
29
+
30
+ # Pre-v2.5.1 writers (now removed from the codebase, but persisted on disk
31
+ # in older deployments) used {source_id, target_id, relation_type} instead of
32
+ # {from_node_id, to_node_id, relationship}. _normalize_edge_schema() rewrites
33
+ # legacy entries on load so both shapes are tolerated. Missing edge_id is
34
+ # treated as terminal — we cannot index without one.
35
+ _LEGACY_EDGE_KEY_MAP = {
36
+ "source_id": "from_node_id",
37
+ "target_id": "to_node_id",
38
+ "relation_type": "relationship",
39
+ }
40
+
41
+
42
+ def _normalize_edge_schema(edge: dict) -> dict | None:
43
+ """Return a copy of ``edge`` with legacy keys remapped, or ``None`` if
44
+ the entry is missing fields the cache requires.
45
+
46
+ Idempotent: edges already in the canonical shape pass through unchanged.
47
+
48
+ ``relationship`` is required because downstream code (``add_edge`` dedup
49
+ scan, ``get_neighbors``, traversal) does direct subscripting on it; a
50
+ legacy row without ``relation_type`` would otherwise survive load and
51
+ trigger a deferred KeyError on first read.
52
+ """
53
+ if not isinstance(edge, dict) or not edge.get("edge_id"):
54
+ return None
55
+ out = dict(edge)
56
+ for legacy, canonical in _LEGACY_EDGE_KEY_MAP.items():
57
+ if canonical not in out and legacy in out:
58
+ out[canonical] = out[legacy]
59
+ if "from_node_id" not in out or "to_node_id" not in out or "relationship" not in out:
60
+ return None
61
+ return out
62
+
25
63
 
26
64
  class KnowledgeGraph:
27
65
  """
@@ -64,20 +102,39 @@ class KnowledgeGraph:
64
102
  continue
65
103
 
66
104
  if self.edges_file.exists():
105
+ skipped_malformed = 0
67
106
  with open(self.edges_file) as f:
68
107
  for line in f:
69
- if line.strip():
70
- try:
71
- edge = json.loads(line)
72
- self._cache_edge(edge)
73
- # Index temporal edges
74
- if (
75
- edge.get("relationship", "").startswith("TEMPORAL_")
76
- or edge.get("relationship") == "SUPERSEDES"
77
- ):
78
- self._index_temporal_edge(edge)
79
- except json.JSONDecodeError:
80
- continue
108
+ if not line.strip():
109
+ continue
110
+ try:
111
+ edge = json.loads(line)
112
+ except json.JSONDecodeError:
113
+ skipped_malformed += 1
114
+ continue
115
+ edge = _normalize_edge_schema(edge)
116
+ if edge is None:
117
+ skipped_malformed += 1
118
+ continue
119
+ self._cache_edge(edge)
120
+ # Index temporal edges
121
+ if (
122
+ edge.get("relationship", "").startswith("TEMPORAL_")
123
+ or edge.get("relationship") == "SUPERSEDES"
124
+ ):
125
+ self._index_temporal_edge(edge)
126
+ if skipped_malformed:
127
+ # Pre-v2.5.1 deployments wrote edges under both
128
+ # {from_node_id, to_node_id, relationship} and
129
+ # {source_id, target_id, relation_type}; the loader now
130
+ # normalizes the latter to the former. Anything still
131
+ # un-normalizable (or not valid JSON) is dropped here and
132
+ # counted; the count is logged at warning instead of crashing.
133
+ _logger.warning(
134
+ "kg_edges_skipped_malformed",
135
+ count=skipped_malformed,
136
+ file=str(self.edges_file),
137
+ )
81
138
 
82
139
  def _cache_node(self, node: dict):
83
140
  self._nodes[node["node_id"]] = node
@@ -0,0 +1,135 @@
1
+ """Regression test for v2.5.1 hotfix: KnowledgeGraph._cache_edge crashed
2
+ with KeyError on legacy edges that used {source_id, target_id, relation_type}
3
+ instead of {from_node_id, to_node_id, relationship}.
4
+
5
+ Triggered in production by long-running deployments where pre-v2.5.x writers
6
+ left ~80k+ legacy entries in kg_edges.jsonl alongside canonical-shape rows.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import tempfile
13
+ from pathlib import Path
14
+
15
+ from zettelforge.knowledge_graph import KnowledgeGraph, _normalize_edge_schema
16
+
17
+
18
+ def test_normalize_edge_schema_passes_canonical_shape_through():
19
+ edge = {
20
+ "edge_id": "edge_1",
21
+ "from_node_id": "node_a",
22
+ "to_node_id": "node_b",
23
+ "relationship": "MENTIONED_IN",
24
+ }
25
+ normalized = _normalize_edge_schema(edge)
26
+ assert normalized == edge
27
+
28
+
29
+ def test_normalize_edge_schema_remaps_legacy_keys():
30
+ legacy = {
31
+ "edge_id": "edge_2",
32
+ "source_id": "node_a",
33
+ "target_id": "node_b",
34
+ "relation_type": "MENTIONED_IN",
35
+ }
36
+ normalized = _normalize_edge_schema(legacy)
37
+ assert normalized is not None
38
+ assert normalized["from_node_id"] == "node_a"
39
+ assert normalized["to_node_id"] == "node_b"
40
+ assert normalized["relationship"] == "MENTIONED_IN"
41
+ # Legacy keys preserved alongside the canonical ones (we don't drop them
42
+ # on load — write path will overwrite if needed).
43
+ assert normalized["source_id"] == "node_a"
44
+
45
+
46
+ def test_normalize_edge_schema_returns_none_when_unrecoverable():
47
+ # Missing both legacy and canonical id keys — cannot cache without them.
48
+ assert _normalize_edge_schema({"edge_id": "edge_3"}) is None
49
+ # Missing edge_id — cannot index even if we had the rest.
50
+ assert (
51
+ _normalize_edge_schema({"from_node_id": "a", "to_node_id": "b", "relationship": "R"})
52
+ is None
53
+ )
54
+ # Missing relationship (and no legacy relation_type to remap from).
55
+ # Downstream code does direct subscripting on edge["relationship"], so
56
+ # entries without it must be rejected at load time, not deferred.
57
+ assert (
58
+ _normalize_edge_schema(
59
+ {"edge_id": "edge_4", "from_node_id": "a", "to_node_id": "b"}
60
+ )
61
+ is None
62
+ )
63
+
64
+
65
+ def test_normalize_edge_schema_handles_non_dict():
66
+ assert _normalize_edge_schema("not a dict") is None
67
+ assert _normalize_edge_schema(None) is None
68
+
69
+
70
+ def test_load_all_tolerates_mixed_schema_kg_edges():
71
+ """KnowledgeGraph._load_all() must not crash when kg_edges.jsonl contains
72
+ both canonical and legacy edge entries."""
73
+ with tempfile.TemporaryDirectory() as tmp:
74
+ data_dir = Path(tmp)
75
+ # Two canonical edges + one legacy + one totally broken.
76
+ edges = [
77
+ {
78
+ "edge_id": "edge_1",
79
+ "from_node_id": "node_a",
80
+ "to_node_id": "node_b",
81
+ "relationship": "MENTIONED_IN",
82
+ "properties": {},
83
+ },
84
+ {
85
+ "edge_id": "edge_2",
86
+ "source_id": "node_c",
87
+ "target_id": "node_d",
88
+ "relation_type": "MENTIONED_IN",
89
+ "properties": {},
90
+ },
91
+ {"edge_id": "edge_3"}, # malformed — no nodes
92
+ {
93
+ "edge_id": "edge_4",
94
+ "from_node_id": "node_e",
95
+ "to_node_id": "node_f",
96
+ "relationship": "TEMPORAL_BEFORE",
97
+ "properties": {"timestamp": "2026-04-25T00:00:00Z"},
98
+ },
99
+ ]
100
+ edges_file = data_dir / "kg_edges.jsonl"
101
+ with open(edges_file, "w") as f:
102
+ for edge in edges:
103
+ f.write(json.dumps(edge) + "\n")
104
+
105
+ # Even with a broken entry mixed in, construction must succeed and the
106
+ # cache must contain the three salvageable edges.
107
+ kg = KnowledgeGraph(data_dir=str(data_dir))
108
+ assert "edge_1" in kg._edges
109
+ assert "edge_2" in kg._edges # legacy schema normalized in
110
+ assert "edge_4" in kg._edges
111
+ assert "edge_3" not in kg._edges # dropped — no node ids
112
+
113
+ # The legacy entry's normalized from_node_id wires into the index.
114
+ assert "node_c" in kg._edges_from
115
+ assert "node_d" in kg._edges_to
116
+
117
+
118
+ def test_load_all_skips_corrupt_json_lines():
119
+ """Pre-existing tolerance for malformed JSON should still hold."""
120
+ with tempfile.TemporaryDirectory() as tmp:
121
+ data_dir = Path(tmp)
122
+ edges_file = data_dir / "kg_edges.jsonl"
123
+ with open(edges_file, "w") as f:
124
+ f.write(
125
+ '{"edge_id": "ok", "from_node_id": "a", "to_node_id": "b", "relationship": "R"}\n'
126
+ )
127
+ f.write("{not valid json\n")
128
+ f.write(
129
+ '{"edge_id": "ok2", "from_node_id": "c", "to_node_id": "d", "relationship": "R"}\n'
130
+ )
131
+
132
+ kg = KnowledgeGraph(data_dir=str(data_dir))
133
+ assert "ok" in kg._edges
134
+ assert "ok2" in kg._edges
135
+ assert len(kg._edges) == 2