PyPI - stixdb-engine - Versions diffs - 0.1.0__tar.gz - Mend

stixdb-engine 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (118) hide show

stixdb_engine-0.1.0/.env.example +91 -0
stixdb_engine-0.1.0/.gitattributes +2 -0
stixdb_engine-0.1.0/.gitignore +71 -0
stixdb_engine-0.1.0/CHANGELOG.md +49 -0
stixdb_engine-0.1.0/CONTRIBUTING.md +123 -0
stixdb_engine-0.1.0/COOKBOOKS_SUMMARY.md +528 -0
stixdb_engine-0.1.0/Dockerfile +38 -0
stixdb_engine-0.1.0/LICENSE +21 -0
stixdb_engine-0.1.0/PKG-INFO +535 -0
stixdb_engine-0.1.0/PRODUCTION.md +563 -0
stixdb_engine-0.1.0/QUICKSTART.md +471 -0
stixdb_engine-0.1.0/README.md +485 -0
stixdb_engine-0.1.0/SECURITY.md +44 -0
stixdb_engine-0.1.0/SKILLS_SUMMARY.md +221 -0
stixdb_engine-0.1.0/cookbooks/INDEX.md +362 -0
stixdb_engine-0.1.0/cookbooks/core-sdk/01_basic_store_retrieve.py +140 -0
stixdb_engine-0.1.0/cookbooks/core-sdk/02_agent_tuning.py +131 -0
stixdb_engine-0.1.0/cookbooks/core-sdk/03_local_vs_server.py +323 -0
stixdb_engine-0.1.0/cookbooks/custom-embeddings/README.md +339 -0
stixdb_engine-0.1.0/cookbooks/custom-embeddings/domain_specialized_embeddings.py +281 -0
stixdb_engine-0.1.0/cookbooks/custom-embeddings/hybrid_search_strategy.py +244 -0
stixdb_engine-0.1.0/cookbooks/custom-embeddings/openai_embeddings.py +200 -0
stixdb_engine-0.1.0/cookbooks/custom-embeddings/privacy_first_local_embeddings.py +248 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/README.md +306 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/anthropic.py +141 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/multi_model_routing.py +250 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/ollama_local.py +153 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/openai_gpt4o.py +169 -0
stixdb_engine-0.1.0/cookbooks/custom-llm/privacy_first_local_llm.py +189 -0
stixdb_engine-0.1.0/cookbooks/langchain/rag_pipeline.py +167 -0
stixdb_engine-0.1.0/cookbooks/langchain/stixdb_retriever.py +192 -0
stixdb_engine-0.1.0/cookbooks/multi-agent/concurrent_agents.py +167 -0
stixdb_engine-0.1.0/cookbooks/openai-compatible/with_openai_sdk.py +223 -0
stixdb_engine-0.1.0/cookbooks/rest-api/curl_examples.sh +170 -0
stixdb_engine-0.1.0/cookbooks/rest-api/local_to_server.md +245 -0
stixdb_engine-0.1.0/doc/ARCHITECTURE_DIAGRAMS.md +97 -0
stixdb_engine-0.1.0/doc/README.md +64 -0
stixdb_engine-0.1.0/doc/STIXDB_COMPREHENSIVE_GUIDE.md +141 -0
stixdb_engine-0.1.0/doc/architecture/00-project-guide.md +686 -0
stixdb_engine-0.1.0/doc/architecture/10-app-overview.md +143 -0
stixdb_engine-0.1.0/doc/architecture/11-system-architecture.md +261 -0
stixdb_engine-0.1.0/doc/architecture/12-repo-organization.md +214 -0
stixdb_engine-0.1.0/doc/architecture/13-openai-compatibility.md +156 -0
stixdb_engine-0.1.0/doc/architecture/14-search-api.md +186 -0
stixdb_engine-0.1.0/doc/architecture/15-sdk-usage.md +180 -0
stixdb_engine-0.1.0/doc/performance/01-streaming-overview.md +119 -0
stixdb_engine-0.1.0/doc/performance/02-retrieval-latency-fix.md +125 -0
stixdb_engine-0.1.0/doc/performance/03-verbose-progress-mode.md +106 -0
stixdb_engine-0.1.0/doc/performance/04-benchmarking-guide.md +179 -0
stixdb_engine-0.1.0/docker-compose.yml +127 -0
stixdb_engine-0.1.0/pyproject.toml +79 -0
stixdb_engine-0.1.0/sdk/LICENSE +21 -0
stixdb_engine-0.1.0/sdk/README.md +408 -0
stixdb_engine-0.1.0/sdk/examples/async_usage.py +26 -0
stixdb_engine-0.1.0/sdk/examples/health_check.py +17 -0
stixdb_engine-0.1.0/sdk/examples/ingest_folder_openai_chat.py +49 -0
stixdb_engine-0.1.0/sdk/examples/query_ask.py +25 -0
stixdb_engine-0.1.0/sdk/examples/store_and_search.py +38 -0
stixdb_engine-0.1.0/sdk/pyproject.toml +70 -0
stixdb_engine-0.1.0/sdk/skills/SKILL.md +732 -0
stixdb_engine-0.1.0/sdk/skills/sdk-memory-layer/README.md +16 -0
stixdb_engine-0.1.0/sdk/skills/sdk-memory-layer/SKILL.md +732 -0
stixdb_engine-0.1.0/sdk/skills/sdk-memory-layer/evals/evals.json +30 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/__init__.py +23 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/base.py +16 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/client.py +92 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/memory.py +309 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/py.typed +0 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/query.py +96 -0
stixdb_engine-0.1.0/sdk/src/stixdb_sdk/search.py +90 -0
stixdb_engine-0.1.0/skills/NAMING_CONVENTIONS.md +209 -0
stixdb_engine-0.1.0/skills/SKILL.md +905 -0
stixdb_engine-0.1.0/start-local.ps1 +80 -0
stixdb_engine-0.1.0/start-local.sh +77 -0
stixdb_engine-0.1.0/stixdb/__init__.py +15 -0
stixdb_engine-0.1.0/stixdb/agent/__init__.py +17 -0
stixdb_engine-0.1.0/stixdb/agent/consolidator.py +336 -0
stixdb_engine-0.1.0/stixdb/agent/maintenance.py +443 -0
stixdb_engine-0.1.0/stixdb/agent/memory_agent.py +84 -0
stixdb_engine-0.1.0/stixdb/agent/planner.py +163 -0
stixdb_engine-0.1.0/stixdb/agent/reasoner.py +817 -0
stixdb_engine-0.1.0/stixdb/agent/sessions.py +67 -0
stixdb_engine-0.1.0/stixdb/agent/worker.py +205 -0
stixdb_engine-0.1.0/stixdb/api/__init__.py +1 -0
stixdb_engine-0.1.0/stixdb/api/routes/__init__.py +1 -0
stixdb_engine-0.1.0/stixdb/api/routes/agent.py +56 -0
stixdb_engine-0.1.0/stixdb/api/routes/collections.py +251 -0
stixdb_engine-0.1.0/stixdb/api/routes/openai.py +278 -0
stixdb_engine-0.1.0/stixdb/api/routes/query.py +76 -0
stixdb_engine-0.1.0/stixdb/api/routes/search.py +420 -0
stixdb_engine-0.1.0/stixdb/api/server.py +137 -0
stixdb_engine-0.1.0/stixdb/backup/__init__.py +3 -0
stixdb_engine-0.1.0/stixdb/backup/minio_store.py +61 -0
stixdb_engine-0.1.0/stixdb/cli.py +90 -0
stixdb_engine-0.1.0/stixdb/config.py +252 -0
stixdb_engine-0.1.0/stixdb/context/__init__.py +5 -0
stixdb_engine-0.1.0/stixdb/context/broker.py +233 -0
stixdb_engine-0.1.0/stixdb/context/response.py +80 -0
stixdb_engine-0.1.0/stixdb/engine.py +1264 -0
stixdb_engine-0.1.0/stixdb/graph/__init__.py +12 -0
stixdb_engine-0.1.0/stixdb/graph/cluster.py +85 -0
stixdb_engine-0.1.0/stixdb/graph/edge.py +104 -0
stixdb_engine-0.1.0/stixdb/graph/memory_graph.py +434 -0
stixdb_engine-0.1.0/stixdb/graph/node.py +147 -0
stixdb_engine-0.1.0/stixdb/ingestion/__init__.py +13 -0
stixdb_engine-0.1.0/stixdb/ingestion/documents.py +134 -0
stixdb_engine-0.1.0/stixdb/observability/__init__.py +3 -0
stixdb_engine-0.1.0/stixdb/observability/tracer.py +268 -0
stixdb_engine-0.1.0/stixdb/skills/engine-memory-layer/README.md +17 -0
stixdb_engine-0.1.0/stixdb/skills/engine-memory-layer/SKILL.md +905 -0
stixdb_engine-0.1.0/stixdb/skills/engine-memory-layer/evals/evals.json +35 -0
stixdb_engine-0.1.0/stixdb/storage/__init__.py +27 -0
stixdb_engine-0.1.0/stixdb/storage/base.py +176 -0
stixdb_engine-0.1.0/stixdb/storage/embeddings.py +136 -0
stixdb_engine-0.1.0/stixdb/storage/kuzu_backend.py +693 -0
stixdb_engine-0.1.0/stixdb/storage/neo4j_backend.py +452 -0
stixdb_engine-0.1.0/stixdb/storage/networkx_backend.py +263 -0
stixdb_engine-0.1.0/stixdb/storage/vector_store.py +295 -0

stixdb_engine-0.1.0/.env.example ADDED Viewed

@@ -0,0 +1,91 @@
+# ==========================================
+# StixDB Environment Configuration Example
+# ==========================================
+# ------------------------------------------
+# 1. Background Agent Configuration
+# ------------------------------------------
+STIXDB_AGENT_CYCLE_INTERVAL=30.0               # Seconds between perceive/plan/act loops
+STIXDB_AGENT_CONSOLIDATION_THRESHOLD=0.88      # Minimum cosine similarity to merge redundant nodes
+STIXDB_AGENT_DECAY_HALF_LIFE=48.0              # Hours before an unaccessed node's importance halves
+STIXDB_AGENT_PRUNE_THRESHOLD=0.00              # Node importance below this will be permanently pruned
+STIXDB_AGENT_WORKING_MEMORY_MAX=256            # Max number of hot nodes kept in working memory
+STIXDB_AGENT_MAX_CONSOLIDATION_BATCH=64        # Max nodes the Consolidator merges per cycle
+STIXDB_AGENT_AUTO_SUMMARIZE=true               # Should agents automatically summarize large clusters?
+STIXDB_AGENT_LINEAGE_SAFE_MODE=true            # Preserve summarized source nodes instead of pruning them later
+# ------------------------------------------
+# 2. LLM Reasoner Configuration
+# ------------------------------------------
+STIXDB_LLM_PROVIDER=custom                     # openai | anthropic | ollama | custom | none
+STIXDB_LLM_MODEL=nvidia/nemotron-3-super-120b-a12b # E.g., gpt-4o, claude-3-5-sonnet, llama3, or custom model
+STIXDB_LLM_TEMPERATURE=0.2                     # Sampling temperature used for LLM generation
+STIXDB_LLM_MAX_TOKENS=2048                     # Max context token sequence length
+STIXDB_LLM_MAX_CONTEXT_NODES=20                # Max number of graph nodes provided as context
+STIXDB_LLM_GRAPH_TRAVERSAL_DEPTH=3             # Max depth of graph traversal during query retrieval
+STIXDB_LLM_TIMEOUT=60.0                        # REST API timeout for LLM completion calls
+# ------------------------------------------
+# 3. Embedding Provider Configuration
+# ------------------------------------------
+STIXDB_EMBEDDING_PROVIDER=custom               # sentence_transformers | openai | ollama | custom
+STIXDB_EMBEDDING_MODEL=Qwen/Qwen3-Embedding-8B # Model architecture mapped via string name
+STIXDB_EMBEDDING_DIMENSIONS=384                # Dimensionality of the generated embedding vectors
+# ------------------------------------------
+# 4. Storage & Vector DB Configuration
+# ------------------------------------------
+# STIXDB_STORAGE_MODE controls the graph backend:
+#
+#   memory  — In-process NetworkX (fastest, no persistence, data lost on restart)
+#   kuzu    — KuzuDB embedded (persistent on disk, no Docker required) ✔ RECOMMENDED FOR LOCAL DEV
+#   neo4j   — Neo4j via Docker (production)
+#
+STIXDB_STORAGE_MODE=kuzu                       # memory | kuzu | neo4j
+STIXDB_DATA_DIR=./stixdb_data                   # Root data directory
+STIXDB_KUZU_PATH=./stixdb_data/kuzu             # Path for KuzuDB files (kuzu mode only)
+STIXDB_VECTOR_BACKEND=memory                   # memory (in-proc) | chroma | qdrant
+STIXDB_STORAGE_MAX_ACTIVE_NODES=1000000        # Safety cap enforcing eviction threshold
+# ExtDB Endpoints (Required only if using Qdrant/Chroma)
+# QDRANT_HOST=localhost
+# QDRANT_PORT=6333
+# CHROMA_HOST=localhost
+# ------------------------------------------
+# 5. External API Keys & Endpoints
+# ------------------------------------------
+# OPENAI_API_KEY=sk-your-openai-key-here
+# ANTHROPIC_API_KEY=sk-ant-your-anthropic-key-here
+# OLLAMA_BASE_URL=http://localhost:11434
+# Custom OpenAI-compatible provider (only if STIXDB_LLM_PROVIDER=custom)
+# STIXDB_LLM_CUSTOM_BASE_URL=https://your-provider.com/v1/
+# STIXDB_LLM_CUSTOM_API_KEY=your-custom-llm-api-key
+# Custom embedding provider (only if STIXDB_EMBEDDING_PROVIDER=custom)
+# STIXDB_EMBEDDING_CUSTOM_BASE_URL=https://your-provider.com/v1/
+# STIXDB_EMBEDDING_CUSTOM_API_KEY=your-custom-embedding-api-key
+# ------------------------------------------
+# 6. Ingestion Configuration
+# ------------------------------------------
+STIXDB_CHUNK_SIZE=1000                       # Characters per ingestion chunk
+STIXDB_CHUNK_OVERLAP=200                     # Overlap between consecutive chunks
+                                              # Supported: .txt .md .pdf (native text) and all code/
+                                              # markup extensions. For scanned PDFs, run OCR externally
+                                              # and upload the resulting .md or .txt file.
+# ------------------------------------------
+# 7. Observability & Telemetry
+# ------------------------------------------
+STIXDB_ENABLE_TRACES=true                      # Enables distributed tracing
+STIXDB_ENABLE_METRICS=true                     # Enables Prometheus metrics monitoring
+STIXDB_METRICS_PORT=9090                       # Port Prometheus server exposes
+STIXDB_LOG_LEVEL=INFO                          # DEBUG, INFO, WARNING, ERROR
+# ------------------------------------------
+# 8. Server & API Configuration
+# ------------------------------------------
+STIXDB_API_PORT=4020                           # Port where the REST API runs
+STIXDB_API_KEY=your-secure-api-key-here        # If set, all requests must pass X-API-Key header

stixdb_engine-0.1.0/.gitattributes ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ # Auto detect text files and perform LF normalization
2	+ * text=auto

stixdb_engine-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,71 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*.so
+# Virtual environments
+.venv/
+venv/
+env/
+ENV/
+# Build & distribution
+*.egg-info/
+dist/
+build/
+*.egg
+MANIFEST
+*.whl
+# Pytest / coverage
+.pytest_cache/
+.coverage
+.coverage.*
+htmlcov/
+.hypothesis/
+# Mypy / type checkers
+.mypy_cache/
+.dmypy.json
+dmypy.json
+.pyre/
+# Ruff
+.ruff_cache/
+# Environment files — NEVER commit real credentials
+.env
+.env.local
+.env.*.local
+stix/.env
+# IDE / editor
+.vscode/
+.idea/
+*.sublime-project
+*.sublime-workspace
+.DS_Store
+Thumbs.db
+# Claude Code local settings
+.claude/
+# STIX runtime data
+stix_data/
+stix/stix_data/
+*.db
+# Logs
+*.log
+logs/
+# Notebooks checkpoints
+.ipynb_checkpoints/
+# Docker volumes (local dev)
+neo4j_data/
+chroma_data/
+pg_data/

stixdb_engine-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,49 @@
+# Changelog
+All notable changes to StixDB are documented here.
+The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+StixDB uses [Semantic Versioning](https://semver.org/).
+---
+## [Unreleased]
+### Planned
+- Pinecone vector backend
+- Multi-hop graph reasoning traces UI
+- Collection-level RBAC
+- Webhook callbacks on agent cycle events
+---
+## [0.1.0] — 2025-01-01
+### Added
+- **StixDBEngine** — top-level async engine managing multiple isolated collections
+- **MemoryGraph** — unified graph + vector store interface (NetworkX / KuzuDB / Neo4j)
+- **MemoryAgent** — per-collection autonomous background agent
+  - `AccessPlanner` — hybrid LRU+LFU heat scoring for node tier promotion
+  - `Consolidator` — cosine-similarity merging (threshold 0.88) + exponential decay pruning
+  - `MemoryAgentWorker` — decoupled `perceive → plan → act` async loop (30s default)
+- **ContextBroker** — 7-phase retrieval: embed → vector search → graph BFS → re-rank → truncate → LLM reason → record
+- **Reasoner** — LLM synthesis over graph context (OpenAI / Anthropic / Ollama / Custom / None)
+- **REST API** (FastAPI)
+  - Collection CRUD, bulk ingest, file upload
+  - `POST /collections/{id}/ask` — agentic Q&A
+  - `POST /search` — multi-query, cross-collection, filterable search
+  - `GET /collections/{id}/agent/status` — agent introspection
+  - `GET /traces` — execution trace log
+  - OpenAI-compatible `/v1/chat/completions`, `/v1/models`, `/v1/embeddings`
+- **Python SDK** (`stixdb-sdk`) — sync + async HTTP client
+- **Storage backends**: NetworkX (ephemeral), KuzuDB (local persistent, no Docker), Neo4j (Docker, production)
+- **Vector backends**: NumPy (in-process), ChromaDB, Qdrant
+- **Embedding providers**: sentence-transformers, OpenAI, Ollama, custom OpenAI-compatible
+- **Memory tiers**: `working`, `episodic`, `semantic`, `procedural`, `archived`
+- **Node types**: `fact`, `entity`, `event`, `concept`, `procedure`, `summary`, `question`
+- **Lineage safety mode** — source nodes pinned across consolidation cycles
+- **Document ingestion** — PDF (page-level provenance), plain text (character offset chunking)
+- **Observability** — structlog, Prometheus metrics, distributed trace log
+- **Docker Compose** stack — StixDB + Neo4j + ChromaDB + PostgreSQL
+- **CLI** — `stixdb serve`, `stixdb demo`, `stixdb multi-demo`
+- Full test suite — agent, graph, lineage, search API, OpenAI compatibility, SDK

stixdb_engine-0.1.0/CONTRIBUTING.md ADDED Viewed

@@ -0,0 +1,123 @@
+# Contributing to StixDB
+Thanks for your interest — all contributions are welcome: bug reports, features, documentation, examples, and storage/embedding backend integrations.
+---
+## Getting Started
+### Prerequisites
+- Python 3.10+
+- Git
+### Setup
+```bash
+git clone https://github.com/your-org/stix.git
+cd stix
+python -m venv .venv
+source .venv/bin/activate      # Windows: .venv\Scripts\activate
+pip install -e ".[dev]"
+```
+Copy the example env and configure it:
+```bash
+cp .env.example .env
+# Edit .env — at minimum set STIXDB_LLM_PROVIDER=none for testing without an API key
+```
+### Run the tests
+```bash
+pytest tests/ -v
+```
+Tests run in heuristic mode (`LLMProvider.NONE`) by default — no API key needed.
+---
+## Project Structure
+```
+stixdb/             Core engine
+  agent/            MemoryAgent — AccessPlanner, Consolidator, Worker; plus the Reasoner
+  api/              FastAPI server and routes
+  context/          ContextBroker (7-phase retrieval)
+  graph/            MemoryGraph, node/edge/cluster models
+  storage/          StorageBackend implementations (NetworkX, KuzuDB, Neo4j)
+                    VectorStore implementations (NumPy, ChromaDB, Qdrant)
+                    EmbeddingClient (sentence-transformers, OpenAI, Ollama, custom)
+  ingestion/        Document parsing and chunking
+  observability/    Structured logging and trace emission
+sdk/                Python HTTP client (stixdb-sdk)
+  src/stixdb_sdk/     Client, MemoryAPI, QueryAPI, SearchAPI
+examples/           Runnable examples
+tests/              Automated test suite
+doc/                Architecture and performance documentation
+```
+---
+## How to Contribute
+### Reporting a bug
+Open a [GitHub Issue](https://github.com/your-org/stix/issues/new?template=bug_report.md) with:
+- Reproduction steps (minimal code)
+- Your environment (Python version, storage/vector backend, OS)
+- Full stack trace
+### Requesting a feature
+Open a [GitHub Issue](https://github.com/your-org/stix/issues/new?template=feature_request.md) describing the problem and your proposed solution.
+### Submitting a pull request
+1. Fork the repo and create a branch: `feature/my-thing` or `fix/issue-123`
+2. Make your changes
+3. Add or update tests in `tests/`
+4. Run `pytest tests/ -v` — all tests must pass
+5. Run `ruff check stixdb/ sdk/src/ && ruff format --check stixdb/ sdk/src/`
+6. Open a PR with a clear description (use the PR template)
+---
+## Adding a New Storage Backend
+Implement `stixdb/storage/base.py:StorageBackend` and register it in `stixdb/config.py` under `StorageMode`. See `stixdb/storage/networkx_backend.py` for the simplest reference implementation.
+## Adding a New Vector Backend
+Implement the `VectorStore` protocol in `stixdb/storage/vector_store.py`. See `MemoryVectorStore` for a minimal reference.
+## Adding a New LLM Provider
+Extend `stixdb/agent/reasoner.py` — add a branch in `Reasoner._call_llm` and register the new value in `LLMProvider`.
+---
+## Code Style
+- Formatter: `ruff format` (line length 100)
+- Linter: `ruff check`
+- Logging: `structlog` — never use `print()` in library code
+- Models: Pydantic v2
+- Tests: `pytest` + `pytest-asyncio`
+---
+## Security
+Do not open public issues for security vulnerabilities. See [SECURITY.md](SECURITY.md).
+---
+## License
+By contributing you agree that your contributions are licensed under the [MIT License](LICENSE).