widemem-ai 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- widemem_ai-1.3.0/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- widemem_ai-1.3.0/.github/ISSUE_TEMPLATE/feature_request.md +16 -0
- widemem_ai-1.3.0/.github/pull_request_template.md +18 -0
- widemem_ai-1.3.0/.github/workflows/ci.yml +33 -0
- widemem_ai-1.3.0/.gitignore +31 -0
- widemem_ai-1.3.0/CHANGELOG.md +74 -0
- widemem_ai-1.3.0/CODE_OF_CONDUCT.md +31 -0
- widemem_ai-1.3.0/CONTRIBUTING.md +61 -0
- widemem_ai-1.3.0/LICENSE +15 -0
- widemem_ai-1.3.0/PKG-INFO +677 -0
- widemem_ai-1.3.0/README.md +635 -0
- widemem_ai-1.3.0/SECURITY.md +31 -0
- widemem_ai-1.3.0/YMYL.md +168 -0
- widemem_ai-1.3.0/examples/basic_usage.py +29 -0
- widemem_ai-1.3.0/examples/hierarchical_memory.py +39 -0
- widemem_ai-1.3.0/examples/temporal_queries.py +32 -0
- widemem_ai-1.3.0/pyproject.toml +64 -0
- widemem_ai-1.3.0/scripts/e2e_test.py +119 -0
- widemem_ai-1.3.0/scripts/train_extractor.py +128 -0
- widemem_ai-1.3.0/tests/__init__.py +0 -0
- widemem_ai-1.3.0/tests/test_active_retrieval.py +359 -0
- widemem_ai-1.3.0/tests/test_extraction.py +165 -0
- widemem_ai-1.3.0/tests/test_hierarchy.py +413 -0
- widemem_ai-1.3.0/tests/test_memory.py +532 -0
- widemem_ai-1.3.0/tests/test_scoring.py +265 -0
- widemem_ai-1.3.0/tests/test_ymyl_topics.py +454 -0
- widemem_ai-1.3.0/widemem/__init__.py +7 -0
- widemem_ai-1.3.0/widemem/conflict/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/conflict/batch_resolver.py +97 -0
- widemem_ai-1.3.0/widemem/conflict/prompts.py +31 -0
- widemem_ai-1.3.0/widemem/core/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/core/exceptions.py +18 -0
- widemem_ai-1.3.0/widemem/core/memory.py +344 -0
- widemem_ai-1.3.0/widemem/core/pipeline.py +230 -0
- widemem_ai-1.3.0/widemem/core/types.py +135 -0
- widemem_ai-1.3.0/widemem/extraction/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/extraction/base.py +11 -0
- widemem_ai-1.3.0/widemem/extraction/collector.py +71 -0
- widemem_ai-1.3.0/widemem/extraction/llm_extractor.py +63 -0
- widemem_ai-1.3.0/widemem/extraction/prompts.py +48 -0
- widemem_ai-1.3.0/widemem/extraction/self_supervised.py +80 -0
- widemem_ai-1.3.0/widemem/hierarchy/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/hierarchy/manager.py +148 -0
- widemem_ai-1.3.0/widemem/hierarchy/prompts.py +56 -0
- widemem_ai-1.3.0/widemem/hierarchy/query_router.py +59 -0
- widemem_ai-1.3.0/widemem/hierarchy/summarizer.py +55 -0
- widemem_ai-1.3.0/widemem/providers/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/providers/embeddings/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/providers/embeddings/base.py +22 -0
- widemem_ai-1.3.0/widemem/providers/embeddings/openai.py +29 -0
- widemem_ai-1.3.0/widemem/providers/embeddings/sentence_transformers.py +42 -0
- widemem_ai-1.3.0/widemem/providers/llm/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/providers/llm/anthropic.py +51 -0
- widemem_ai-1.3.0/widemem/providers/llm/base.py +41 -0
- widemem_ai-1.3.0/widemem/providers/llm/ollama.py +51 -0
- widemem_ai-1.3.0/widemem/providers/llm/openai.py +56 -0
- widemem_ai-1.3.0/widemem/retrieval/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/retrieval/active.py +84 -0
- widemem_ai-1.3.0/widemem/retrieval/prompts.py +25 -0
- widemem_ai-1.3.0/widemem/retrieval/temporal.py +66 -0
- widemem_ai-1.3.0/widemem/scoring/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/scoring/decay.py +35 -0
- widemem_ai-1.3.0/widemem/scoring/importance.py +6 -0
- widemem_ai-1.3.0/widemem/scoring/topics.py +35 -0
- widemem_ai-1.3.0/widemem/scoring/ymyl.py +135 -0
- widemem_ai-1.3.0/widemem/storage/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/storage/history.py +74 -0
- widemem_ai-1.3.0/widemem/storage/vector/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/storage/vector/base.py +45 -0
- widemem_ai-1.3.0/widemem/storage/vector/faiss_store.py +155 -0
- widemem_ai-1.3.0/widemem/storage/vector/qdrant_store.py +144 -0
- widemem_ai-1.3.0/widemem/utils/__init__.py +0 -0
- widemem_ai-1.3.0/widemem/utils/hashing.py +5 -0
- widemem_ai-1.3.0/widemem/utils/id_mapping.py +25 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Bug Report
|
|
3
|
+
about: Something isn't working as expected
|
|
4
|
+
title: "[Bug] "
|
|
5
|
+
labels: bug
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
**Describe the bug**
|
|
10
|
+
A clear description of what went wrong.
|
|
11
|
+
|
|
12
|
+
**To reproduce**
|
|
13
|
+
```python
|
|
14
|
+
# Minimal code to reproduce the issue
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
**Expected behavior**
|
|
18
|
+
What you expected to happen.
|
|
19
|
+
|
|
20
|
+
**Actual behavior**
|
|
21
|
+
What actually happened. Include the full error traceback if applicable.
|
|
22
|
+
|
|
23
|
+
**Environment**
|
|
24
|
+
- Python version:
|
|
25
|
+
- widemem version:
|
|
26
|
+
- OS:
|
|
27
|
+
- LLM provider:
|
|
28
|
+
- Vector store:
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Feature Request
|
|
3
|
+
about: Suggest a new feature or improvement
|
|
4
|
+
title: "[Feature] "
|
|
5
|
+
labels: enhancement
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
**Problem**
|
|
10
|
+
What problem does this solve? Why is it needed?
|
|
11
|
+
|
|
12
|
+
**Proposed solution**
|
|
13
|
+
How should it work? Include code examples if possible.
|
|
14
|
+
|
|
15
|
+
**Alternatives considered**
|
|
16
|
+
Any other approaches you've thought about.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
## What
|
|
2
|
+
|
|
3
|
+
Brief description of the change.
|
|
4
|
+
|
|
5
|
+
## Why
|
|
6
|
+
|
|
7
|
+
Why is this change needed?
|
|
8
|
+
|
|
9
|
+
## How
|
|
10
|
+
|
|
11
|
+
How does it work? Any design decisions worth noting?
|
|
12
|
+
|
|
13
|
+
## Checklist
|
|
14
|
+
|
|
15
|
+
- [ ] Tests added/updated
|
|
16
|
+
- [ ] All tests pass (`pytest`)
|
|
17
|
+
- [ ] Linting passes (`ruff check .`)
|
|
18
|
+
- [ ] CHANGELOG.md updated (if user-facing change)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Lint
|
|
30
|
+
run: ruff check widemem/ tests/
|
|
31
|
+
|
|
32
|
+
- name: Test
|
|
33
|
+
run: pytest tests/ -v
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
__pycache__/
|
|
2
|
+
*.py[cod]
|
|
3
|
+
*$py.class
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
*.egg
|
|
8
|
+
.eggs/
|
|
9
|
+
|
|
10
|
+
.env
|
|
11
|
+
.env.*
|
|
12
|
+
*.key
|
|
13
|
+
credentials.json
|
|
14
|
+
|
|
15
|
+
.pytest_cache/
|
|
16
|
+
.mypy_cache/
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
htmlcov/
|
|
19
|
+
.coverage
|
|
20
|
+
|
|
21
|
+
*.db
|
|
22
|
+
*.faiss
|
|
23
|
+
state.json
|
|
24
|
+
|
|
25
|
+
.venv/
|
|
26
|
+
venv/
|
|
27
|
+
.vscode/
|
|
28
|
+
.idea/
|
|
29
|
+
*.swp
|
|
30
|
+
*.swo
|
|
31
|
+
.DS_Store
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
|
+
|
|
7
|
+
## [1.3.0] - 2026-03-09
|
|
8
|
+
|
|
9
|
+
### Added
|
|
10
|
+
|
|
11
|
+
- **Retry/backoff on LLM calls** — All LLM providers now retry up to 3 times with exponential backoff on transient errors (network, rate limits). `ProviderError` is not retried.
|
|
12
|
+
- **Memory TTL** — `MemoryConfig(ttl_days=30)` auto-expires memories older than N days at search time. No background jobs needed.
|
|
13
|
+
- **Score breakdown** — `MemorySearchResult` exposes `similarity_score`, `temporal_score`, `importance_score`, and `final_score` for debugging and transparency.
|
|
14
|
+
- **Batch add** — `memory.add_batch(["text1", "text2", ...])` processes multiple texts in one call.
|
|
15
|
+
- **Memory count** — `memory.count(user_id="alice")` returns total memory count with optional filters.
|
|
16
|
+
- **Export/import JSON** — `memory.export_json()` and `memory.import_json(data)` for backup, restore, and migration. Import skips existing IDs.
|
|
17
|
+
- 14 new tests (140 total)
|
|
18
|
+
|
|
19
|
+
## [1.2.0] - 2026-03-08
|
|
20
|
+
|
|
21
|
+
### Fixed
|
|
22
|
+
|
|
23
|
+
- **Invalid default LLM model** — Changed default from non-existent `gpt-4.1-nano` to `gpt-4o-mini`
|
|
24
|
+
- **Negative fact_index exploit** — Conflict resolver now rejects negative indices from LLM responses instead of silently wrapping via Python negative indexing
|
|
25
|
+
- **Duplicate fact_index processing** — If LLM returns the same fact_index twice with different actions, only the first is processed
|
|
26
|
+
- **Missing fact_index double-add** — Facts with missing `fact_index` in LLM response no longer get added twice (once from LLM action, once from fallback)
|
|
27
|
+
- **Unbounded top_k** — `search(top_k=...)` now capped at 1000 to prevent memory exhaustion
|
|
28
|
+
|
|
29
|
+
### Added
|
|
30
|
+
|
|
31
|
+
- 3 new tests for conflict resolver edge cases (negative index, duplicate index, missing index)
|
|
32
|
+
|
|
33
|
+
## [1.1.0] - 2026-03-08
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
- **YMYL two-tier confidence system** — Strong patterns (multi-word) get full treatment (importance floor 8.0, decay immunity, forced active retrieval). Weak patterns (single keyword) get moderate boost only. Prevents false positives like "bank of the river".
|
|
38
|
+
- **YMYL documentation** — `YMYL.md` with full explanation of the two-tier system, examples, flow diagram, and limitations
|
|
39
|
+
- **Duplicate content detection** — Content hash checked before insert, prevents identical memories from being stored
|
|
40
|
+
- **list_all() on vector stores** — Proper metadata-based listing (FAISS + Qdrant) replaces zero-vector search hack in hierarchy
|
|
41
|
+
- **End-to-end test script** — `scripts/e2e_test.py` for real OpenAI integration testing
|
|
42
|
+
- **Resource cleanup** — `WideMemory` supports context manager (`with` statement), `close()`, and `__del__` for proper SQLite cleanup
|
|
43
|
+
- **Thread safety** — Pipeline operations protected by threading lock for concurrent access
|
|
44
|
+
- **Embedding dimension validation** — FAISS rejects vectors with wrong dimensions instead of silently corrupting
|
|
45
|
+
- **Conflict resolver fallback** — Bad LLM JSON gracefully falls back to ADD all facts instead of crashing
|
|
46
|
+
|
|
47
|
+
### Fixed
|
|
48
|
+
|
|
49
|
+
- **YMYL regex case sensitivity** — Patterns with mixed case (IRS, MRI, W-2) now match correctly against lowercased text via `re.IGNORECASE`
|
|
50
|
+
- **Zero-vector search hack** — Hierarchy manager now uses `list_all()` instead of searching with a zero vector
|
|
51
|
+
|
|
52
|
+
## [1.0.0] - 2026-03-08
|
|
53
|
+
|
|
54
|
+
### Added
|
|
55
|
+
|
|
56
|
+
- **Core memory system** — `WideMemory` with add, search, get, delete, and history
|
|
57
|
+
- **Batch conflict resolution** — Single LLM call resolves all new facts against existing memories (ADD/UPDATE/DELETE/NONE)
|
|
58
|
+
- **Importance scoring** — Facts rated 1-10 at extraction time, normalized into combined scoring
|
|
59
|
+
- **Time decay** — Four decay functions: exponential, linear, step, none
|
|
60
|
+
- **Combined scoring** — `final_score = similarity * weight + importance * weight + recency * weight`
|
|
61
|
+
- **Hierarchical memory** — Three-tier system (facts, summaries, themes) with automatic query routing and fallback chain
|
|
62
|
+
- **Active retrieval** — Contradiction and ambiguity detection with clarification callbacks
|
|
63
|
+
- **Self-supervised extraction** — SQLite-backed training data collector, small model fallback chain, training script
|
|
64
|
+
- **Topic weights** — Configurable boost/suppress multipliers for retrieval, custom extraction hints
|
|
65
|
+
- **Temporal search** — Time-range filters (time_after, time_before) on search
|
|
66
|
+
- **History audit trail** — SQLite log of all add/update/delete operations
|
|
67
|
+
- **Persistent FAISS** — Save/load to disk via `VectorStoreConfig.path`
|
|
68
|
+
- **LLM providers** — OpenAI, Anthropic Claude, Ollama
|
|
69
|
+
- **Embedding providers** — OpenAI, sentence-transformers (local)
|
|
70
|
+
- **Vector store providers** — FAISS (local), Qdrant (local or cloud)
|
|
71
|
+
- **UUID-to-integer ID mapping** — Prevents LLM hallucination of invalid memory IDs during conflict resolution
|
|
72
|
+
- **MD5 content hashing** — Skips no-op updates when content hasn't changed
|
|
73
|
+
- **Open source release** — README, CONTRIBUTING, CODE_OF_CONDUCT, SECURITY, LICENSE (Apache 2.0), GitHub templates, CI workflow
|
|
74
|
+
- 126 tests, all passing
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We are committed to making participation in this project a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
|
|
6
|
+
|
|
7
|
+
## Our Standards
|
|
8
|
+
|
|
9
|
+
**Examples of behavior that contributes to a positive environment:**
|
|
10
|
+
|
|
11
|
+
- Using welcoming and inclusive language
|
|
12
|
+
- Being respectful of differing viewpoints and experiences
|
|
13
|
+
- Gracefully accepting constructive criticism
|
|
14
|
+
- Focusing on what is best for the community
|
|
15
|
+
- Showing empathy towards other community members
|
|
16
|
+
|
|
17
|
+
**Examples of unacceptable behavior:**
|
|
18
|
+
|
|
19
|
+
- The use of sexualized language or imagery and unwelcome sexual attention or advances
|
|
20
|
+
- Trolling, insulting/derogatory comments, and personal or political attacks
|
|
21
|
+
- Public or private harassment
|
|
22
|
+
- Publishing others' private information without explicit permission
|
|
23
|
+
- Other conduct which could reasonably be considered inappropriate in a professional setting
|
|
24
|
+
|
|
25
|
+
## Enforcement
|
|
26
|
+
|
|
27
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project maintainer at **radu@cioplea.com**. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances.
|
|
28
|
+
|
|
29
|
+
## Attribution
|
|
30
|
+
|
|
31
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Contributing to widemem
|
|
2
|
+
|
|
3
|
+
Thanks for considering a contribution. Here's how to get started.
|
|
4
|
+
|
|
5
|
+
## Setup
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
git clone https://github.com/remete618/widemem-ai.git
|
|
9
|
+
cd widemem-ai
|
|
10
|
+
python3 -m pip install -e ".[dev]"
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Running tests
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pytest
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
All tests use mocks — no API keys or external services needed.
|
|
20
|
+
|
|
21
|
+
## Code style
|
|
22
|
+
|
|
23
|
+
- We use [ruff](https://github.com/astral-sh/ruff) for linting and formatting
|
|
24
|
+
- Run `ruff check .` and `ruff format .` before submitting
|
|
25
|
+
- No docstrings or comments unless the logic isn't self-evident
|
|
26
|
+
- No type stubs or backwards-compatibility shims — just change the code
|
|
27
|
+
- Keep it simple. If three lines work, don't write an abstraction
|
|
28
|
+
|
|
29
|
+
## Pull requests
|
|
30
|
+
|
|
31
|
+
1. Fork the repo and create a branch from `main`
|
|
32
|
+
2. Write tests for any new functionality
|
|
33
|
+
3. Make sure all tests pass (`pytest`)
|
|
34
|
+
4. Make sure linting passes (`ruff check .`)
|
|
35
|
+
5. Keep PRs focused — one feature or fix per PR
|
|
36
|
+
6. Write a clear PR description explaining what and why
|
|
37
|
+
|
|
38
|
+
## What we're looking for
|
|
39
|
+
|
|
40
|
+
- Bug fixes with a test that reproduces the bug
|
|
41
|
+
- New provider implementations (LLM, embedding, vector store)
|
|
42
|
+
- Performance improvements with benchmarks
|
|
43
|
+
- Documentation improvements
|
|
44
|
+
|
|
45
|
+
## What we're not looking for (yet)
|
|
46
|
+
|
|
47
|
+
- Major architectural changes without prior discussion — open an issue first
|
|
48
|
+
- Features that add complexity without clear user value
|
|
49
|
+
- Dependencies on large frameworks or libraries
|
|
50
|
+
|
|
51
|
+
## Reporting bugs
|
|
52
|
+
|
|
53
|
+
Open a GitHub issue with:
|
|
54
|
+
- What you expected to happen
|
|
55
|
+
- What actually happened
|
|
56
|
+
- Steps to reproduce
|
|
57
|
+
- Python version and OS
|
|
58
|
+
|
|
59
|
+
## Code of Conduct
|
|
60
|
+
|
|
61
|
+
See [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md). Be decent.
|
widemem_ai-1.3.0/LICENSE
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
you may not use this file except in compliance with the License.
|
|
7
|
+
You may obtain a copy of the License at
|
|
8
|
+
|
|
9
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
|
|
11
|
+
Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
See the License for the specific language governing permissions and
|
|
15
|
+
limitations under the License.
|