widemem-ai 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. widemem_ai-1.3.0/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  2. widemem_ai-1.3.0/.github/ISSUE_TEMPLATE/feature_request.md +16 -0
  3. widemem_ai-1.3.0/.github/pull_request_template.md +18 -0
  4. widemem_ai-1.3.0/.github/workflows/ci.yml +33 -0
  5. widemem_ai-1.3.0/.gitignore +31 -0
  6. widemem_ai-1.3.0/CHANGELOG.md +74 -0
  7. widemem_ai-1.3.0/CODE_OF_CONDUCT.md +31 -0
  8. widemem_ai-1.3.0/CONTRIBUTING.md +61 -0
  9. widemem_ai-1.3.0/LICENSE +15 -0
  10. widemem_ai-1.3.0/PKG-INFO +677 -0
  11. widemem_ai-1.3.0/README.md +635 -0
  12. widemem_ai-1.3.0/SECURITY.md +31 -0
  13. widemem_ai-1.3.0/YMYL.md +168 -0
  14. widemem_ai-1.3.0/examples/basic_usage.py +29 -0
  15. widemem_ai-1.3.0/examples/hierarchical_memory.py +39 -0
  16. widemem_ai-1.3.0/examples/temporal_queries.py +32 -0
  17. widemem_ai-1.3.0/pyproject.toml +64 -0
  18. widemem_ai-1.3.0/scripts/e2e_test.py +119 -0
  19. widemem_ai-1.3.0/scripts/train_extractor.py +128 -0
  20. widemem_ai-1.3.0/tests/__init__.py +0 -0
  21. widemem_ai-1.3.0/tests/test_active_retrieval.py +359 -0
  22. widemem_ai-1.3.0/tests/test_extraction.py +165 -0
  23. widemem_ai-1.3.0/tests/test_hierarchy.py +413 -0
  24. widemem_ai-1.3.0/tests/test_memory.py +532 -0
  25. widemem_ai-1.3.0/tests/test_scoring.py +265 -0
  26. widemem_ai-1.3.0/tests/test_ymyl_topics.py +454 -0
  27. widemem_ai-1.3.0/widemem/__init__.py +7 -0
  28. widemem_ai-1.3.0/widemem/conflict/__init__.py +0 -0
  29. widemem_ai-1.3.0/widemem/conflict/batch_resolver.py +97 -0
  30. widemem_ai-1.3.0/widemem/conflict/prompts.py +31 -0
  31. widemem_ai-1.3.0/widemem/core/__init__.py +0 -0
  32. widemem_ai-1.3.0/widemem/core/exceptions.py +18 -0
  33. widemem_ai-1.3.0/widemem/core/memory.py +344 -0
  34. widemem_ai-1.3.0/widemem/core/pipeline.py +230 -0
  35. widemem_ai-1.3.0/widemem/core/types.py +135 -0
  36. widemem_ai-1.3.0/widemem/extraction/__init__.py +0 -0
  37. widemem_ai-1.3.0/widemem/extraction/base.py +11 -0
  38. widemem_ai-1.3.0/widemem/extraction/collector.py +71 -0
  39. widemem_ai-1.3.0/widemem/extraction/llm_extractor.py +63 -0
  40. widemem_ai-1.3.0/widemem/extraction/prompts.py +48 -0
  41. widemem_ai-1.3.0/widemem/extraction/self_supervised.py +80 -0
  42. widemem_ai-1.3.0/widemem/hierarchy/__init__.py +0 -0
  43. widemem_ai-1.3.0/widemem/hierarchy/manager.py +148 -0
  44. widemem_ai-1.3.0/widemem/hierarchy/prompts.py +56 -0
  45. widemem_ai-1.3.0/widemem/hierarchy/query_router.py +59 -0
  46. widemem_ai-1.3.0/widemem/hierarchy/summarizer.py +55 -0
  47. widemem_ai-1.3.0/widemem/providers/__init__.py +0 -0
  48. widemem_ai-1.3.0/widemem/providers/embeddings/__init__.py +0 -0
  49. widemem_ai-1.3.0/widemem/providers/embeddings/base.py +22 -0
  50. widemem_ai-1.3.0/widemem/providers/embeddings/openai.py +29 -0
  51. widemem_ai-1.3.0/widemem/providers/embeddings/sentence_transformers.py +42 -0
  52. widemem_ai-1.3.0/widemem/providers/llm/__init__.py +0 -0
  53. widemem_ai-1.3.0/widemem/providers/llm/anthropic.py +51 -0
  54. widemem_ai-1.3.0/widemem/providers/llm/base.py +41 -0
  55. widemem_ai-1.3.0/widemem/providers/llm/ollama.py +51 -0
  56. widemem_ai-1.3.0/widemem/providers/llm/openai.py +56 -0
  57. widemem_ai-1.3.0/widemem/retrieval/__init__.py +0 -0
  58. widemem_ai-1.3.0/widemem/retrieval/active.py +84 -0
  59. widemem_ai-1.3.0/widemem/retrieval/prompts.py +25 -0
  60. widemem_ai-1.3.0/widemem/retrieval/temporal.py +66 -0
  61. widemem_ai-1.3.0/widemem/scoring/__init__.py +0 -0
  62. widemem_ai-1.3.0/widemem/scoring/decay.py +35 -0
  63. widemem_ai-1.3.0/widemem/scoring/importance.py +6 -0
  64. widemem_ai-1.3.0/widemem/scoring/topics.py +35 -0
  65. widemem_ai-1.3.0/widemem/scoring/ymyl.py +135 -0
  66. widemem_ai-1.3.0/widemem/storage/__init__.py +0 -0
  67. widemem_ai-1.3.0/widemem/storage/history.py +74 -0
  68. widemem_ai-1.3.0/widemem/storage/vector/__init__.py +0 -0
  69. widemem_ai-1.3.0/widemem/storage/vector/base.py +45 -0
  70. widemem_ai-1.3.0/widemem/storage/vector/faiss_store.py +155 -0
  71. widemem_ai-1.3.0/widemem/storage/vector/qdrant_store.py +144 -0
  72. widemem_ai-1.3.0/widemem/utils/__init__.py +0 -0
  73. widemem_ai-1.3.0/widemem/utils/hashing.py +5 -0
  74. widemem_ai-1.3.0/widemem/utils/id_mapping.py +25 -0
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: Bug Report
3
+ about: Something isn't working as expected
4
+ title: "[Bug] "
5
+ labels: bug
6
+ assignees: ''
7
+ ---
8
+
9
+ **Describe the bug**
10
+ A clear description of what went wrong.
11
+
12
+ **To reproduce**
13
+ ```python
14
+ # Minimal code to reproduce the issue
15
+ ```
16
+
17
+ **Expected behavior**
18
+ What you expected to happen.
19
+
20
+ **Actual behavior**
21
+ What actually happened. Include the full error traceback if applicable.
22
+
23
+ **Environment**
24
+ - Python version:
25
+ - widemem version:
26
+ - OS:
27
+ - LLM provider:
28
+ - Vector store:
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: Feature Request
3
+ about: Suggest a new feature or improvement
4
+ title: "[Feature] "
5
+ labels: enhancement
6
+ assignees: ''
7
+ ---
8
+
9
+ **Problem**
10
+ What problem does this solve? Why is it needed?
11
+
12
+ **Proposed solution**
13
+ How should it work? Include code examples if possible.
14
+
15
+ **Alternatives considered**
16
+ Any other approaches you've thought about.
@@ -0,0 +1,18 @@
1
+ ## What
2
+
3
+ Brief description of the change.
4
+
5
+ ## Why
6
+
7
+ Why is this change needed?
8
+
9
+ ## How
10
+
11
+ How does it work? Any design decisions worth noting?
12
+
13
+ ## Checklist
14
+
15
+ - [ ] Tests added/updated
16
+ - [ ] All tests pass (`pytest`)
17
+ - [ ] Linting passes (`ruff check .`)
18
+ - [ ] CHANGELOG.md updated (if user-facing change)
@@ -0,0 +1,33 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python ${{ matrix.python-version }}
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[dev]"
28
+
29
+ - name: Lint
30
+ run: ruff check widemem/ tests/
31
+
32
+ - name: Test
33
+ run: pytest tests/ -v
@@ -0,0 +1,31 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+ .eggs/
9
+
10
+ .env
11
+ .env.*
12
+ *.key
13
+ credentials.json
14
+
15
+ .pytest_cache/
16
+ .mypy_cache/
17
+ .ruff_cache/
18
+ htmlcov/
19
+ .coverage
20
+
21
+ *.db
22
+ *.faiss
23
+ state.json
24
+
25
+ .venv/
26
+ venv/
27
+ .vscode/
28
+ .idea/
29
+ *.swp
30
+ *.swo
31
+ .DS_Store
@@ -0,0 +1,74 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [1.3.0] - 2026-03-09
8
+
9
+ ### Added
10
+
11
+ - **Retry/backoff on LLM calls** — All LLM providers now retry up to 3 times with exponential backoff on transient errors (network, rate limits). `ProviderError` is not retried.
12
+ - **Memory TTL** — `MemoryConfig(ttl_days=30)` auto-expires memories older than N days at search time. No background jobs needed.
13
+ - **Score breakdown** — `MemorySearchResult` exposes `similarity_score`, `temporal_score`, `importance_score`, and `final_score` for debugging and transparency.
14
+ - **Batch add** — `memory.add_batch(["text1", "text2", ...])` processes multiple texts in one call.
15
+ - **Memory count** — `memory.count(user_id="alice")` returns total memory count with optional filters.
16
+ - **Export/import JSON** — `memory.export_json()` and `memory.import_json(data)` for backup, restore, and migration. Import skips existing IDs.
17
+ - 14 new tests (140 total)
18
+
19
+ ## [1.2.0] - 2026-03-08
20
+
21
+ ### Fixed
22
+
23
+ - **Invalid default LLM model** — Changed default from non-existent `gpt-4.1-nano` to `gpt-4o-mini`
24
+ - **Negative fact_index exploit** — Conflict resolver now rejects negative indices from LLM responses instead of silently wrapping via Python negative indexing
25
+ - **Duplicate fact_index processing** — If LLM returns the same fact_index twice with different actions, only the first is processed
26
+ - **Missing fact_index double-add** — Facts with missing `fact_index` in LLM response no longer get added twice (once from LLM action, once from fallback)
27
+ - **Unbounded top_k** — `search(top_k=...)` now capped at 1000 to prevent memory exhaustion
28
+
29
+ ### Added
30
+
31
+ - 3 new tests for conflict resolver edge cases (negative index, duplicate index, missing index)
32
+
33
+ ## [1.1.0] - 2026-03-08
34
+
35
+ ### Added
36
+
37
+ - **YMYL two-tier confidence system** — Strong patterns (multi-word) get full treatment (importance floor 8.0, decay immunity, forced active retrieval). Weak patterns (single keyword) get moderate boost only. Prevents false positives like "bank of the river".
38
+ - **YMYL documentation** — `YMYL.md` with full explanation of the two-tier system, examples, flow diagram, and limitations
39
+ - **Duplicate content detection** — Content hash checked before insert, prevents identical memories from being stored
40
+ - **list_all() on vector stores** — Proper metadata-based listing (FAISS + Qdrant) replaces zero-vector search hack in hierarchy
41
+ - **End-to-end test script** — `scripts/e2e_test.py` for real OpenAI integration testing
42
+ - **Resource cleanup** — `WideMemory` supports context manager (`with` statement), `close()`, and `__del__` for proper SQLite cleanup
43
+ - **Thread safety** — Pipeline operations protected by threading lock for concurrent access
44
+ - **Embedding dimension validation** — FAISS rejects vectors with wrong dimensions instead of silently corrupting
45
+ - **Conflict resolver fallback** — Bad LLM JSON gracefully falls back to ADD all facts instead of crashing
46
+
47
+ ### Fixed
48
+
49
+ - **YMYL regex case sensitivity** — Patterns containing uppercase letters (IRS, MRI, W-2) now match correctly against lowercased text via `re.IGNORECASE`
50
+ - **Zero-vector search hack** — Hierarchy manager now uses `list_all()` instead of searching with a zero vector
51
+
52
+ ## [1.0.0] - 2026-03-08
53
+
54
+ ### Added
55
+
56
+ - **Core memory system** — `WideMemory` with add, search, get, delete, and history
57
+ - **Batch conflict resolution** — Single LLM call resolves all new facts against existing memories (ADD/UPDATE/DELETE/NONE)
58
+ - **Importance scoring** — Facts rated 1-10 at extraction time, normalized into combined scoring
59
+ - **Time decay** — Four decay functions: exponential, linear, step, none
60
+ - **Combined scoring** — `final_score = similarity * similarity_weight + importance * importance_weight + recency * recency_weight`
61
+ - **Hierarchical memory** — Three-tier system (facts, summaries, themes) with automatic query routing and fallback chain
62
+ - **Active retrieval** — Contradiction and ambiguity detection with clarification callbacks
63
+ - **Self-supervised extraction** — SQLite-backed training data collector, small model fallback chain, training script
64
+ - **Topic weights** — Configurable boost/suppress multipliers for retrieval, custom extraction hints
65
+ - **Temporal search** — Time-range filters (time_after, time_before) on search
66
+ - **History audit trail** — SQLite log of all add/update/delete operations
67
+ - **Persistent FAISS** — Save/load to disk via `VectorStoreConfig.path`
68
+ - **LLM providers** — OpenAI, Anthropic Claude, Ollama
69
+ - **Embedding providers** — OpenAI, sentence-transformers (local)
70
+ - **Vector store providers** — FAISS (local), Qdrant (local or cloud)
71
+ - **UUID-to-integer ID mapping** — Prevents LLM hallucination of invalid memory IDs during conflict resolution
72
+ - **MD5 content hashing** — Skips no-op updates when content hasn't changed
73
+ - **Open source release** — README, CONTRIBUTING, CODE_OF_CONDUCT, SECURITY, LICENSE (Apache 2.0), GitHub templates, CI workflow
74
+ - 126 tests, all passing
@@ -0,0 +1,31 @@
1
+ # Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We are committed to making participation in this project a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation.
6
+
7
+ ## Our Standards
8
+
9
+ **Examples of behavior that contributes to a positive environment:**
10
+
11
+ - Using welcoming and inclusive language
12
+ - Being respectful of differing viewpoints and experiences
13
+ - Gracefully accepting constructive criticism
14
+ - Focusing on what is best for the community
15
+ - Showing empathy towards other community members
16
+
17
+ **Examples of unacceptable behavior:**
18
+
19
+ - The use of sexualized language or imagery and unwelcome sexual attention or advances
20
+ - Trolling, insulting/derogatory comments, and personal or political attacks
21
+ - Public or private harassment
22
+ - Publishing others' private information without explicit permission
23
+ - Other conduct which could reasonably be considered inappropriate in a professional setting
24
+
25
+ ## Enforcement
26
+
27
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project maintainer at **radu@cioplea.com**. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances.
28
+
29
+ ## Attribution
30
+
31
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
@@ -0,0 +1,61 @@
1
+ # Contributing to widemem
2
+
3
+ Thanks for considering a contribution. Here's how to get started.
4
+
5
+ ## Setup
6
+
7
+ ```bash
8
+ git clone https://github.com/remete618/widemem-ai.git
9
+ cd widemem-ai
10
+ python3 -m pip install -e ".[dev]"
11
+ ```
12
+
13
+ ## Running tests
14
+
15
+ ```bash
16
+ pytest
17
+ ```
18
+
19
+ All tests use mocks — no API keys or external services needed.
20
+
21
+ ## Code style
22
+
23
+ - We use [ruff](https://github.com/astral-sh/ruff) for linting and formatting
24
+ - Run `ruff check .` and `ruff format .` before submitting
25
+ - No docstrings or comments unless the logic isn't self-evident
26
+ - No type stubs or backwards-compatibility shims — just change the code
27
+ - Keep it simple. If three lines work, don't write an abstraction
28
+
29
+ ## Pull requests
30
+
31
+ 1. Fork the repo and create a branch from `main`
32
+ 2. Write tests for any new functionality
33
+ 3. Make sure all tests pass (`pytest`)
34
+ 4. Make sure linting passes (`ruff check .`)
35
+ 5. Keep PRs focused — one feature or fix per PR
36
+ 6. Write a clear PR description explaining what and why
37
+
38
+ ## What we're looking for
39
+
40
+ - Bug fixes with a test that reproduces the bug
41
+ - New provider implementations (LLM, embedding, vector store)
42
+ - Performance improvements with benchmarks
43
+ - Documentation improvements
44
+
45
+ ## What we're not looking for (yet)
46
+
47
+ - Major architectural changes without prior discussion — open an issue first
48
+ - Features that add complexity without clear user value
49
+ - Dependencies on large frameworks or libraries
50
+
51
+ ## Reporting bugs
52
+
53
+ Open a GitHub issue with:
54
+ - What you expected to happen
55
+ - What actually happened
56
+ - Steps to reproduce
57
+ - Python version and OS
58
+
59
+ ## Code of Conduct
60
+
61
+ See [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md). Be decent.
@@ -0,0 +1,15 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.