vectormesh 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. vectormesh-0.2.0/.claude/settings.local.json +9 -0
  2. vectormesh-0.2.0/.gitignore +38 -0
  3. vectormesh-0.2.0/.lefthook.yml +28 -0
  4. vectormesh-0.2.0/.python-version +1 -0
  5. vectormesh-0.2.0/.remember/.gitignore +1 -0
  6. vectormesh-0.2.0/.remember/archive.md +3 -0
  7. vectormesh-0.2.0/.remember/now.md +0 -0
  8. vectormesh-0.2.0/.remember/recent.md +8 -0
  9. vectormesh-0.2.0/.remember/today-2026-06-05.done.md +7 -0
  10. vectormesh-0.2.0/.remember/today-2026-06-06.md +5 -0
  11. vectormesh-0.2.0/Makefile +180 -0
  12. vectormesh-0.2.0/PKG-INFO +479 -0
  13. vectormesh-0.2.0/README.md +457 -0
  14. vectormesh-0.2.0/notebooks/0_vectorizer.ipynb +506 -0
  15. vectormesh-0.2.0/notebooks/1_training.ipynb +705 -0
  16. vectormesh-0.2.0/notebooks/2_design.ipynb +448 -0
  17. vectormesh-0.2.0/notebooks/3_moe.ipynb +201 -0
  18. vectormesh-0.2.0/notebooks/img/distribution_of_chunk_size.png +0 -0
  19. vectormesh-0.2.0/notebooks/img/regex_stats.png +0 -0
  20. vectormesh-0.2.0/pyproject.toml +88 -0
  21. vectormesh-0.2.0/references/Book of Monads (2021).pdf +0 -0
  22. vectormesh-0.2.0/references/Category Theory for Programmers (2019).pdf +0 -0
  23. vectormesh-0.2.0/references/Highway Networks.pdf +0 -0
  24. vectormesh-0.2.0/references/Outrageously Large Neural Networks The Sparsely-Gated Mixture-of-Experts Layer.pdf +0 -0
  25. vectormesh-0.2.0/scripts/build_dataset.py +28 -0
  26. vectormesh-0.2.0/scripts/create_cache_aktes.py +52 -0
  27. vectormesh-0.2.0/scripts/create_cache_imdb.py +38 -0
  28. vectormesh-0.2.0/scripts/embed_debertav3.py +48 -0
  29. vectormesh-0.2.0/scripts/embed_legal_dutch.py +49 -0
  30. vectormesh-0.2.0/scripts/embed_multilegal.py +48 -0
  31. vectormesh-0.2.0/scripts/train_moe.py +95 -0
  32. vectormesh-0.2.0/src/vectormesh/__init__.py +28 -0
  33. vectormesh-0.2.0/src/vectormesh/components/__init__.py +36 -0
  34. vectormesh-0.2.0/src/vectormesh/components/aggregation.py +95 -0
  35. vectormesh-0.2.0/src/vectormesh/components/connectors.py +38 -0
  36. vectormesh-0.2.0/src/vectormesh/components/gating.py +124 -0
  37. vectormesh-0.2.0/src/vectormesh/components/metrics.py +121 -0
  38. vectormesh-0.2.0/src/vectormesh/components/neural.py +125 -0
  39. vectormesh-0.2.0/src/vectormesh/components/padding.py +34 -0
  40. vectormesh-0.2.0/src/vectormesh/components/pipelines.py +43 -0
  41. vectormesh-0.2.0/src/vectormesh/data/__init__.py +17 -0
  42. vectormesh-0.2.0/src/vectormesh/data/cache.py +208 -0
  43. vectormesh-0.2.0/src/vectormesh/data/dataset.py +237 -0
  44. vectormesh-0.2.0/src/vectormesh/data/vectorizers.py +503 -0
  45. vectormesh-0.2.0/src/vectormesh/types.py +52 -0
  46. vectormesh-0.2.0/uv.lock +5534 -0
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run *)",
5
+ "Bash(python -c \"import mltrainer; print\\(mltrainer.__file__\\)\")",
6
+ "WebFetch(domain:docs.astral.sh)"
7
+ ]
8
+ }
9
+ }
@@ -0,0 +1,38 @@
1
+ # Project data
2
+ assets/**
3
+ artefacts/**
4
+
5
+ # Python
6
+ **/__pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+
12
+ # Testing
13
+ .pytest_cache/
14
+ htmlcov/
15
+ .coverage
16
+ .coverage.*
17
+ *.cover
18
+
19
+ # VectorMesh caches
20
+ .vmcache/
21
+
22
+ # IDE
23
+ .vscode/
24
+ .idea/
25
+ *.swp
26
+ *.swo
27
+ *~
28
+
29
+ # OS
30
+ .DS_Store
31
+ Thumbs.db
32
+ dev/*
33
+ *.log
34
+ .lycheecache
35
+ **/tmp/*
36
+ **/demo/*
37
+ .notebookcache/
38
+ logs/**
@@ -0,0 +1,28 @@
1
+ pre-commit: # lefthook hooks for the pre-commit stage
2
+ commands:
3
+ notebooktester:
4
+ priority: 1
5
+ glob: '*.ipynb'
6
+ run: notebooktester notebooks -v -t 240
7
+ clean-jupyter: # clears cell outputs of staged notebooks in place and re-stages them
8
+ priority: 2
9
+ files: git ls-files
10
+ glob: '*.ipynb'
11
+ run: jupyter nbconvert --clear-output --inplace {staged_files}
12
+ stage_fixed: true
13
+ format: # formats staged Python files and notebooks, then re-stages them
14
+ priority: 2
15
+ files: git ls-files
16
+ glob: '*.{py,ipynb}' # no space after the comma: a space is matched literally, so '*.ipynb' files were never formatted
17
+ run: ruff format {staged_files}
18
+ stage_fixed: true
19
+ ruff:
20
+ priority: 2
21
+ files: git ls-files
22
+ glob: '*.py'
23
+ run: ruff check {staged_files} --fix
24
+ typecheck:
25
+ priority: 2
26
+ files: git ls-files
27
+ glob: '*.py'
28
+ run: ty check {staged_files}
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1 @@
1
+ *
@@ -0,0 +1,3 @@
1
+ # Archive
2
+
3
+ ```
File without changes
@@ -0,0 +1,8 @@
1
+ # Recent
2
+
3
+ ```
4
+
5
+ # Recent
6
+
7
+ ## 2026-06-05
8
+ Fixed the tokenizer overflow and the 8192-token context crash in vectorizers.py by adding a max_length override on the base class. Refactored the _effective_max_length logic and improved its type hints. Implemented IMDB sentiment detection (43-feature binary vectors, pattern matching, adjective analysis).
@@ -0,0 +1,7 @@
1
+ ## 18:49 | main
2
+ Fixed tokenizer overflow (pad_token_id vocab index) & 8192-token context crash in vectorizers.py; added max_length override to Vectorizer class (cap seq length).
3
+ ## 19:01 | main
4
+ vectorizers.py: refactored _effective_max_length→BaseVectorizer; type hints int→Optional[int]; removed Vectorizer override; verified small_demo.py
5
+
6
+ ## 21:04 | main
7
+ vectorizers.py: added build_imdb_review_pattern, harmonize_imdb_match (IMDB sentiment); 43-feature binary vectors, sentiment adjective matching; verified IMDB data
@@ -0,0 +1,5 @@
1
+ ## 08:57 | main
2
+ Fixed type linting types.py (BaseComponent.forward LSP); researched Liskov Substitution parameter contravariance; exploring pyproject.toml type checker config
3
+
4
+ ## 21:12-21:54 | main
5
+ Added MaskedMeanAggregator (aggregation.py; mask FixedPadding zeros; prevent embedding dilution); implemented 5 MoE fixes (dtype, 2D, pop-CV, L_load, dense-importance; gating.py + MoEAuxLoss train_moe.py); diagnosed sparse routing unsuitable dataset scale; began dense MoE refactor; refactored SoftMoE dims-agnostic (gating.py); added TransformerBlock expert (inlined MultiheadAttention + _build_padding_mask, neural.py); updated train_moe.py 3D pipeline (SoftMoE experts + MaskedMeanAgg); validated padding mask prevents real-position contamination
@@ -0,0 +1,180 @@
1
+ # ===================================================================
2
+ # VectorMesh - Remote Transfer Makefile
3
+ # ===================================================================
4
+ # This Makefile provides commands for transferring assets and
5
+ # artefacts between local and remote servers using SCP.
6
+ # ===================================================================
7
+
8
+ # Configuration Variables
9
+ # Defaults for one specific server; override per run on the command line, e.g. make scp-push IP=1.2.3.4 USERNAME=me
10
+ USERNAME := rgrouls
11
+ IP := 145.38.191.144
12
+ REMOTE_FOLDER := /home/rgrouls/vectormesh
13
+
14
+ # Local directories (relative to the directory make is run from)
15
+ LOCAL_ASSETS := assets
16
+ LOCAL_ARTEFACTS := artefacts
17
+
18
+ # Remote directories (built by appending to REMOTE_FOLDER)
19
+ REMOTE_ASSETS := $(REMOTE_FOLDER)/assets
20
+ REMOTE_ARTEFACTS := $(REMOTE_FOLDER)/artefacts
21
+
22
+ # SCP options: -r copy directories recursively, -C enable compression, -p preserve times and modes
23
+ SCP_OPTS := -r -C -p
24
+
25
+ # ===================================================================
26
+ # Targets
27
+ # ===================================================================
28
+
29
+ .PHONY: help scp-push scp-pull check-config test-connection list-remote list-local # all targets are commands, none creates a file of that name
30
+
31
+ # Default target: print the current configuration and usage. NOTE(review): list-remote and list-local exist but are not listed here.
32
+ help:
33
+ @echo "====================================================================="
34
+ @echo "VectorMesh Remote Transfer Commands"
35
+ @echo "====================================================================="
36
+ @echo ""
37
+ @echo "Configuration:"
38
+ @echo " USERNAME : $(USERNAME)"
39
+ @echo " IP : $(IP)"
40
+ @echo " REMOTE_FOLDER : $(REMOTE_FOLDER)"
41
+ @echo ""
42
+ @echo "Available targets:"
43
+ @echo " make help - Show this help message"
44
+ @echo " make check-config - Verify configuration variables"
45
+ @echo " make test-connection - Test SSH connection to remote server"
46
+ @echo ""
47
+ @echo "Push (Local → Remote):"
48
+ @echo " make scp-push - Push all aktes_* folders from local assets/"
49
+ @echo " make scp-push FOLDER=name - Push assets/name to remote assets/"
50
+ @echo ""
51
+ @echo "Pull (Remote → Local):"
52
+ @echo " make scp-pull - Pull all files from remote artefacts/"
53
+ @echo " make scp-pull FOLDER=name - Pull remote artefacts/name to local artefacts/"
54
+ @echo ""
55
+ @echo "Examples:"
56
+ @echo " make scp-push # Push all aktes_* folders"
57
+ @echo " make scp-push FOLDER=aktes_theshold_50_d97342 # Push assets/aktes_theshold_50_d97342"
58
+ @echo " make scp-pull # Pull all remote artefacts"
59
+ @echo " make scp-pull FOLDER=results_2024 # Pull remote artefacts/results_2024"
60
+ @echo ""
61
+ @echo "====================================================================="
62
+
63
+ # Check configuration: abort with exit 1 when IP is empty or still the placeholder, or when USERNAME or REMOTE_FOLDER is empty
64
+ check-config:
65
+ @echo "Checking configuration..."
66
+ @if [ -z "$(IP)" ] || [ "$(IP)" = "your.server.ip" ]; then \
67
+ echo "ERROR: Please set IP variable"; \
68
+ echo "Usage: make scp-push IP=your.server.ip"; \
69
+ exit 1; \
70
+ fi
71
+ @if [ -z "$(USERNAME)" ]; then \
72
+ echo "ERROR: USERNAME not set"; \
73
+ exit 1; \
74
+ fi
75
+ @if [ -z "$(REMOTE_FOLDER)" ]; then \
76
+ echo "ERROR: REMOTE_FOLDER not set"; \
77
+ exit 1; \
78
+ fi
79
+ @echo "Configuration OK"
80
+ @echo " Target: $(USERNAME)@$(IP):$(REMOTE_FOLDER)"
80
+ @echo " Target: $(USERNAME)@$(IP):$(REMOTE_FOLDER)"
81
+
82
+ # Test SSH connection after check-config: 5-second connect timeout; BatchMode=yes makes ssh fail instead of prompting for a password
83
+ test-connection: check-config
84
+ @echo "Testing connection to $(USERNAME)@$(IP)..."
85
+ @ssh -o ConnectTimeout=5 -o BatchMode=yes $(USERNAME)@$(IP) "echo 'Connection successful!'" || \
86
+ (echo "ERROR: Cannot connect to server. Check IP, USERNAME, and SSH keys."; exit 1)
87
+
88
+ # Push folders from the local assets directory to the remote assets directory (check-config runs first; the remote directory is created with mkdir -p)
89
+ # Usage: make scp-push [FOLDER=folder_name]
90
+ # FOLDER is tested with ifdef when the Makefile is parsed: if set, only assets/FOLDER is pushed; otherwise every directory matching assets/aktes_* is pushed, stopping at the first failed scp.
91
+ scp-push: check-config
92
+ @if [ ! -d "$(LOCAL_ASSETS)" ]; then \
93
+ echo "ERROR: Local assets directory not found: $(LOCAL_ASSETS)"; \
94
+ exit 1; \
95
+ fi
96
+ @# Create remote assets directory if it doesn't exist
97
+ @echo "Ensuring remote directory exists..."
98
+ @ssh $(USERNAME)@$(IP) "mkdir -p $(REMOTE_ASSETS)"
99
+ ifdef FOLDER
100
+ @echo "====================================================================="
101
+ @echo "Pushing $(LOCAL_ASSETS)/$(FOLDER) to $(USERNAME)@$(IP):$(REMOTE_ASSETS)/"
102
+ @echo "====================================================================="
103
+ @if [ ! -d "$(LOCAL_ASSETS)/$(FOLDER)" ]; then \
104
+ echo "ERROR: Folder not found: $(LOCAL_ASSETS)/$(FOLDER)"; \
105
+ exit 1; \
106
+ fi
107
+ @echo "Transferring $(FOLDER)..."
108
+ @scp $(SCP_OPTS) "$(LOCAL_ASSETS)/$(FOLDER)" $(USERNAME)@$(IP):$(REMOTE_ASSETS)/
109
+ @echo ""
110
+ @echo "Transfer complete!"
111
+ else
112
+ @echo "====================================================================="
113
+ @echo "Pushing all aktes_* folders to $(USERNAME)@$(IP):$(REMOTE_ASSETS)/"
114
+ @echo "====================================================================="
115
+ @FOLDERS=$$(find $(LOCAL_ASSETS) -maxdepth 1 -type d -name "aktes_*" 2>/dev/null); \
116
+ if [ -z "$$FOLDERS" ]; then \
117
+ echo "WARNING: No folders matching 'aktes_*' found in $(LOCAL_ASSETS)"; \
118
+ else \
119
+ for folder in $$FOLDERS; do \
120
+ echo "Transferring $$folder..."; \
121
+ scp $(SCP_OPTS) "$$folder" $(USERNAME)@$(IP):$(REMOTE_ASSETS)/ || exit 1; \
122
+ done; \
123
+ echo ""; \
124
+ echo "Transfer complete!"; \
125
+ fi
126
+ endif
127
+
128
+ # Pull artefacts from the remote artefacts directory into the local one (check-config runs first; the local directory is created with mkdir -p)
129
+ # Usage: make scp-pull [FOLDER=folder_name]
130
+ # If FOLDER is set, pull only that folder and abort on failure. Otherwise pull all files; a failed or empty pull only warns (exit status stays 0) and the success message is printed only when scp succeeded.
131
+ scp-pull: check-config
132
+ @# Create local artefacts directory if it doesn't exist
133
+ @mkdir -p $(LOCAL_ARTEFACTS)
134
+ @# Check if remote directory exists
135
+ @echo "Checking remote directory..."
136
+ @ssh $(USERNAME)@$(IP) "[ -d $(REMOTE_ARTEFACTS) ]" || \
137
+ (echo "ERROR: Remote artefacts directory not found: $(REMOTE_ARTEFACTS)"; exit 1)
138
+ ifdef FOLDER
139
+ @echo "====================================================================="
140
+ @echo "Pulling $(FOLDER) from $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/"
141
+ @echo "====================================================================="
142
+ @echo "Transferring $(FOLDER)..."
143
+ @scp $(SCP_OPTS) $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/$(FOLDER) $(LOCAL_ARTEFACTS)/ || \
144
+ (echo "ERROR: Folder not found or transfer failed"; exit 1)
145
+ @echo ""
146
+ @echo "Transfer complete! Files saved to $(LOCAL_ARTEFACTS)/"
147
+ else
148
+ @echo "====================================================================="
149
+ @echo "Pulling all files from $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/"
150
+ @echo "====================================================================="
151
+ @echo "Transferring artefacts/*..."
152
+ @if scp $(SCP_OPTS) $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/* $(LOCAL_ARTEFACTS)/; then \
153
+ echo ""; echo "Transfer complete! Files saved to $(LOCAL_ARTEFACTS)/"; \
154
+ else echo "WARNING: No files found or transfer failed"; \
155
+ fi
156
+ endif
157
+
158
+ # Utility target: list the remote assets and artefacts directories over SSH; prints 'Directory not found' when the remote ls fails
159
+ list-remote: check-config
160
+ @echo "====================================================================="
161
+ @echo "Remote Directory Listing"
162
+ @echo "====================================================================="
163
+ @echo ""
164
+ @echo "Assets ($(REMOTE_ASSETS)):"
165
+ @ssh $(USERNAME)@$(IP) "ls -lh $(REMOTE_ASSETS) 2>/dev/null || echo 'Directory not found'"
166
+ @echo ""
167
+ @echo "Artefacts ($(REMOTE_ARTEFACTS)):"
168
+ @ssh $(USERNAME)@$(IP) "ls -lh $(REMOTE_ARTEFACTS) 2>/dev/null || echo 'Directory not found'"
169
+
170
+ # Utility target: list the local assets and artefacts directories; prints "Directory not found" when ls fails (no check-config needed, nothing remote is touched)
171
+ list-local:
172
+ @echo "====================================================================="
173
+ @echo "Local Directory Listing"
174
+ @echo "====================================================================="
175
+ @echo ""
176
+ @echo "Assets ($(LOCAL_ASSETS)):"
177
+ @ls -lh $(LOCAL_ASSETS) 2>/dev/null || echo "Directory not found"
178
+ @echo ""
179
+ @echo "Artefacts ($(LOCAL_ARTEFACTS)):"
180
+ @ls -lh $(LOCAL_ARTEFACTS) 2>/dev/null || echo "Directory not found"