vectormesh 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectormesh-0.2.0/.claude/settings.local.json +9 -0
- vectormesh-0.2.0/.gitignore +38 -0
- vectormesh-0.2.0/.lefthook.yml +28 -0
- vectormesh-0.2.0/.python-version +1 -0
- vectormesh-0.2.0/.remember/.gitignore +1 -0
- vectormesh-0.2.0/.remember/archive.md +3 -0
- vectormesh-0.2.0/.remember/now.md +0 -0
- vectormesh-0.2.0/.remember/recent.md +8 -0
- vectormesh-0.2.0/.remember/today-2026-06-05.done.md +7 -0
- vectormesh-0.2.0/.remember/today-2026-06-06.md +5 -0
- vectormesh-0.2.0/Makefile +180 -0
- vectormesh-0.2.0/PKG-INFO +479 -0
- vectormesh-0.2.0/README.md +457 -0
- vectormesh-0.2.0/notebooks/0_vectorizer.ipynb +506 -0
- vectormesh-0.2.0/notebooks/1_training.ipynb +705 -0
- vectormesh-0.2.0/notebooks/2_design.ipynb +448 -0
- vectormesh-0.2.0/notebooks/3_moe.ipynb +201 -0
- vectormesh-0.2.0/notebooks/img/distribution_of_chunk_size.png +0 -0
- vectormesh-0.2.0/notebooks/img/regex_stats.png +0 -0
- vectormesh-0.2.0/pyproject.toml +88 -0
- vectormesh-0.2.0/references/Book of Monads (2021).pdf +0 -0
- vectormesh-0.2.0/references/Category Theory for Programmers (2019).pdf +0 -0
- vectormesh-0.2.0/references/Highway Networks.pdf +0 -0
- vectormesh-0.2.0/references/Outrageously Large Neural Networks The Sparsely-Gated Mixture-of-Experts Layer.pdf +0 -0
- vectormesh-0.2.0/scripts/build_dataset.py +28 -0
- vectormesh-0.2.0/scripts/create_cache_aktes.py +52 -0
- vectormesh-0.2.0/scripts/create_cache_imdb.py +38 -0
- vectormesh-0.2.0/scripts/embed_debertav3.py +48 -0
- vectormesh-0.2.0/scripts/embed_legal_dutch.py +49 -0
- vectormesh-0.2.0/scripts/embed_multilegal.py +48 -0
- vectormesh-0.2.0/scripts/train_moe.py +95 -0
- vectormesh-0.2.0/src/vectormesh/__init__.py +28 -0
- vectormesh-0.2.0/src/vectormesh/components/__init__.py +36 -0
- vectormesh-0.2.0/src/vectormesh/components/aggregation.py +95 -0
- vectormesh-0.2.0/src/vectormesh/components/connectors.py +38 -0
- vectormesh-0.2.0/src/vectormesh/components/gating.py +124 -0
- vectormesh-0.2.0/src/vectormesh/components/metrics.py +121 -0
- vectormesh-0.2.0/src/vectormesh/components/neural.py +125 -0
- vectormesh-0.2.0/src/vectormesh/components/padding.py +34 -0
- vectormesh-0.2.0/src/vectormesh/components/pipelines.py +43 -0
- vectormesh-0.2.0/src/vectormesh/data/__init__.py +17 -0
- vectormesh-0.2.0/src/vectormesh/data/cache.py +208 -0
- vectormesh-0.2.0/src/vectormesh/data/dataset.py +237 -0
- vectormesh-0.2.0/src/vectormesh/data/vectorizers.py +503 -0
- vectormesh-0.2.0/src/vectormesh/types.py +52 -0
- vectormesh-0.2.0/uv.lock +5534 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Project data
|
|
2
|
+
assets/**
|
|
3
|
+
artefacts/**
|
|
4
|
+
|
|
5
|
+
# Python
|
|
6
|
+
**/__pycache__/
|
|
7
|
+
*.py[cod]
|
|
8
|
+
*$py.class
|
|
9
|
+
*.so
|
|
10
|
+
.Python
|
|
11
|
+
|
|
12
|
+
# Testing
|
|
13
|
+
.pytest_cache/
|
|
14
|
+
htmlcov/
|
|
15
|
+
.coverage
|
|
16
|
+
.coverage.*
|
|
17
|
+
*.cover
|
|
18
|
+
|
|
19
|
+
# VectorMesh caches
|
|
20
|
+
.vmcache/
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
*.swp
|
|
26
|
+
*.swo
|
|
27
|
+
*~
|
|
28
|
+
|
|
29
|
+
# OS
|
|
30
|
+
.DS_Store
|
|
31
|
+
Thumbs.db
|
|
32
|
+
dev/*
|
|
33
|
+
*.log
|
|
34
|
+
.lycheecache
|
|
35
|
+
**/tmp/*
|
|
36
|
+
**/demo/*
|
|
37
|
+
.notebookcache/
|
|
38
|
+
logs/**
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
pre-commit:
|
|
2
|
+
commands:
|
|
3
|
+
notebooktester:
|
|
4
|
+
priority: 1
|
|
5
|
+
glob: '*.ipynb'
|
|
6
|
+
run: notebooktester notebooks -v -t 240
|
|
7
|
+
clean-jupyter:
|
|
8
|
+
priority: 2
|
|
9
|
+
files: git ls-files
|
|
10
|
+
glob: '*.ipynb'
|
|
11
|
+
run: jupyter nbconvert --clear-output --inplace {staged_files}
|
|
12
|
+
stage_fixed: true
|
|
13
|
+
format:
|
|
14
|
+
priority: 2
|
|
15
|
+
files: git ls-files
|
|
16
|
+
glob: '*.{py,ipynb}'
|
|
17
|
+
run: ruff format {staged_files}
|
|
18
|
+
stage_fixed: true
|
|
19
|
+
ruff:
|
|
20
|
+
priority: 2
|
|
21
|
+
files: git ls-files
|
|
22
|
+
glob: '*.py'
|
|
23
|
+
run: ruff check {staged_files} --fix
|
|
24
|
+
typecheck:
|
|
25
|
+
priority: 2
|
|
26
|
+
files: git ls-files
|
|
27
|
+
glob: '*.py'
|
|
28
|
+
run: ty check {staged_files}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.12
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
*
|
|
File without changes
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Recent
|
|
2
|
+
|
|
3
|
+
```
|
|
4
|
+
|
|
5
|
+
# Recent
|
|
6
|
+
|
|
7
|
+
## 2026-06-05
|
|
8
|
+
Fixed tokenizer overflow and 8192-token context crash in vectorizers.py with max_length override to base class. Refactored _effective_max_length logic with type hints improvements. Implemented IMDB sentiment detection (43-feature binary vectors, pattern matching, adjective analysis).
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
## 18:49 | main
|
|
2
|
+
Fixed tokenizer overflow (pad_token_id vocab index) & 8192-token context crash in vectorizers.py; added max_length override to Vectorizer class (cap seq length).
|
|
3
|
+
## 19:01 | main
|
|
4
|
+
vectorizers.py: refactored _effective_max_length→BaseVectorizer; type hints int→Optional[int]; removed Vectorizer override; verified small_demo.py
|
|
5
|
+
|
|
6
|
+
## 21:04 | main
|
|
7
|
+
vectorizers.py: added build_imdb_review_pattern, harmonize_imdb_match (IMDB sentiment); 43-feature binary vectors, sentiment adjective matching; verified IMDB data
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
## 08:57 | main
|
|
2
|
+
Fixed type linting types.py (BaseComponent.forward LSP); researched Liskov Substitution parameter contravariance; exploring pyproject.toml type checker config
|
|
3
|
+
|
|
4
|
+
## 21:12-21:54 | main
|
|
5
|
+
Added MaskedMeanAggregator (aggregation.py; mask FixedPadding zeros; prevent embedding dilution); implemented 5 MoE fixes (dtype, 2D, pop-CV, L_load, dense-importance; gating.py + MoEAuxLoss train_moe.py); diagnosed sparse routing unsuitable dataset scale; began dense MoE refactor; refactored SoftMoE dims-agnostic (gating.py); added TransformerBlock expert (inlined MultiheadAttention + _build_padding_mask, neural.py); updated train_moe.py 3D pipeline (SoftMoE experts + MaskedMeanAgg); validated padding mask prevents real-position contamination
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# ===================================================================
|
|
2
|
+
# VectorMesh - Remote Transfer Makefile
|
|
3
|
+
# ===================================================================
|
|
4
|
+
# This Makefile provides commands for transferring assets and
|
|
5
|
+
# artefacts between local and remote servers using SCP.
|
|
6
|
+
# ===================================================================
|
|
7
|
+
|
|
8
|
+
# Configuration Variables
|
|
9
|
+
# Customize these for your environment
|
|
10
|
+
USERNAME := rgrouls
|
|
11
|
+
IP := 145.38.191.144
|
|
12
|
+
REMOTE_FOLDER := /home/rgrouls/vectormesh
|
|
13
|
+
|
|
14
|
+
# Local directories
|
|
15
|
+
LOCAL_ASSETS := assets
|
|
16
|
+
LOCAL_ARTEFACTS := artefacts
|
|
17
|
+
|
|
18
|
+
# Remote directories (absolute paths built from REMOTE_FOLDER)
|
|
19
|
+
REMOTE_ASSETS := $(REMOTE_FOLDER)/assets
|
|
20
|
+
REMOTE_ARTEFACTS := $(REMOTE_FOLDER)/artefacts
|
|
21
|
+
|
|
22
|
+
# SCP options
|
|
23
|
+
SCP_OPTS := -r -C -p
|
|
24
|
+
|
|
25
|
+
# ===================================================================
|
|
26
|
+
# Targets
|
|
27
|
+
# ===================================================================
|
|
28
|
+
|
|
29
|
+
.PHONY: help scp-push scp-pull check-config test-connection list-remote list-local
|
|
30
|
+
|
|
31
|
+
# Default target - show help
|
|
32
|
+
help:
|
|
33
|
+
@echo "====================================================================="
|
|
34
|
+
@echo "VectorMesh Remote Transfer Commands"
|
|
35
|
+
@echo "====================================================================="
|
|
36
|
+
@echo ""
|
|
37
|
+
@echo "Configuration:"
|
|
38
|
+
@echo " USERNAME : $(USERNAME)"
|
|
39
|
+
@echo " IP : $(IP)"
|
|
40
|
+
@echo " REMOTE_FOLDER : $(REMOTE_FOLDER)"
|
|
41
|
+
@echo ""
|
|
42
|
+
@echo "Available targets:"
|
|
43
|
+
@echo " make help - Show this help message"
|
|
44
|
+
@echo " make check-config - Verify configuration variables"
|
|
45
|
+
@echo " make test-connection - Test SSH connection to remote server"
|
|
46
|
+
@echo ""
|
|
47
|
+
@echo "Push (Local → Remote):"
|
|
48
|
+
@echo " make scp-push - Push all aktes_* folders from local assets/"
|
|
49
|
+
@echo " make scp-push FOLDER=name - Push assets/name to remote assets/"
|
|
50
|
+
@echo ""
|
|
51
|
+
@echo "Pull (Remote → Local):"
|
|
52
|
+
@echo " make scp-pull - Pull all files from remote artefacts/"
|
|
53
|
+
@echo " make scp-pull FOLDER=name - Pull remote artefacts/name to local artefacts/"
|
|
54
|
+
@echo ""
|
|
55
|
+
@echo "Examples:"
|
|
56
|
+
@echo " make scp-push # Push all aktes_* folders"
|
|
57
|
+
@echo " make scp-push FOLDER=aktes_theshold_50_d97342 # Push assets/aktes_theshold_50_d97342"
|
|
58
|
+
@echo " make scp-pull # Pull all remote artefacts"
|
|
59
|
+
@echo " make scp-pull FOLDER=results_2024 # Pull remote artefacts/results_2024"
|
|
60
|
+
@echo ""
|
|
61
|
+
@echo "====================================================================="
|
|
62
|
+
|
|
63
|
+
# Check configuration
|
|
64
|
+
check-config:
|
|
65
|
+
@echo "Checking configuration..."
|
|
66
|
+
@if [ "$(IP)" = "your.server.ip" ]; then \
|
|
67
|
+
echo "ERROR: Please set IP variable"; \
|
|
68
|
+
echo "Usage: make scp-push IP=your.server.ip"; \
|
|
69
|
+
exit 1; \
|
|
70
|
+
fi
|
|
71
|
+
@if [ -z "$(USERNAME)" ]; then \
|
|
72
|
+
echo "ERROR: USERNAME not set"; \
|
|
73
|
+
exit 1; \
|
|
74
|
+
fi
|
|
75
|
+
@if [ -z "$(REMOTE_FOLDER)" ]; then \
|
|
76
|
+
echo "ERROR: REMOTE_FOLDER not set"; \
|
|
77
|
+
exit 1; \
|
|
78
|
+
fi
|
|
79
|
+
@echo "Configuration OK"
|
|
80
|
+
@echo " Target: $(USERNAME)@$(IP):$(REMOTE_FOLDER)"
|
|
81
|
+
|
|
82
|
+
# Test SSH connection
|
|
83
|
+
test-connection: check-config
|
|
84
|
+
@echo "Testing connection to $(USERNAME)@$(IP)..."
|
|
85
|
+
@ssh -o ConnectTimeout=5 -o BatchMode=yes $(USERNAME)@$(IP) "echo 'Connection successful!'" || \
|
|
86
|
+
(echo "ERROR: Cannot connect to server. Check IP, USERNAME, and SSH keys."; exit 1)
|
|
87
|
+
|
|
88
|
+
# Push folders to remote assets directory
|
|
89
|
+
# Usage: make scp-push [FOLDER=folder_name]
|
|
90
|
+
# If FOLDER is set, push only that folder. Otherwise push all aktes_* folders.
|
|
91
|
+
scp-push: check-config
|
|
92
|
+
@if [ ! -d "$(LOCAL_ASSETS)" ]; then \
|
|
93
|
+
echo "ERROR: Local assets directory not found: $(LOCAL_ASSETS)"; \
|
|
94
|
+
exit 1; \
|
|
95
|
+
fi
|
|
96
|
+
@# Create remote assets directory if it doesn't exist
|
|
97
|
+
@echo "Ensuring remote directory exists..."
|
|
98
|
+
@ssh $(USERNAME)@$(IP) "mkdir -p $(REMOTE_ASSETS)"
|
|
99
|
+
ifdef FOLDER
|
|
100
|
+
@echo "====================================================================="
|
|
101
|
+
@echo "Pushing $(LOCAL_ASSETS)/$(FOLDER) to $(USERNAME)@$(IP):$(REMOTE_ASSETS)/"
|
|
102
|
+
@echo "====================================================================="
|
|
103
|
+
@if [ ! -d "$(LOCAL_ASSETS)/$(FOLDER)" ]; then \
|
|
104
|
+
echo "ERROR: Folder not found: $(LOCAL_ASSETS)/$(FOLDER)"; \
|
|
105
|
+
exit 1; \
|
|
106
|
+
fi
|
|
107
|
+
@echo "Transferring $(FOLDER)..."
|
|
108
|
+
@scp $(SCP_OPTS) "$(LOCAL_ASSETS)/$(FOLDER)" $(USERNAME)@$(IP):$(REMOTE_ASSETS)/
|
|
109
|
+
@echo ""
|
|
110
|
+
@echo "Transfer complete!"
|
|
111
|
+
else
|
|
112
|
+
@echo "====================================================================="
|
|
113
|
+
@echo "Pushing all aktes_* folders to $(USERNAME)@$(IP):$(REMOTE_ASSETS)/"
|
|
114
|
+
@echo "====================================================================="
|
|
115
|
+
@FOLDERS=$$(find $(LOCAL_ASSETS) -maxdepth 1 -type d -name "aktes_*" 2>/dev/null); \
|
|
116
|
+
if [ -z "$$FOLDERS" ]; then \
|
|
117
|
+
echo "WARNING: No folders matching 'aktes_*' found in $(LOCAL_ASSETS)"; \
|
|
118
|
+
else \
|
|
119
|
+
for folder in $$FOLDERS; do \
|
|
120
|
+
echo "Transferring $$folder..."; \
|
|
121
|
+
scp $(SCP_OPTS) "$$folder" $(USERNAME)@$(IP):$(REMOTE_ASSETS)/ || exit 1; \
|
|
122
|
+
done; \
|
|
123
|
+
echo ""; \
|
|
124
|
+
echo "Transfer complete!"; \
|
|
125
|
+
fi
|
|
126
|
+
endif
|
|
127
|
+
|
|
128
|
+
# Pull artefacts from remote to local
|
|
129
|
+
# Usage: make scp-pull [FOLDER=folder_name]
|
|
130
|
+
# If FOLDER is set, pull only that folder. Otherwise pull all files.
|
|
131
|
+
scp-pull: check-config
|
|
132
|
+
@# Create local artefacts directory if it doesn't exist
|
|
133
|
+
@mkdir -p $(LOCAL_ARTEFACTS)
|
|
134
|
+
@# Check if remote directory exists
|
|
135
|
+
@echo "Checking remote directory..."
|
|
136
|
+
@ssh $(USERNAME)@$(IP) "[ -d $(REMOTE_ARTEFACTS) ]" || \
|
|
137
|
+
(echo "ERROR: Remote artefacts directory not found: $(REMOTE_ARTEFACTS)"; exit 1)
|
|
138
|
+
ifdef FOLDER
|
|
139
|
+
@echo "====================================================================="
|
|
140
|
+
@echo "Pulling $(FOLDER) from $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/"
|
|
141
|
+
@echo "====================================================================="
|
|
142
|
+
@echo "Transferring $(FOLDER)..."
|
|
143
|
+
@scp $(SCP_OPTS) $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/$(FOLDER) $(LOCAL_ARTEFACTS)/ || \
|
|
144
|
+
(echo "ERROR: Folder not found or transfer failed"; exit 1)
|
|
145
|
+
@echo ""
|
|
146
|
+
@echo "Transfer complete! Files saved to $(LOCAL_ARTEFACTS)/"
|
|
147
|
+
else
|
|
148
|
+
@echo "====================================================================="
|
|
149
|
+
@echo "Pulling all files from $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/"
|
|
150
|
+
@echo "====================================================================="
|
|
151
|
+
@echo "Transferring artefacts/*..."
|
|
152
|
+
@scp $(SCP_OPTS) $(USERNAME)@$(IP):$(REMOTE_ARTEFACTS)/* $(LOCAL_ARTEFACTS)/ || \
|
|
153
|
+
(echo "WARNING: No files found or transfer failed"; exit 0)
|
|
154
|
+
@echo ""
|
|
155
|
+
@echo "Transfer complete! Files saved to $(LOCAL_ARTEFACTS)/"
|
|
156
|
+
endif
|
|
157
|
+
|
|
158
|
+
# Utility target to list remote files
|
|
159
|
+
list-remote: check-config
|
|
160
|
+
@echo "====================================================================="
|
|
161
|
+
@echo "Remote Directory Listing"
|
|
162
|
+
@echo "====================================================================="
|
|
163
|
+
@echo ""
|
|
164
|
+
@echo "Assets ($(REMOTE_ASSETS)):"
|
|
165
|
+
@ssh $(USERNAME)@$(IP) "ls -lh $(REMOTE_ASSETS) 2>/dev/null || echo 'Directory not found'"
|
|
166
|
+
@echo ""
|
|
167
|
+
@echo "Artefacts ($(REMOTE_ARTEFACTS)):"
|
|
168
|
+
@ssh $(USERNAME)@$(IP) "ls -lh $(REMOTE_ARTEFACTS) 2>/dev/null || echo 'Directory not found'"
|
|
169
|
+
|
|
170
|
+
# Utility target to list local files
|
|
171
|
+
list-local:
|
|
172
|
+
@echo "====================================================================="
|
|
173
|
+
@echo "Local Directory Listing"
|
|
174
|
+
@echo "====================================================================="
|
|
175
|
+
@echo ""
|
|
176
|
+
@echo "Assets ($(LOCAL_ASSETS)):"
|
|
177
|
+
@ls -lh $(LOCAL_ASSETS) 2>/dev/null || echo "Directory not found"
|
|
178
|
+
@echo ""
|
|
179
|
+
@echo "Artefacts ($(LOCAL_ARTEFACTS)):"
|
|
180
|
+
@ls -lh $(LOCAL_ARTEFACTS) 2>/dev/null || echo "Directory not found"
|