swarmauri_embedding_mlm 0.8.2.dev24__tar.gz → 0.8.3.dev3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/PKG-INFO +16 -14
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/README.md +6 -9
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/pyproject.toml +10 -5
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/LICENSE +0 -0
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/swarmauri_embedding_mlm/MlmEmbedding.py +0 -0
- {swarmauri_embedding_mlm-0.8.2.dev24 → swarmauri_embedding_mlm-0.8.3.dev3}/swarmauri_embedding_mlm/__init__.py +0 -0
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: swarmauri_embedding_mlm
|
|
3
|
-
Version: 0.8.2.dev24
|
|
3
|
+
Version: 0.8.3.dev3
|
|
4
4
|
Summary: example community package
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Keywords: swarmauri,embedding,mlm,example,community,package
|
|
8
8
|
Author: Jacob Stewart
|
|
9
9
|
Author-email: jacob@swarmauri.com
|
|
10
|
-
Requires-Python: >=3.10,<3.13
|
|
10
|
+
Requires-Python: >=3.10,<3.15
|
|
11
11
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
12
|
Classifier: Natural Language :: English
|
|
16
13
|
Classifier: Development Status :: 3 - Alpha
|
|
17
14
|
Classifier: Intended Audience :: Developers
|
|
18
15
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
24
|
Requires-Dist: swarmauri_base
|
|
20
25
|
Requires-Dist: swarmauri_core
|
|
21
26
|
Requires-Dist: swarmauri_standard
|
|
@@ -23,24 +28,21 @@ Requires-Dist: torch (>=2.6.0)
|
|
|
23
28
|
Requires-Dist: transformers (>=4.49.0)
|
|
24
29
|
Description-Content-Type: text/markdown
|
|
25
30
|
|
|
26
|
-
|
|
27
|
-

|
|
31
|
+

|
|
28
32
|
|
|
29
33
|
<p align="center">
|
|
30
|
-
<a href="https://
|
|
31
|
-
<img src="https://
|
|
34
|
+
<a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
|
|
35
|
+
<img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
|
|
32
36
|
<a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
|
|
33
37
|
<img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
|
|
34
38
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
35
|
-
<img src="https://img.shields.io/
|
|
39
|
+
<img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
|
|
36
40
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
37
41
|
<img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
|
|
38
42
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
39
43
|
<img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
|
|
40
44
|
</p>
|
|
41
45
|
|
|
42
|
-
---
|
|
43
|
-
|
|
44
46
|
# Swarmauri Embedding MLM
|
|
45
47
|
|
|
46
48
|
Trainable embedding provider that fine-tunes a Hugging Face masked language model (MLM) end-to-end so Swarmauri agents can produce contextual document vectors without leaving the framework.
|
|
@@ -58,7 +60,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
|
|
|
58
60
|
- Python 3.10 or newer.
|
|
59
61
|
- PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
|
|
60
62
|
- Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
|
|
61
|
-
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420
|
|
63
|
+
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
|
|
62
64
|
|
|
63
65
|
## Installation
|
|
64
66
|
|
|
@@ -146,7 +148,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
|
|
|
146
148
|
## Operational Tips
|
|
147
149
|
|
|
148
150
|
- Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
|
|
149
|
-
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings
|
|
151
|
+
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
|
|
150
152
|
- When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
|
|
151
153
|
- Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
|
|
152
154
|
- Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
|
|
@@ -1,21 +1,18 @@
|
|
|
1
|
-
|
|
2
|
-

|
|
1
|
+

|
|
3
2
|
|
|
4
3
|
<p align="center">
|
|
5
|
-
<a href="https://
|
|
6
|
-
<img src="https://
|
|
4
|
+
<a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
|
|
5
|
+
<img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
|
|
7
6
|
<a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
|
|
8
7
|
<img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
|
|
9
8
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
10
|
-
<img src="https://img.shields.io/
|
|
9
|
+
<img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
|
|
11
10
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
12
11
|
<img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
|
|
13
12
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
14
13
|
<img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
|
|
15
14
|
</p>
|
|
16
15
|
|
|
17
|
-
---
|
|
18
|
-
|
|
19
16
|
# Swarmauri Embedding MLM
|
|
20
17
|
|
|
21
18
|
Trainable embedding provider that fine-tunes a Hugging Face masked language model (MLM) end-to-end so Swarmauri agents can produce contextual document vectors without leaving the framework.
|
|
@@ -33,7 +30,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
|
|
|
33
30
|
- Python 3.10 or newer.
|
|
34
31
|
- PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
|
|
35
32
|
- Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
|
|
36
|
-
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420
|
|
33
|
+
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
|
|
37
34
|
|
|
38
35
|
## Installation
|
|
39
36
|
|
|
@@ -121,7 +118,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
|
|
|
121
118
|
## Operational Tips
|
|
122
119
|
|
|
123
120
|
- Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
|
|
124
|
-
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings
|
|
121
|
+
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
|
|
125
122
|
- When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
|
|
126
123
|
- Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
|
|
127
124
|
- Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
|
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "swarmauri_embedding_mlm"
|
|
3
|
-
version = "0.8.2.dev24"
|
|
3
|
+
version = "0.8.3.dev3"
|
|
4
4
|
description = "example community package"
|
|
5
5
|
license = "Apache-2.0"
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
repository = "http://github.com/swarmauri/swarmauri-sdk"
|
|
8
|
-
requires-python = ">=3.10,<3.13"
|
|
8
|
+
requires-python = ">=3.10,<3.15"
|
|
9
9
|
classifiers = [
|
|
10
10
|
"License :: OSI Approved :: Apache Software License",
|
|
11
|
-
"Programming Language :: Python :: 3.10",
|
|
12
|
-
"Programming Language :: Python :: 3.11",
|
|
13
|
-
"Programming Language :: Python :: 3.12",
|
|
14
11
|
"Natural Language :: English",
|
|
15
12
|
"Development Status :: 3 - Alpha",
|
|
16
13
|
"Intended Audience :: Developers",
|
|
17
14
|
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
15
|
+
"Programming Language :: Python",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Programming Language :: Python :: 3.14",
|
|
18
23
|
]
|
|
19
24
|
authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
|
|
20
25
|
dependencies = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|