swarmauri_embedding_mlm 0.8.2.dev22__tar.gz → 0.8.3.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/PKG-INFO +16 -13
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/README.md +6 -8
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/pyproject.toml +10 -5
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/LICENSE +0 -0
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/swarmauri_embedding_mlm/MlmEmbedding.py +0 -0
- {swarmauri_embedding_mlm-0.8.2.dev22 → swarmauri_embedding_mlm-0.8.3.dev2}/swarmauri_embedding_mlm/__init__.py +0 -0
|
@@ -1,21 +1,26 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: swarmauri_embedding_mlm
|
|
3
|
-
Version: 0.8.2.dev22
|
|
3
|
+
Version: 0.8.3.dev2
|
|
4
4
|
Summary: example community package
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
7
7
|
Keywords: swarmauri,embedding,mlm,example,community,package
|
|
8
8
|
Author: Jacob Stewart
|
|
9
9
|
Author-email: jacob@swarmauri.com
|
|
10
|
-
Requires-Python: >=3.10,<3.13
|
|
10
|
+
Requires-Python: >=3.10,<3.15
|
|
11
11
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
15
12
|
Classifier: Natural Language :: English
|
|
16
13
|
Classifier: Development Status :: 3 - Alpha
|
|
17
14
|
Classifier: Intended Audience :: Developers
|
|
18
15
|
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
24
|
Requires-Dist: swarmauri_base
|
|
20
25
|
Requires-Dist: swarmauri_core
|
|
21
26
|
Requires-Dist: swarmauri_standard
|
|
@@ -23,22 +28,20 @@ Requires-Dist: torch (>=2.6.0)
|
|
|
23
28
|
Requires-Dist: transformers (>=4.49.0)
|
|
24
29
|
Description-Content-Type: text/markdown
|
|
25
30
|
|
|
26
|
-
|
|
27
|
-

|
|
31
|
+

|
|
28
32
|
|
|
29
33
|
<p align="center">
|
|
30
|
-
<a href="https://
|
|
31
|
-
<img src="https://
|
|
34
|
+
<a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
|
|
35
|
+
<img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
|
|
32
36
|
<a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
|
|
33
37
|
<img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
|
|
34
38
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
35
|
-
<img src="https://img.shields.io/
|
|
39
|
+
<img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
|
|
36
40
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
37
41
|
<img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
|
|
38
42
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
39
43
|
<img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
|
|
40
44
|
</p>
|
|
41
|
-
|
|
42
45
|
---
|
|
43
46
|
|
|
44
47
|
# Swarmauri Embedding MLM
|
|
@@ -58,7 +61,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
|
|
|
58
61
|
- Python 3.10 or newer.
|
|
59
62
|
- PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
|
|
60
63
|
- Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
|
|
61
|
-
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420
|
|
64
|
+
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
|
|
62
65
|
|
|
63
66
|
## Installation
|
|
64
67
|
|
|
@@ -146,7 +149,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
|
|
|
146
149
|
## Operational Tips
|
|
147
150
|
|
|
148
151
|
- Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
|
|
149
|
-
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings
|
|
152
|
+
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
|
|
150
153
|
- When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
|
|
151
154
|
- Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
|
|
152
155
|
- Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
|
|
@@ -1,19 +1,17 @@
|
|
|
1
|
-
|
|
2
|
-

|
|
1
|
+

|
|
3
2
|
|
|
4
3
|
<p align="center">
|
|
5
|
-
<a href="https://
|
|
6
|
-
<img src="https://
|
|
4
|
+
<a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
|
|
5
|
+
<img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
|
|
7
6
|
<a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
|
|
8
7
|
<img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
|
|
9
8
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
10
|
-
<img src="https://img.shields.io/
|
|
9
|
+
<img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
|
|
11
10
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
12
11
|
<img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
|
|
13
12
|
<a href="https://pypi.org/project/swarmauri_embedding_mlm/">
|
|
14
13
|
<img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
|
|
15
14
|
</p>
|
|
16
|
-
|
|
17
15
|
---
|
|
18
16
|
|
|
19
17
|
# Swarmauri Embedding MLM
|
|
@@ -33,7 +31,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
|
|
|
33
31
|
- Python 3.10 or newer.
|
|
34
32
|
- PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
|
|
35
33
|
- Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
|
|
36
|
-
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420
|
|
34
|
+
- Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
|
|
37
35
|
|
|
38
36
|
## Installation
|
|
39
37
|
|
|
@@ -121,7 +119,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
|
|
|
121
119
|
## Operational Tips
|
|
122
120
|
|
|
123
121
|
- Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
|
|
124
|
-
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings
|
|
122
|
+
- `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
|
|
125
123
|
- When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
|
|
126
124
|
- Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
|
|
127
125
|
- Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
|
|
@@ -1,20 +1,25 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "swarmauri_embedding_mlm"
|
|
3
|
-
version = "0.8.2.dev22"
|
|
3
|
+
version = "0.8.3.dev2"
|
|
4
4
|
description = "example community package"
|
|
5
5
|
license = "Apache-2.0"
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
repository = "http://github.com/swarmauri/swarmauri-sdk"
|
|
8
|
-
requires-python = ">=3.10,<3.13"
|
|
8
|
+
requires-python = ">=3.10,<3.15"
|
|
9
9
|
classifiers = [
|
|
10
10
|
"License :: OSI Approved :: Apache Software License",
|
|
11
|
-
"Programming Language :: Python :: 3.10",
|
|
12
|
-
"Programming Language :: Python :: 3.11",
|
|
13
|
-
"Programming Language :: Python :: 3.12",
|
|
14
11
|
"Natural Language :: English",
|
|
15
12
|
"Development Status :: 3 - Alpha",
|
|
16
13
|
"Intended Audience :: Developers",
|
|
17
14
|
"Topic :: Software Development :: Libraries :: Application Frameworks",
|
|
15
|
+
"Programming Language :: Python",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Programming Language :: Python :: 3.14",
|
|
18
23
|
]
|
|
19
24
|
authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
|
|
20
25
|
dependencies = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|