swarmauri_embedding_mlm 0.8.2.dev22__tar.gz → 0.8.3.dev2__tar.gz

This diff shows the contents of two publicly released versions of this package as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -1,21 +1,26 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: swarmauri_embedding_mlm
3
- Version: 0.8.2.dev22
3
+ Version: 0.8.3.dev2
4
4
  Summary: example community package
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
7
7
  Keywords: swarmauri,embedding,mlm,example,community,package
8
8
  Author: Jacob Stewart
9
9
  Author-email: jacob@swarmauri.com
10
- Requires-Python: >=3.10,<3.13
10
+ Requires-Python: >=3.10,<3.15
11
11
  Classifier: License :: OSI Approved :: Apache Software License
12
- Classifier: Programming Language :: Python :: 3.10
13
- Classifier: Programming Language :: Python :: 3.11
14
- Classifier: Programming Language :: Python :: 3.12
15
12
  Classifier: Natural Language :: English
16
13
  Classifier: Development Status :: 3 - Alpha
17
14
  Classifier: Intended Audience :: Developers
18
15
  Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
16
+ Classifier: Programming Language :: Python
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3 :: Only
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Programming Language :: Python :: 3.14
19
24
  Requires-Dist: swarmauri_base
20
25
  Requires-Dist: swarmauri_core
21
26
  Requires-Dist: swarmauri_standard
@@ -23,22 +28,20 @@ Requires-Dist: torch (>=2.6.0)
23
28
  Requires-Dist: transformers (>=4.49.0)
24
29
  Description-Content-Type: text/markdown
25
30
 
26
-
27
- ![Swarmauri Logo](https://github.com/swarmauri/swarmauri-sdk/blob/3d4d1cfa949399d7019ae9d8f296afba773dfb7f/assets/swarmauri.brand.theme.svg)
31
+ ![Swarmauri Logo](https://raw.githubusercontent.com/swarmauri/swarmauri-sdk/master/assets/swarmauri_sdk_brand.png)
28
32
 
29
33
  <p align="center">
30
- <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
31
- <img src="https://img.shields.io/pypi/dm/swarmauri_embedding_mlm" alt="PyPI - Downloads"/></a>
34
+ <a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
35
+ <img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
32
36
  <a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
33
37
  <img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
34
38
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
35
- <img src="https://img.shields.io/pypi/pyversions/swarmauri_embedding_mlm" alt="PyPI - Python Version"/></a>
39
+ <img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
36
40
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
37
41
  <img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
38
42
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
39
43
  <img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
40
44
  </p>
41
-
42
45
  ---
43
46
 
44
47
  # Swarmauri Embedding MLM
@@ -58,7 +61,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
58
61
  - Python 3.10 or newer.
59
62
  - PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
60
63
  - Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
61
- - Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420MB).
64
+ - Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
62
65
 
63
66
  ## Installation
64
67
 
@@ -146,7 +149,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
146
149
  ## Operational Tips
147
150
 
148
151
  - Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
149
- - `fit_transform` runs a full fine-tuning pass and immediately returns embeddingsuseful for one-off adaptation jobs.
152
+ - `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
150
153
  - When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
151
154
  - Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
152
155
  - Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
@@ -1,19 +1,17 @@
1
-
2
- ![Swarmauri Logo](https://github.com/swarmauri/swarmauri-sdk/blob/3d4d1cfa949399d7019ae9d8f296afba773dfb7f/assets/swarmauri.brand.theme.svg)
1
+ ![Swarmauri Logo](https://raw.githubusercontent.com/swarmauri/swarmauri-sdk/master/assets/swarmauri_sdk_brand.png)
3
2
 
4
3
  <p align="center">
5
- <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
6
- <img src="https://img.shields.io/pypi/dm/swarmauri_embedding_mlm" alt="PyPI - Downloads"/></a>
4
+ <a href="https://pepy.tech/project/swarmauri_embedding_mlm/">
5
+ <img src="https://static.pepy.tech/badge/swarmauri_embedding_mlm/month" alt="PyPI - Downloads"/></a>
7
6
  <a href="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm/">
8
7
  <img alt="Hits" src="https://hits.sh/github.com/swarmauri/swarmauri-sdk/tree/master/pkgs/community/swarmauri_embedding_mlm.svg"/></a>
9
8
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
10
- <img src="https://img.shields.io/pypi/pyversions/swarmauri_embedding_mlm" alt="PyPI - Python Version"/></a>
9
+ <img src="https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12%20%7C%203.13%20%7C%203.14-blue" alt="PyPI - Python Version"/></a>
11
10
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
12
11
  <img src="https://img.shields.io/pypi/l/swarmauri_embedding_mlm" alt="PyPI - License"/></a>
13
12
  <a href="https://pypi.org/project/swarmauri_embedding_mlm/">
14
13
  <img src="https://img.shields.io/pypi/v/swarmauri_embedding_mlm?label=swarmauri_embedding_mlm&color=green" alt="PyPI - swarmauri_embedding_mlm"/></a>
15
14
  </p>
16
-
17
15
  ---
18
16
 
19
17
  # Swarmauri Embedding MLM
@@ -33,7 +31,7 @@ Trainable embedding provider that fine-tunes a Hugging Face masked language mode
33
31
  - Python 3.10 or newer.
34
32
  - PyTorch with CUDA support if you plan to train on GPU (the class falls back to CPU automatically).
35
33
  - Access to the Hugging Face model hub for downloading `embedding_name`. Set `HF_HOME`, proxies, or tokens if your environment requires authentication.
36
- - Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420MB).
34
+ - Enough disk space to cache the chosen MLM (e.g., `bert-base-uncased` ~420 MB).
37
35
 
38
36
  ## Installation
39
37
 
@@ -121,7 +119,7 @@ embedding = restored.infer_vector("Reuse the trained weights instantly")
121
119
  ## Operational Tips
122
120
 
123
121
  - Batch and sequence length drive GPU memory usage; reduce `batch_size` or `max_length` in tokenizer calls when running on constrained hardware.
124
- - `fit_transform` runs a full fine-tuning pass and immediately returns embeddingsuseful for one-off adaptation jobs.
122
+ - `fit_transform` runs a full fine-tuning pass and immediately returns embeddings—useful for one-off adaptation jobs.
125
123
  - When training on large corpora, stream documents from a generator, chunk them, or wrap the `.fit` call in your own epoch loop.
126
124
  - Run `extract_features()` to audit the tokenizer vocabulary (helpful when debugging domain token coverage).
127
125
  - Combine the generated vectors with Swarmauri vector stores (Redis, Qdrant, etc.) to build end-to-end retrieval pipelines.
@@ -1,20 +1,25 @@
1
1
  [project]
2
2
  name = "swarmauri_embedding_mlm"
3
- version = "0.8.2.dev22"
3
+ version = "0.8.3.dev2"
4
4
  description = "example community package"
5
5
  license = "Apache-2.0"
6
6
  readme = "README.md"
7
7
  repository = "http://github.com/swarmauri/swarmauri-sdk"
8
- requires-python = ">=3.10,<3.13"
8
+ requires-python = ">=3.10,<3.15"
9
9
  classifiers = [
10
10
  "License :: OSI Approved :: Apache Software License",
11
- "Programming Language :: Python :: 3.10",
12
- "Programming Language :: Python :: 3.11",
13
- "Programming Language :: Python :: 3.12",
14
11
  "Natural Language :: English",
15
12
  "Development Status :: 3 - Alpha",
16
13
  "Intended Audience :: Developers",
17
14
  "Topic :: Software Development :: Libraries :: Application Frameworks",
15
+ "Programming Language :: Python",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3 :: Only",
18
+ "Programming Language :: Python :: 3.10",
19
+ "Programming Language :: Python :: 3.11",
20
+ "Programming Language :: Python :: 3.12",
21
+ "Programming Language :: Python :: 3.13",
22
+ "Programming Language :: Python :: 3.14",
18
23
  ]
19
24
  authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
20
25
  dependencies = [