swarmauri_embedding_nmf 0.6.0.dev154__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swarmauri_embedding_nmf-0.6.0.dev154/PKG-INFO +20 -0
- swarmauri_embedding_nmf-0.6.0.dev154/README.md +1 -0
- swarmauri_embedding_nmf-0.6.0.dev154/pyproject.toml +57 -0
- swarmauri_embedding_nmf-0.6.0.dev154/swarmauri_embedding_nmf/NmfEmbedding.py +111 -0
- swarmauri_embedding_nmf-0.6.0.dev154/swarmauri_embedding_nmf/__init__.py +14 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: swarmauri_embedding_nmf
|
|
3
|
+
Version: 0.6.0.dev154
|
|
4
|
+
Summary: NMF Embedding for Swarmauri.
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Author: Jacob Stewart
|
|
7
|
+
Author-email: jacob@swarmauri.com
|
|
8
|
+
Requires-Python: >=3.10,<3.13
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Requires-Dist: scikit-learn (>=1.4.2,<2.0.0)
|
|
15
|
+
Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
|
|
16
|
+
Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
|
|
17
|
+
Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Swarmauri Nmf Embedding Plugin
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Swarmauri Nmf Embedding Plugin
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "swarmauri_embedding_nmf"
|
|
3
|
+
version = "0.6.0.dev154"
|
|
4
|
+
description = "NMF Embedding for Swarmauri."
|
|
5
|
+
authors = ["Jacob Stewart <jacob@swarmauri.com>"]
|
|
6
|
+
license = "Apache-2.0"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
repository = "http://github.com/swarmauri/swarmauri-sdk"
|
|
9
|
+
classifiers = [
|
|
10
|
+
"License :: OSI Approved :: Apache Software License",
|
|
11
|
+
"Programming Language :: Python :: 3.10",
|
|
12
|
+
"Programming Language :: Python :: 3.11",
|
|
13
|
+
"Programming Language :: Python :: 3.12"
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.poetry.dependencies]
python = ">=3.10,<3.13"

# Swarmauri
swarmauri_core = {version = "^0.6.0.dev154"}
swarmauri_base = {version = "^0.6.0.dev154"}
# NmfEmbedding.py imports swarmauri_standard.vectors.Vector directly, so the
# package has to be declared instead of being assumed present in the environment.
swarmauri_standard = {version = "^0.6.0.dev154"}

# Dependencies
# joblib (imported by NmfEmbedding.py) is pulled in transitively by scikit-learn.
scikit-learn = "^1.4.2"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
[tool.poetry.group.dev.dependencies]
|
|
28
|
+
flake8 = "^7.0"
|
|
29
|
+
pytest = "^8.0"
|
|
30
|
+
pytest-asyncio = ">=0.24.0"
|
|
31
|
+
pytest-xdist = "^3.6.1"
|
|
32
|
+
pytest-json-report = "^1.5.0"
|
|
33
|
+
python-dotenv = "*"
|
|
34
|
+
requests = "^2.32.3"
|
|
35
|
+
|
|
36
|
+
[build-system]
|
|
37
|
+
requires = ["poetry-core>=1.0.0"]
|
|
38
|
+
build-backend = "poetry.core.masonry.api"
|
|
39
|
+
|
|
40
|
+
[tool.pytest.ini_options]
|
|
41
|
+
norecursedirs = ["combined", "scripts"]
|
|
42
|
+
|
|
43
|
+
markers = [
|
|
44
|
+
"test: standard test",
|
|
45
|
+
"unit: Unit tests",
|
|
46
|
+
"integration: Integration tests",
|
|
47
|
+
"acceptance: Acceptance tests",
|
|
48
|
+
"experimental: Experimental tests"
|
|
49
|
+
]
|
|
50
|
+
log_cli = true
|
|
51
|
+
log_cli_level = "INFO"
|
|
52
|
+
log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
|
|
53
|
+
log_cli_date_format = "%Y-%m-%d %H:%M:%S"
|
|
54
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
55
|
+
|
|
56
|
+
[tool.poetry.plugins."swarmauri.embeddings"]
|
|
57
|
+
NmfEmbedding = "swarmauri_embedding_nmf:NmfEmbedding"
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import joblib
|
|
2
|
+
from sklearn.decomposition import NMF
|
|
3
|
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
|
4
|
+
|
|
5
|
+
from typing import List, Any, Literal
|
|
6
|
+
from pydantic import PrivateAttr
|
|
7
|
+
from swarmauri_standard.vectors.Vector import Vector
|
|
8
|
+
from swarmauri_base.embeddings.EmbeddingBase import EmbeddingBase
|
|
9
|
+
from swarmauri_core.ComponentBase import ComponentBase
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@ComponentBase.register_type(EmbeddingBase, "NmfEmbedding")
class NmfEmbedding(EmbeddingBase):
    """
    Text embedding that chains a TF-IDF vectorizer with an NMF decomposition.

    Documents are first turned into a TF-IDF matrix by scikit-learn's
    ``TfidfVectorizer``; that matrix is then projected by scikit-learn's
    ``NMF`` and each resulting row is wrapped in a ``Vector``.
    """

    # Forwarded to NMF(n_components=...) when the instance is constructed;
    # changing it afterwards does not rebuild the already-created model.
    n_components: int = 10
    _tfidf_vectorizer = PrivateAttr()
    _model = PrivateAttr()
    # Vocabulary learned by the TF-IDF vectorizer; empty until fit() or
    # load_model() has run.
    feature_names: List[Any] = []

    type: Literal["NmfEmbedding"] = "NmfEmbedding"

    def __init__(self, **kwargs):
        """
        Build the embedding with an unfitted vectorizer and NMF model.

        Args:
            **kwargs: Field values forwarded unchanged to the base class
                constructor (for example ``n_components``).
        """
        super().__init__(**kwargs)
        # The estimators are created after the base constructor so that
        # self.n_components already holds the caller-supplied value.
        self._tfidf_vectorizer = TfidfVectorizer()
        self._model = NMF(n_components=self.n_components)

    def fit(self, data) -> None:
        """
        Fit the TF-IDF vectorizer and the NMF model to a corpus.

        Args:
            data: Iterable of text documents (e.g. ``List[str]``). It is handed
                straight to ``TfidfVectorizer.fit_transform``, which expects a
                collection of documents rather than one bare string.
        """
        tfidf_matrix = self._tfidf_vectorizer.fit_transform(data)
        self._model.fit(tfidf_matrix)
        # get_feature_names_out() returns a NumPy array; store a plain list so
        # the value matches the declared List[Any] field type.
        self.feature_names = self._tfidf_vectorizer.get_feature_names_out().tolist()

    def transform(self, data) -> List[Vector]:
        """
        Project documents into the fitted NMF feature space.

        Args:
            data: Iterable of text documents to embed.

        Returns:
            List[Vector]: One ``Vector`` per input document, holding that
            document's NMF component weights.
        """
        tfidf_matrix = self._tfidf_vectorizer.transform(data)
        nmf_features = self._model.transform(tfidf_matrix)

        # Each row of the NMF output becomes one Vector of plain Python floats.
        return [Vector(value=features.tolist()) for features in nmf_features]

    def fit_transform(self, data) -> List[Vector]:
        """
        Fit the model to a corpus and return that corpus's embeddings.

        Args:
            data: Iterable of text documents to fit on and then embed.

        Returns:
            List[Vector]: One ``Vector`` per input document.
        """
        self.fit(data)
        return self.transform(data)

    def infer_vector(self, data) -> Vector:
        """
        Embed a single document.

        Args:
            data: One text document; it is wrapped in a one-element list before
                being passed to ``transform``.

        Returns:
            Vector: The embedding of the given document.
        """
        return self.transform([data])[0]

    def extract_features(self) -> List[Any]:
        """
        Return the feature names learned by the TF-IDF vectorizer.

        Returns:
            List[Any]: The stored feature names as a new list; empty when the
            model has been neither fitted nor loaded.
        """
        names = self.feature_names
        # feature_names defaults to a plain list, which has no tolist(); only
        # array-like values (e.g. an ndarray assigned by other code) need it.
        if hasattr(names, "tolist"):
            return names.tolist()
        return list(names)

    def save_model(self, path: str) -> None:
        """
        Persist the TF-IDF vectorizer and the NMF model with joblib.

        Two files are written: ``{path}_tfidf.joblib`` and
        ``{path}_nmf.joblib``.

        Args:
            path (str): Common path prefix for both files.
        """
        joblib.dump(self._tfidf_vectorizer, f"{path}_tfidf.joblib")
        joblib.dump(self._model, f"{path}_nmf.joblib")

    def load_model(self, path: str) -> None:
        """
        Restore the TF-IDF vectorizer and the NMF model written by save_model.

        Args:
            path (str): The same path prefix that was given to ``save_model``.
        """
        # SECURITY: joblib files are pickle-based, so loading one can execute
        # arbitrary code. Only load files from a trusted source.
        self._tfidf_vectorizer = joblib.load(f"{path}_tfidf.joblib")
        self._model = joblib.load(f"{path}_nmf.joblib")
        # Refresh the cached vocabulary so it reflects the loaded vectorizer.
        self.feature_names = self._tfidf_vectorizer.get_feature_names_out().tolist()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version

from .NmfEmbedding import NmfEmbedding

# Keep `from swarmauri_embedding_nmf import *` limited to the plugin class, so
# the metadata helpers imported above are not re-exported.
__all__ = ["NmfEmbedding"]

# Read the version from the installed distribution's metadata so it cannot
# drift from pyproject.toml the way a hand-maintained literal does.
try:
    __version__ = version("swarmauri_embedding_nmf")
except PackageNotFoundError:
    # Package is being used without being installed (e.g. a source checkout).
    __version__ = "0.6.0.dev154"

__long_desc__ = """

# Swarmauri Nmf Embedding Plugin

This repository includes an Nmf Embedding of a Swarmauri Plugin.

Visit us at: https://swarmauri.com
Follow us at: https://github.com/swarmauri
Star us at: https://github.com/swarmauri/swarmauri-sdk

"""
|