swarmauri_embedding_nmf 0.6.0.dev154__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.3
2
+ Name: swarmauri_embedding_nmf
3
+ Version: 0.6.0.dev154
4
+ Summary: NMF Embedding for Swarmauri.
5
+ License: Apache-2.0
6
+ Author: Jacob Stewart
7
+ Author-email: jacob@swarmauri.com
8
+ Requires-Python: >=3.10,<3.13
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Requires-Dist: scikit-learn (>=1.4.2,<2.0.0)
15
+ Requires-Dist: swarmauri_base (>=0.6.0.dev154,<0.7.0)
16
+ Requires-Dist: swarmauri_core (>=0.6.0.dev154,<0.7.0)
17
+ Project-URL: Repository, http://github.com/swarmauri/swarmauri-sdk
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Swarmauri NMF Embedding Plugin
@@ -0,0 +1 @@
1
+ # Swarmauri NMF Embedding Plugin
@@ -0,0 +1,57 @@
1
+ [tool.poetry]
2
+ name = "swarmauri_embedding_nmf"
3
+ version = "0.6.0.dev154"
4
+ description = "NMF Embedding for Swarmauri."
5
+ authors = ["Jacob Stewart <jacob@swarmauri.com>"]
6
+ license = "Apache-2.0"
7
+ readme = "README.md"
8
+ repository = "http://github.com/swarmauri/swarmauri-sdk"
9
+ classifiers = [
10
+ "License :: OSI Approved :: Apache Software License",
11
+ "Programming Language :: Python :: 3.10",
12
+ "Programming Language :: Python :: 3.11",
13
+ "Programming Language :: Python :: 3.12"
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = ">=3.10,<3.13"
18
+
19
+ # Swarmauri
20
+ swarmauri_core = {version = "^0.6.0.dev154"}
21
+ swarmauri_base = {version = "^0.6.0.dev154"}
22
+
23
+ # Dependencies
24
+ scikit-learn = "^1.4.2"
25
+
26
+
27
+ [tool.poetry.group.dev.dependencies]
28
+ flake8 = "^7.0"
29
+ pytest = "^8.0"
30
+ pytest-asyncio = ">=0.24.0"
31
+ pytest-xdist = "^3.6.1"
32
+ pytest-json-report = "^1.5.0"
33
+ python-dotenv = "*"
34
+ requests = "^2.32.3"
35
+
36
+ [build-system]
37
+ requires = ["poetry-core>=1.0.0"]
38
+ build-backend = "poetry.core.masonry.api"
39
+
40
+ [tool.pytest.ini_options]
41
+ norecursedirs = ["combined", "scripts"]
42
+
43
+ markers = [
44
+ "test: standard test",
45
+ "unit: Unit tests",
46
+ "integration: Integration tests",
47
+ "acceptance: Acceptance tests",
48
+ "experimental: Experimental tests"
49
+ ]
50
+ log_cli = true
51
+ log_cli_level = "INFO"
52
+ log_cli_format = "%(asctime)s [%(levelname)s] %(message)s"
53
+ log_cli_date_format = "%Y-%m-%d %H:%M:%S"
54
+ asyncio_default_fixture_loop_scope = "function"
55
+
56
+ [tool.poetry.plugins."swarmauri.embeddings"]
57
+ NmfEmbedding = "swarmauri_embedding_nmf:NmfEmbedding"
@@ -0,0 +1,111 @@
1
+ import joblib
2
+ from sklearn.decomposition import NMF
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+
5
+ from typing import List, Any, Literal
6
+ from pydantic import PrivateAttr
7
+ from swarmauri_standard.vectors.Vector import Vector
8
+ from swarmauri_base.embeddings.EmbeddingBase import EmbeddingBase
9
+ from swarmauri_core.ComponentBase import ComponentBase
10
+
11
+
12
@ComponentBase.register_type(EmbeddingBase, "NmfEmbedding")
class NmfEmbedding(EmbeddingBase):
    """
    Text embedding that chains a TF-IDF vectorizer with an NMF model.

    Documents are first converted to a TF-IDF matrix, which is then
    projected into an ``n_components``-dimensional NMF feature space.

    Attributes:
        n_components (int): Number of NMF components, i.e. the length of
            every vector produced by ``transform``.
        feature_names (List[Any]): Vocabulary terms learned by the TF-IDF
            vectorizer; empty until ``fit`` or ``load_model`` has run.
    """

    n_components: int = 10
    _tfidf_vectorizer = PrivateAttr()
    _model = PrivateAttr()
    feature_names: List[Any] = []

    type: Literal["NmfEmbedding"] = "NmfEmbedding"

    def __init__(self, **kwargs):
        """
        Build the embedding and its underlying scikit-learn estimators.

        Args:
            **kwargs: Field values forwarded to the base class initializer
                (for example ``n_components``).
        """
        super().__init__(**kwargs)
        # Initialize TF-IDF Vectorizer
        self._tfidf_vectorizer = TfidfVectorizer()
        # Initialize NMF with the desired number of components
        self._model = NMF(n_components=self.n_components)

    def fit(self, data):
        """
        Fit the TF-IDF vectorizer and the NMF model to data.

        Args:
            data: The text documents to fit, passed as-is to
                ``TfidfVectorizer.fit_transform``.
        """
        # Transform data into TF-IDF matrix
        tfidf_matrix = self._tfidf_vectorizer.fit_transform(data)
        # Fit the NMF model
        self._model.fit(tfidf_matrix)
        # Store feature names as a plain list so the value matches the
        # declared ``List[Any]`` field type instead of holding a NumPy array.
        self.feature_names = self._tfidf_vectorizer.get_feature_names_out().tolist()

    def transform(self, data):
        """
        Transform new data into NMF feature space.

        Args:
            data: The text documents to transform, passed as-is to
                ``TfidfVectorizer.transform``.

        Returns:
            List[Vector]: One vector per row of the NMF-transformed matrix.
        """
        # Transform data into TF-IDF matrix
        tfidf_matrix = self._tfidf_vectorizer.transform(data)
        # Transform TF-IDF matrix into NMF space
        nmf_features = self._model.transform(tfidf_matrix)

        # Wrap each row of NMF features in a Vector instance and return
        return [Vector(value=features.tolist()) for features in nmf_features]

    def fit_transform(self, data):
        """
        Fit the model to data and then transform it.

        Args:
            data: The text documents to fit and transform.

        Returns:
            List[Vector]: A list of vectors representing the fitted and
            transformed data.
        """
        self.fit(data)
        return self.transform(data)

    def infer_vector(self, data):
        """
        Convenience method for transforming a single data point.

        Args:
            data: Single text document to transform.

        Returns:
            Vector: The vector for the single transformed data point.
        """
        # ``transform`` works on a collection, so wrap and unwrap the item.
        return self.transform([data])[0]

    def extract_features(self):
        """
        Return the feature names learned by the TF-IDF vectorizer.

        Returns:
            list: The feature names; an empty list if the model has not
            been fitted or loaded yet.
        """
        names = self.feature_names
        # Accept both a plain list (the default and what ``fit`` stores) and
        # an array-like exposing ``tolist`` (e.g. assigned by external code).
        if hasattr(names, "tolist"):
            return names.tolist()
        return list(names)

    def save_model(self, path: str) -> None:
        """
        Save the TF-IDF vectorizer and the NMF model using joblib.

        Two files are written: ``{path}_tfidf.joblib`` and
        ``{path}_nmf.joblib``.

        Args:
            path (str): Path prefix shared by both output files.
        """
        joblib.dump(self._tfidf_vectorizer, f"{path}_tfidf.joblib")
        joblib.dump(self._model, f"{path}_nmf.joblib")

    def load_model(self, path: str) -> None:
        """
        Load the TF-IDF vectorizer and the NMF model using joblib.

        Reads ``{path}_tfidf.joblib`` and ``{path}_nmf.joblib`` and refreshes
        ``feature_names`` from the loaded vectorizer.

        Args:
            path (str): Path prefix shared by both input files.
        """
        # SECURITY: joblib.load is pickle-based and can execute arbitrary
        # code; only load files that come from a trusted source.
        self._tfidf_vectorizer = joblib.load(f"{path}_tfidf.joblib")
        self._model = joblib.load(f"{path}_nmf.joblib")
        # Refresh feature_names from the loaded vectorizer, stored as a
        # plain list to match the declared field type.
        self.feature_names = self._tfidf_vectorizer.get_feature_names_out().tolist()
@@ -0,0 +1,14 @@
1
+ from .NmfEmbedding import NmfEmbedding
2
+
3
+ __version__ = "0.6.0.dev26"
4
+ __long_desc__ = """
5
+
6
+ # Swarmauri Nmf Embedding Plugin
7
+
8
+ This repository includes an Nmf Embedding of a Swarmauri Plugin.
9
+
10
+ Visit us at: https://swarmauri.com
11
+ Follow us at: https://github.com/swarmauri
12
+ Star us at: https://github.com/swarmauri/swarmauri-sdk
13
+
14
+ """