swarmauri_embedding_doc2vec 0.9.0.dev4__tar.gz → 0.9.0.dev32__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,10 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.4
2
2
  Name: swarmauri_embedding_doc2vec
3
- Version: 0.9.0.dev4
3
+ Version: 0.9.0.dev32
4
4
  Summary: A Doc2Vec based Embedding Model.
5
- License: Apache-2.0
5
+ License-Expression: Apache-2.0
6
+ License-File: LICENSE
7
+ Keywords: swarmauri,sdk,standards,embedding,doc2vec,machine-learning
6
8
  Author: Jacob Stewart
7
9
  Author-email: jacob@swarmauri.com
8
10
  Requires-Python: >=3.10,<3.13
@@ -10,6 +12,9 @@ Classifier: License :: OSI Approved :: Apache Software License
10
12
  Classifier: Programming Language :: Python :: 3.10
11
13
  Classifier: Programming Language :: Python :: 3.11
12
14
  Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3 :: Only
13
18
  Requires-Dist: gensim (==4.3.3)
14
19
  Requires-Dist: swarmauri_base
15
20
  Requires-Dist: swarmauri_core
@@ -17,7 +22,7 @@ Requires-Dist: swarmauri_standard
17
22
  Description-Content-Type: text/markdown
18
23
 
19
24
 
20
- ![Swamauri Logo](https://res.cloudinary.com/dbjmpekvl/image/upload/v1730099724/Swarmauri-logo-lockup-2048x757_hww01w.png)
25
+ ![Swarmauri Logo](https://github.com/swarmauri/swarmauri-sdk/blob/3d4d1cfa949399d7019ae9d8f296afba773dfb7f/assets/swarmauri.brand.theme.svg?raw=true)
21
26
 
22
27
  <p align="center">
23
28
  <a href="https://pypi.org/project/swarmauri_embedding_doc2vec/">
@@ -36,35 +41,54 @@ Description-Content-Type: text/markdown
36
41
 
37
42
  # Swarmauri Embedding Doc2vec
38
43
 
39
- A Gensim-based Doc2Vec implementation for document embedding in the Swarmauri ecosystem. This package provides document vectorization capabilities using the Doc2Vec algorithm.
44
+ A [Gensim](https://radimrehurek.com/gensim/)-powered Doc2Vec implementation for document
45
+ embeddings in the Swarmauri ecosystem. The component registers as
46
+ `Doc2VecEmbedding` and returns vectors as `swarmauri_standard.vectors.Vector`
47
+ instances.
40
48
 
41
49
  ## Installation
42
50
 
51
+ Install the package with your preferred Python packaging tool:
52
+
43
53
  ```bash
44
54
  pip install swarmauri_embedding_doc2vec
45
55
  ```
46
56
 
57
+ ```bash
58
+ poetry add swarmauri_embedding_doc2vec
59
+ ```
60
+
61
+ ```bash
62
+ uv pip install swarmauri_embedding_doc2vec
63
+ ```
64
+
47
65
  ## Usage
48
66
 
49
67
  ```python
50
- from swarmauri.embeddings.Doc2VecEmbedding import Doc2VecEmbedding
68
+ from swarmauri_embedding_doc2vec import Doc2VecEmbedding
51
69
 
52
- # Initialize the embedder
53
- embedder = Doc2VecEmbedding(vector_size=3000)
70
+ documents = [
71
+ "This is the first document.",
72
+ "Here is another document.",
73
+ "And a third one.",
74
+ ]
54
75
 
55
- # Prepare your documents
56
- documents = ["This is the first document.", "Here is another document.", "And a third one"]
76
+ # Initialize the embedder. Adjust parameters to match your dataset size.
77
+ embedder = Doc2VecEmbedding(vector_size=300, window=10, min_count=1, workers=1)
57
78
 
58
- # Fit and transform documents
79
+ # Fit and transform documents into Vector objects.
59
80
  vectors = embedder.fit_transform(documents)
60
81
 
61
- # Transform new documents
62
- new_doc = "This is a new document"
63
- vector = embedder.transform([new_doc])
82
+ # Access the raw embedding values via the Vector.value attribute.
83
+ first_vector = vectors[0].value
84
+
85
+ # Transform new documents (the result is also a Vector).
86
+ new_vector = embedder.transform(["This is a new document."])[0]
64
87
 
65
- # Save and load the model
66
- embedder.save_model("doc2vec.model")
67
- embedder.load_model("doc2vec.model")
88
+ # Save and load the underlying Doc2Vec model.
89
+ model_path = "doc2vec.model"
90
+ embedder.save_model(model_path)
91
+ embedder.load_model(model_path)
68
92
  ```
69
93
 
70
94
  ## Want to help?
@@ -1,5 +1,5 @@
1
1
 
2
- ![Swamauri Logo](https://res.cloudinary.com/dbjmpekvl/image/upload/v1730099724/Swarmauri-logo-lockup-2048x757_hww01w.png)
2
+ ![Swarmauri Logo](https://github.com/swarmauri/swarmauri-sdk/blob/3d4d1cfa949399d7019ae9d8f296afba773dfb7f/assets/swarmauri.brand.theme.svg?raw=true)
3
3
 
4
4
  <p align="center">
5
5
  <a href="https://pypi.org/project/swarmauri_embedding_doc2vec/">
@@ -18,35 +18,54 @@
18
18
 
19
19
  # Swarmauri Embedding Doc2vec
20
20
 
21
- A Gensim-based Doc2Vec implementation for document embedding in the Swarmauri ecosystem. This package provides document vectorization capabilities using the Doc2Vec algorithm.
21
+ A [Gensim](https://radimrehurek.com/gensim/)-powered Doc2Vec implementation for document
22
+ embeddings in the Swarmauri ecosystem. The component registers as
23
+ `Doc2VecEmbedding` and returns vectors as `swarmauri_standard.vectors.Vector`
24
+ instances.
22
25
 
23
26
  ## Installation
24
27
 
28
+ Install the package with your preferred Python packaging tool:
29
+
25
30
  ```bash
26
31
  pip install swarmauri_embedding_doc2vec
27
32
  ```
28
33
 
34
+ ```bash
35
+ poetry add swarmauri_embedding_doc2vec
36
+ ```
37
+
38
+ ```bash
39
+ uv pip install swarmauri_embedding_doc2vec
40
+ ```
41
+
29
42
  ## Usage
30
43
 
31
44
  ```python
32
- from swarmauri.embeddings.Doc2VecEmbedding import Doc2VecEmbedding
45
+ from swarmauri_embedding_doc2vec import Doc2VecEmbedding
33
46
 
34
- # Initialize the embedder
35
- embedder = Doc2VecEmbedding(vector_size=3000)
47
+ documents = [
48
+ "This is the first document.",
49
+ "Here is another document.",
50
+ "And a third one.",
51
+ ]
36
52
 
37
- # Prepare your documents
38
- documents = ["This is the first document.", "Here is another document.", "And a third one"]
53
+ # Initialize the embedder. Adjust parameters to match your dataset size.
54
+ embedder = Doc2VecEmbedding(vector_size=300, window=10, min_count=1, workers=1)
39
55
 
40
- # Fit and transform documents
56
+ # Fit and transform documents into Vector objects.
41
57
  vectors = embedder.fit_transform(documents)
42
58
 
43
- # Transform new documents
44
- new_doc = "This is a new document"
45
- vector = embedder.transform([new_doc])
59
+ # Access the raw embedding values via the Vector.value attribute.
60
+ first_vector = vectors[0].value
61
+
62
+ # Transform new documents (the result is also a Vector).
63
+ new_vector = embedder.transform(["This is a new document."])[0]
46
64
 
47
- # Save and load the model
48
- embedder.save_model("doc2vec.model")
49
- embedder.load_model("doc2vec.model")
65
+ # Save and load the underlying Doc2Vec model.
66
+ model_path = "doc2vec.model"
67
+ embedder.save_model(model_path)
68
+ embedder.load_model(model_path)
50
69
  ```
51
70
 
52
71
  ## Want to help?
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "swarmauri_embedding_doc2vec"
3
- version = "0.9.0.dev4"
3
+ version = "0.9.0.dev32"
4
4
  description = "A Doc2Vec based Embedding Model."
5
5
  license = "Apache-2.0"
6
6
  readme = "README.md"
@@ -11,6 +11,9 @@ classifiers = [
11
11
  "Programming Language :: Python :: 3.10",
12
12
  "Programming Language :: Python :: 3.11",
13
13
  "Programming Language :: Python :: 3.12",
14
+ "Programming Language :: Python",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3 :: Only",
14
17
  ]
15
18
  authors = [{ name = "Jacob Stewart", email = "jacob@swarmauri.com" }]
16
19
  dependencies = [
@@ -19,6 +22,14 @@ dependencies = [
19
22
  "swarmauri_base",
20
23
  "swarmauri_standard",
21
24
  ]
25
+ keywords = [
26
+ 'swarmauri',
27
+ 'sdk',
28
+ 'standards',
29
+ 'embedding',
30
+ 'doc2vec',
31
+ 'machine-learning',
32
+ ]
22
33
 
23
34
  [tool.uv.sources]
24
35
  swarmauri_core = { workspace = true }
@@ -37,6 +48,7 @@ markers = [
37
48
  "xfail: Expected failures",
38
49
  "acceptance: Acceptance tests",
39
50
  "perf: Performance tests that measure execution time and resource usage",
51
+ "example: Example tests sourced from documentation",
40
52
  ]
41
53
  timeout = 300
42
54
  log_cli = true