vectoriz 0.0.4__tar.gz → 0.1.0b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/PKG-INFO +27 -2
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/README.md +26 -1
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/setup.py +1 -1
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz/files.py +1 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz/token_transformer.py +4 -3
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz/vector_db.py +2 -10
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz.egg-info/PKG-INFO +27 -2
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/pyproject.toml +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/setup.cfg +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/tests/__init__.py +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/tests/test_files.py +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz/__init__.py +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz.egg-info/SOURCES.txt +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz.egg-info/dependency_links.txt +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz.egg-info/requires.txt +0 -0
- {vectoriz-0.0.4 → vectoriz-0.1.0b0}/vectoriz.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: vectoriz
|
3
|
-
Version: 0.0.4
|
3
|
+
Version: 0.1.0b0
|
4
4
|
Summary: Python library for creating vectorized data from text or files.
|
5
5
|
Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
|
6
6
|
Author: PedroHenriqueDevBR
|
@@ -25,6 +25,24 @@ Dynamic: summary
|
|
25
25
|
|
26
26
|
# Vectoriz
|
27
27
|
|
28
|
+
[](https://pypi.org/project/vectoriz/)
|
29
|
+
|
30
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
|
31
|
+
|
32
|
+
[](https://www.python.org/downloads/)
|
33
|
+
|
34
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
|
35
|
+
|
36
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
|
37
|
+
|
38
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/network)
|
39
|
+
|
40
|
+
Vectoriz is available on PyPI and can be installed via pip:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
pip install vectoriz
|
44
|
+
```
|
45
|
+
|
28
46
|
A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
|
29
47
|
|
30
48
|
## Overview
|
@@ -63,7 +81,8 @@ files_features = FilesFeature()
|
|
63
81
|
argument = files_features.load_all_files_from_directory(directory_path)
|
64
82
|
|
65
83
|
# Create the FAISS index to be used in queries
|
66
|
-
|
84
|
+
token_data = transformer.create_index(argument.text_list)
|
85
|
+
index = token_data.index
|
67
86
|
|
68
87
|
# To load files from VectorDB use
|
69
88
|
vector_client = VectorDBClient()
|
@@ -74,6 +93,12 @@ argument = vector_client.file_argument
|
|
74
93
|
# To save data on VectorDB use
|
75
94
|
vector_client = VectorDBClient(index, argument)
|
76
95
|
vector_client.save_data(index_db_path, np_db_path)
|
96
|
+
|
97
|
+
# To search for information in the index
|
98
|
+
query = input(">>> ")
|
99
|
+
amount_content = 1
|
100
|
+
response = transformer.search(query, index, argument.text_list, amount_content)
|
101
|
+
print(response)
|
77
102
|
```
|
78
103
|
|
79
104
|
## Contributing
|
@@ -1,5 +1,23 @@
|
|
1
1
|
# Vectoriz
|
2
2
|
|
3
|
+
[](https://pypi.org/project/vectoriz/)
|
4
|
+
|
5
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
|
6
|
+
|
7
|
+
[](https://www.python.org/downloads/)
|
8
|
+
|
9
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
|
10
|
+
|
11
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
|
12
|
+
|
13
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/network)
|
14
|
+
|
15
|
+
Vectoriz is available on PyPI and can be installed via pip:
|
16
|
+
|
17
|
+
```bash
|
18
|
+
pip install vectoriz
|
19
|
+
```
|
20
|
+
|
3
21
|
A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
|
4
22
|
|
5
23
|
## Overview
|
@@ -38,7 +56,8 @@ files_features = FilesFeature()
|
|
38
56
|
argument = files_features.load_all_files_from_directory(directory_path)
|
39
57
|
|
40
58
|
# Create the FAISS index to be used in queries
|
41
|
-
|
59
|
+
token_data = transformer.create_index(argument.text_list)
|
60
|
+
index = token_data.index
|
42
61
|
|
43
62
|
# To load files from VectorDB use
|
44
63
|
vector_client = VectorDBClient()
|
@@ -49,6 +68,12 @@ argument = vector_client.file_argument
|
|
49
68
|
# To save data on VectorDB use
|
50
69
|
vector_client = VectorDBClient(index, argument)
|
51
70
|
vector_client.save_data(index_db_path, np_db_path)
|
71
|
+
|
72
|
+
# To search for information in the index
|
73
|
+
query = input(">>> ")
|
74
|
+
amount_content = 1
|
75
|
+
response = transformer.search(query, index, argument.text_list, amount_content)
|
76
|
+
print(response)
|
52
77
|
```
|
53
78
|
|
54
79
|
## Contributing
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="vectoriz",
|
5
|
-
version="0.0.4",
|
5
|
+
version="0.1.0-beta",
|
6
6
|
author="PedroHenriqueDevBR",
|
7
7
|
author_email="pedro.henrique.particular@gmail.com",
|
8
8
|
description="Python library for creating vectorized data from text or files.",
|
@@ -73,15 +73,16 @@ class TokenTransformer:
|
|
73
73
|
def search(
|
74
74
|
self,
|
75
75
|
query: str,
|
76
|
-
|
76
|
+
index: faiss.IndexFlatL2,
|
77
|
+
texts: list[str],
|
77
78
|
context_amount: int = 1,
|
78
79
|
) -> str:
|
79
80
|
query_embedding = self._query_to_embeddings(query)
|
80
|
-
_, I =
|
81
|
+
_, I = index.search(query_embedding, k=context_amount)
|
81
82
|
context = ""
|
82
83
|
|
83
84
|
for i in I[0]:
|
84
|
-
context +=
|
85
|
+
context += texts[i] + "\n"
|
85
86
|
|
86
87
|
return context.strip()
|
87
88
|
|
@@ -54,15 +54,6 @@ class VectorDBClient:
|
|
54
54
|
|
55
55
|
class VectorDB:
|
56
56
|
|
57
|
-
def __init__(self):
|
58
|
-
"""
|
59
|
-
Constructor for the class.
|
60
|
-
|
61
|
-
Initializes the following attributes:
|
62
|
-
- transformer: A TokenTransformer instance for text transformation.
|
63
|
-
"""
|
64
|
-
self.transformer = TokenTransformer()
|
65
|
-
|
66
57
|
def load_saved_data(
|
67
58
|
self, faiss_db_path: str, np_db_path: str
|
68
59
|
) -> Optional[VectorDBClient]:
|
@@ -158,13 +149,14 @@ class VectorDB:
|
|
158
149
|
- 'chunk_names': The chunk names
|
159
150
|
- 'texts': The text content
|
160
151
|
"""
|
152
|
+
transformer = TokenTransformer()
|
161
153
|
np_db_path = np_db_path if np_db_path.endswith(".npz") else np_db_path + ".npz"
|
162
154
|
|
163
155
|
embeddings_np: np.ndarray = None
|
164
156
|
if argument.ndarray_data is not None:
|
165
157
|
embeddings_np = argument.ndarray_data
|
166
158
|
else:
|
167
|
-
embeddings_np =
|
159
|
+
embeddings_np = transformer.get_np_vectors(argument.embeddings)
|
168
160
|
|
169
161
|
np.savez(
|
170
162
|
np_db_path,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: vectoriz
|
3
|
-
Version: 0.0.4
|
3
|
+
Version: 0.1.0b0
|
4
4
|
Summary: Python library for creating vectorized data from text or files.
|
5
5
|
Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
|
6
6
|
Author: PedroHenriqueDevBR
|
@@ -25,6 +25,24 @@ Dynamic: summary
|
|
25
25
|
|
26
26
|
# Vectoriz
|
27
27
|
|
28
|
+
[](https://pypi.org/project/vectoriz/)
|
29
|
+
|
30
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
|
31
|
+
|
32
|
+
[](https://www.python.org/downloads/)
|
33
|
+
|
34
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
|
35
|
+
|
36
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
|
37
|
+
|
38
|
+
[](https://github.com/PedroHenriqueDevBR/vectoriz/network)
|
39
|
+
|
40
|
+
Vectoriz is available on PyPI and can be installed via pip:
|
41
|
+
|
42
|
+
```bash
|
43
|
+
pip install vectoriz
|
44
|
+
```
|
45
|
+
|
28
46
|
A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
|
29
47
|
|
30
48
|
## Overview
|
@@ -63,7 +81,8 @@ files_features = FilesFeature()
|
|
63
81
|
argument = files_features.load_all_files_from_directory(directory_path)
|
64
82
|
|
65
83
|
# Create the FAISS index to be used in queries
|
66
|
-
|
84
|
+
token_data = transformer.create_index(argument.text_list)
|
85
|
+
index = token_data.index
|
67
86
|
|
68
87
|
# To load files from VectorDB use
|
69
88
|
vector_client = VectorDBClient()
|
@@ -74,6 +93,12 @@ argument = vector_client.file_argument
|
|
74
93
|
# To save data on VectorDB use
|
75
94
|
vector_client = VectorDBClient(index, argument)
|
76
95
|
vector_client.save_data(index_db_path, np_db_path)
|
96
|
+
|
97
|
+
# To search for information in the index
|
98
|
+
query = input(">>> ")
|
99
|
+
amount_content = 1
|
100
|
+
response = transformer.search(query, index, argument.text_list, amount_content)
|
101
|
+
print(response)
|
77
102
|
```
|
78
103
|
|
79
104
|
## Contributing
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|