vectoriz 0.0.4__tar.gz → 0.0.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectoriz
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: Python library for creating vectorized data from text or files.
5
5
  Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
6
6
  Author: PedroHenriqueDevBR
@@ -25,6 +25,24 @@ Dynamic: summary
25
25
 
26
26
  # Vectoriz
27
27
 
28
+ [![PyPI version](https://badge.fury.io/py/vectoriz.svg)](https://pypi.org/project/vectoriz/)
29
+
30
+ [![GitHub license](https://img.shields.io/github/license/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
31
+
32
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
33
+
34
+ [![GitHub issues](https://img.shields.io/github/issues/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
35
+
36
+ [![GitHub stars](https://img.shields.io/github/stars/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
37
+
38
+ [![GitHub forks](https://img.shields.io/github/forks/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/network)
39
+
40
+ Vectoriz is available on PyPI and can be installed via pip:
41
+
42
+ ```bash
43
+ pip install vectoriz
44
+ ```
45
+
28
46
  A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
29
47
 
30
48
  ## Overview
@@ -63,7 +81,8 @@ files_features = FilesFeature()
63
81
  argument = files_features.load_all_files_from_directory(directory_path)
64
82
 
65
83
  # Create the FAISS index to be used in queries
66
- index = transformer.create_index(argument.text_list)
84
+ token_data = transformer.create_index(argument.text_list)
85
+ index = token_data.index
67
86
 
68
87
  # To load files from VectorDB use
69
88
  vector_client = VectorDBClient()
@@ -74,6 +93,12 @@ argument = vector_client.file_argument
74
93
  # To save data on VectorDB use
75
94
  vector_client = VectorDBClient(index, argument)
76
95
  vector_client.save_data(index_db_path, np_db_path)
96
+
97
+ # To search for information in the index
98
+ query = input(">>> ")
99
+ amount_content = 1
100
+ response = transformer.search(query, index, argument.text_list, amount_content)
101
+ print(response)
77
102
  ```
78
103
 
79
104
  ## Contributing
@@ -1,5 +1,23 @@
1
1
  # Vectoriz
2
2
 
3
+ [![PyPI version](https://badge.fury.io/py/vectoriz.svg)](https://pypi.org/project/vectoriz/)
4
+
5
+ [![GitHub license](https://img.shields.io/github/license/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
6
+
7
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
8
+
9
+ [![GitHub issues](https://img.shields.io/github/issues/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
10
+
11
+ [![GitHub stars](https://img.shields.io/github/stars/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
12
+
13
+ [![GitHub forks](https://img.shields.io/github/forks/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/network)
14
+
15
+ Vectoriz is available on PyPI and can be installed via pip:
16
+
17
+ ```bash
18
+ pip install vectoriz
19
+ ```
20
+
3
21
  A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
4
22
 
5
23
  ## Overview
@@ -38,7 +56,8 @@ files_features = FilesFeature()
38
56
  argument = files_features.load_all_files_from_directory(directory_path)
39
57
 
40
58
  # Create the FAISS index to be used in queries
41
- index = transformer.create_index(argument.text_list)
59
+ token_data = transformer.create_index(argument.text_list)
60
+ index = token_data.index
42
61
 
43
62
  # To load files from VectorDB use
44
63
  vector_client = VectorDBClient()
@@ -49,6 +68,12 @@ argument = vector_client.file_argument
49
68
  # To save data on VectorDB use
50
69
  vector_client = VectorDBClient(index, argument)
51
70
  vector_client.save_data(index_db_path, np_db_path)
71
+
72
+ # To search for information in the index
73
+ query = input(">>> ")
74
+ amount_content = 1
75
+ response = transformer.search(query, index, argument.text_list, amount_content)
76
+ print(response)
52
77
  ```
53
78
 
54
79
  ## Contributing
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="vectoriz",
5
- version="0.0.4",
5
+ version="0.0.5",
6
6
  author="PedroHenriqueDevBR",
7
7
  author_email="pedro.henrique.particular@gmail.com",
8
8
  description="Python library for creating vectorized data from text or files.",
@@ -2,7 +2,7 @@ import os
2
2
  import docx
3
3
  import numpy as np
4
4
  from typing import Optional
5
- from vectoriz.token_transformer import TokenTransformer
5
+ from token_transformer import TokenTransformer
6
6
 
7
7
  class FileArgument:
8
8
  def __init__(
@@ -73,15 +73,16 @@ class TokenTransformer:
73
73
  def search(
74
74
  self,
75
75
  query: str,
76
- data: TokenData,
76
+ index: faiss.IndexFlatL2,
77
+ texts: list[str],
77
78
  context_amount: int = 1,
78
79
  ) -> str:
79
80
  query_embedding = self._query_to_embeddings(query)
80
- _, I = data.index.search(query_embedding, k=context_amount)
81
+ _, I = index.search(query_embedding, k=context_amount)
81
82
  context = ""
82
83
 
83
84
  for i in I[0]:
84
- context += data.texts[i] + "\n"
85
+ context += texts[i] + "\n"
85
86
 
86
87
  return context.strip()
87
88
 
@@ -3,8 +3,8 @@ import faiss
3
3
  import numpy as np
4
4
  from typing import Optional
5
5
 
6
- from vectoriz.files import FileArgument
7
- from vectoriz.token_transformer import TokenTransformer
6
+ from files import FileArgument
7
+ from token_transformer import TokenTransformer
8
8
 
9
9
 
10
10
  class VectorDBClient:
@@ -54,15 +54,6 @@ class VectorDBClient:
54
54
 
55
55
  class VectorDB:
56
56
 
57
- def __init__(self):
58
- """
59
- Constructor for the class.
60
-
61
- Initializes the following attributes:
62
- - transformer: A TokenTransformer instance for text transformation.
63
- """
64
- self.transformer = TokenTransformer()
65
-
66
57
  def load_saved_data(
67
58
  self, faiss_db_path: str, np_db_path: str
68
59
  ) -> Optional[VectorDBClient]:
@@ -158,13 +149,14 @@ class VectorDB:
158
149
  - 'chunk_names': The chunk names
159
150
  - 'texts': The text content
160
151
  """
152
+ transformer = TokenTransformer()
161
153
  np_db_path = np_db_path if np_db_path.endswith(".npz") else np_db_path + ".npz"
162
154
 
163
155
  embeddings_np: np.ndarray = None
164
156
  if argument.ndarray_data is not None:
165
157
  embeddings_np = argument.ndarray_data
166
158
  else:
167
- embeddings_np = self.transformer.get_np_vectors(argument.embeddings)
159
+ embeddings_np = transformer.get_np_vectors(argument.embeddings)
168
160
 
169
161
  np.savez(
170
162
  np_db_path,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectoriz
3
- Version: 0.0.4
3
+ Version: 0.0.5
4
4
  Summary: Python library for creating vectorized data from text or files.
5
5
  Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
6
6
  Author: PedroHenriqueDevBR
@@ -25,6 +25,24 @@ Dynamic: summary
25
25
 
26
26
  # Vectoriz
27
27
 
28
+ [![PyPI version](https://badge.fury.io/py/vectoriz.svg)](https://pypi.org/project/vectoriz/)
29
+
30
+ [![GitHub license](https://img.shields.io/github/license/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
31
+
32
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
33
+
34
+ [![GitHub issues](https://img.shields.io/github/issues/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
35
+
36
+ [![GitHub stars](https://img.shields.io/github/stars/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
37
+
38
+ [![GitHub forks](https://img.shields.io/github/forks/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/network)
39
+
40
+ Vectoriz is available on PyPI and can be installed via pip:
41
+
42
+ ```bash
43
+ pip install vectoriz
44
+ ```
45
+
28
46
  A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
29
47
 
30
48
  ## Overview
@@ -63,7 +81,8 @@ files_features = FilesFeature()
63
81
  argument = files_features.load_all_files_from_directory(directory_path)
64
82
 
65
83
  # Create the FAISS index to be used in queries
66
- index = transformer.create_index(argument.text_list)
84
+ token_data = transformer.create_index(argument.text_list)
85
+ index = token_data.index
67
86
 
68
87
  # To load files from VectorDB use
69
88
  vector_client = VectorDBClient()
@@ -74,6 +93,12 @@ argument = vector_client.file_argument
74
93
  # To save data on VectorDB use
75
94
  vector_client = VectorDBClient(index, argument)
76
95
  vector_client.save_data(index_db_path, np_db_path)
96
+
97
+ # To search for information in the index
98
+ query = input(">>> ")
99
+ amount_content = 1
100
+ response = transformer.search(query, index, argument.text_list, amount_content)
101
+ print(response)
77
102
  ```
78
103
 
79
104
  ## Contributing
File without changes
File without changes
File without changes
File without changes
File without changes