vectoriz 0.0.4__py3-none-any.whl → 0.1.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vectoriz/files.py CHANGED
@@ -2,6 +2,7 @@ import os
2
2
  import docx
3
3
  import numpy as np
4
4
  from typing import Optional
5
+
5
6
  from vectoriz.token_transformer import TokenTransformer
6
7
 
7
8
  class FileArgument:
@@ -73,15 +73,16 @@ class TokenTransformer:
73
73
  def search(
74
74
  self,
75
75
  query: str,
76
- data: TokenData,
76
+ index: faiss.IndexFlatL2,
77
+ texts: list[str],
77
78
  context_amount: int = 1,
78
79
  ) -> str:
79
80
  query_embedding = self._query_to_embeddings(query)
80
- _, I = data.index.search(query_embedding, k=context_amount)
81
+ _, I = index.search(query_embedding, k=context_amount)
81
82
  context = ""
82
83
 
83
84
  for i in I[0]:
84
- context += data.texts[i] + "\n"
85
+ context += texts[i] + "\n"
85
86
 
86
87
  return context.strip()
87
88
 
vectoriz/vector_db.py CHANGED
@@ -54,15 +54,6 @@ class VectorDBClient:
54
54
 
55
55
  class VectorDB:
56
56
 
57
- def __init__(self):
58
- """
59
- Constructor for the class.
60
-
61
- Initializes the following attributes:
62
- - transformer: A TokenTransformer instance for text transformation.
63
- """
64
- self.transformer = TokenTransformer()
65
-
66
57
  def load_saved_data(
67
58
  self, faiss_db_path: str, np_db_path: str
68
59
  ) -> Optional[VectorDBClient]:
@@ -158,13 +149,14 @@ class VectorDB:
158
149
  - 'chunk_names': The chunk names
159
150
  - 'texts': The text content
160
151
  """
152
+ transformer = TokenTransformer()
161
153
  np_db_path = np_db_path if np_db_path.endswith(".npz") else np_db_path + ".npz"
162
154
 
163
155
  embeddings_np: np.ndarray = None
164
156
  if argument.ndarray_data is not None:
165
157
  embeddings_np = argument.ndarray_data
166
158
  else:
167
- embeddings_np = self.transformer.get_np_vectors(argument.embeddings)
159
+ embeddings_np = transformer.get_np_vectors(argument.embeddings)
168
160
 
169
161
  np.savez(
170
162
  np_db_path,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: vectoriz
3
- Version: 0.0.4
3
+ Version: 0.1.0b0
4
4
  Summary: Python library for creating vectorized data from text or files.
5
5
  Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
6
6
  Author: PedroHenriqueDevBR
@@ -25,6 +25,24 @@ Dynamic: summary
25
25
 
26
26
  # Vectoriz
27
27
 
28
+ [![PyPI version](https://badge.fury.io/py/vectoriz.svg)](https://pypi.org/project/vectoriz/)
29
+
30
+ [![GitHub license](https://img.shields.io/github/license/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
31
+
32
+ [![Python Version](https://img.shields.io/badge/python-3.12%2B-blue)](https://www.python.org/downloads/)
33
+
34
+ [![GitHub issues](https://img.shields.io/github/issues/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
35
+
36
+ [![GitHub stars](https://img.shields.io/github/stars/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
37
+
38
+ [![GitHub forks](https://img.shields.io/github/forks/PedroHenriqueDevBR/vectoriz)](https://github.com/PedroHenriqueDevBR/vectoriz/network)
39
+
40
+ Vectoriz is available on PyPI and can be installed via pip:
41
+
42
+ ```bash
43
+ pip install vectoriz
44
+ ```
45
+
28
46
  A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
29
47
 
30
48
  ## Overview
@@ -63,7 +81,8 @@ files_features = FilesFeature()
63
81
  argument = files_features.load_all_files_from_directory(directory_path)
64
82
 
65
83
  # Create the FAISS index to be used in queries
66
- index = transformer.create_index(argument.text_list)
84
+ token_data = transformer.create_index(argument.text_list)
85
+ index = token_data.index
67
86
 
68
87
  # To load files from VectorDB use
69
88
  vector_client = VectorDBClient()
@@ -74,6 +93,12 @@ argument = vector_client.file_argument
74
93
  # To save data on VectorDB use
75
94
  vector_client = VectorDBClient(index, argument)
76
95
  vector_client.save_data(index_db_path, np_db_path)
96
+
97
+ # To search for information in the index
98
+ query = input(">>> ")
99
+ amount_content = 1
100
+ response = transformer.search(query, index, argument.text_list, amount_content)
101
+ print(response)
77
102
  ```
78
103
 
79
104
  ## Contributing
@@ -0,0 +1,10 @@
1
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tests/test_files.py,sha256=jNIcwdDrPGKv78zGJReb6s0kPZLr3mTvkwwl3cP6MlM,1622
3
+ vectoriz/__init__.py,sha256=fnnle0EjVejiZQ8t243kvFiqcTTFh9dzmZbNwayjh4U,156
4
+ vectoriz/files.py,sha256=VxqRlHgUVUm2Afq7LX5YMvEX5T1XgQW8he2r4NY9WPQ,9041
5
+ vectoriz/token_transformer.py,sha256=1KIGL6EAiuqFhFzh3grkJcdEjjwTC3kH6RNqRkYMkQU,5811
6
+ vectoriz/vector_db.py,sha256=xxQjhfLiZ4inV5XHYHWcr-RRbH2BcpSCqTli9lOv_g8,6835
7
+ vectoriz-0.1.0b0.dist-info/METADATA,sha256=nohEcf7bCgvQDBM4fyhKfpLS7lLWAlWk9TRHX1TTR5o,3696
8
+ vectoriz-0.1.0b0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
+ vectoriz-0.1.0b0.dist-info/top_level.txt,sha256=Tcfk3kazBwJ_yySjjhlIhLoTWLQGSb5xV006X18O6Nk,15
10
+ vectoriz-0.1.0b0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- tests/test_files.py,sha256=jNIcwdDrPGKv78zGJReb6s0kPZLr3mTvkwwl3cP6MlM,1622
3
- vectoriz/__init__.py,sha256=fnnle0EjVejiZQ8t243kvFiqcTTFh9dzmZbNwayjh4U,156
4
- vectoriz/files.py,sha256=dbieWMdYYWWc6XPse3Al-VQnAJyXTj_Kmi31zUtUyHY,9040
5
- vectoriz/token_transformer.py,sha256=mfn3UI8i1x108iQ5dV_FyA-IhZoSSlXl7yO39s0m7C0,5786
6
- vectoriz/vector_db.py,sha256=_2dcq-9F9_tmu543Uw5-lKJiLA00QpaBebRBtwzecxI,7052
7
- vectoriz-0.0.4.dist-info/METADATA,sha256=fP_HtDi58xT-zn9VxLwjfAyM7oWyDgI7Kma1-mdEoz8,2603
8
- vectoriz-0.0.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
9
- vectoriz-0.0.4.dist-info/top_level.txt,sha256=Tcfk3kazBwJ_yySjjhlIhLoTWLQGSb5xV006X18O6Nk,15
10
- vectoriz-0.0.4.dist-info/RECORD,,