vectoriz 0.0.4__py3-none-any.whl → 0.1.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vectoriz/files.py +1 -0
- vectoriz/token_transformer.py +4 -3
- vectoriz/vector_db.py +2 -10
- {vectoriz-0.0.4.dist-info → vectoriz-0.1.0b0.dist-info}/METADATA +27 -2
- vectoriz-0.1.0b0.dist-info/RECORD +10 -0
- vectoriz-0.0.4.dist-info/RECORD +0 -10
- {vectoriz-0.0.4.dist-info → vectoriz-0.1.0b0.dist-info}/WHEEL +0 -0
- {vectoriz-0.0.4.dist-info → vectoriz-0.1.0b0.dist-info}/top_level.txt +0 -0
    
        vectoriz/files.py
    CHANGED
    
    
    
        vectoriz/token_transformer.py
    CHANGED
    
    | @@ -73,15 +73,16 @@ class TokenTransformer: | |
| 73 73 | 
             
                def search(
         | 
| 74 74 | 
             
                    self,
         | 
| 75 75 | 
             
                    query: str,
         | 
| 76 | 
            -
                     | 
| 76 | 
            +
                    index: faiss.IndexFlatL2,
         | 
| 77 | 
            +
                    texts: list[str],
         | 
| 77 78 | 
             
                    context_amount: int = 1,
         | 
| 78 79 | 
             
                ) -> str:
         | 
| 79 80 | 
             
                    query_embedding = self._query_to_embeddings(query)
         | 
| 80 | 
            -
                    _, I =  | 
| 81 | 
            +
                    _, I = index.search(query_embedding, k=context_amount)
         | 
| 81 82 | 
             
                    context = ""
         | 
| 82 83 |  | 
| 83 84 | 
             
                    for i in I[0]:
         | 
| 84 | 
            -
                        context +=  | 
| 85 | 
            +
                        context += texts[i] + "\n"
         | 
| 85 86 |  | 
| 86 87 | 
             
                    return context.strip()
         | 
| 87 88 |  | 
    
        vectoriz/vector_db.py
    CHANGED
    
    | @@ -54,15 +54,6 @@ class VectorDBClient: | |
| 54 54 |  | 
| 55 55 | 
             
            class VectorDB:
         | 
| 56 56 |  | 
| 57 | 
            -
                def __init__(self):
         | 
| 58 | 
            -
                    """
         | 
| 59 | 
            -
                    Constructor for the class.
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                    Initializes the following attributes:
         | 
| 62 | 
            -
                    - transformer: A TokenTransformer instance for text transformation.
         | 
| 63 | 
            -
                    """
         | 
| 64 | 
            -
                    self.transformer = TokenTransformer()
         | 
| 65 | 
            -
             | 
| 66 57 | 
             
                def load_saved_data(
         | 
| 67 58 | 
             
                    self, faiss_db_path: str, np_db_path: str
         | 
| 68 59 | 
             
                ) -> Optional[VectorDBClient]:
         | 
| @@ -158,13 +149,14 @@ class VectorDB: | |
| 158 149 | 
             
                        - 'chunk_names': The chunk names
         | 
| 159 150 | 
             
                        - 'texts': The text content
         | 
| 160 151 | 
             
                    """
         | 
| 152 | 
            +
                    transformer = TokenTransformer()
         | 
| 161 153 | 
             
                    np_db_path = np_db_path if np_db_path.endswith(".npz") else np_db_path + ".npz"
         | 
| 162 154 |  | 
| 163 155 | 
             
                    embeddings_np: np.ndarray = None
         | 
| 164 156 | 
             
                    if argument.ndarray_data is not None:
         | 
| 165 157 | 
             
                        embeddings_np = argument.ndarray_data
         | 
| 166 158 | 
             
                    else:
         | 
| 167 | 
            -
                        embeddings_np =  | 
| 159 | 
            +
                        embeddings_np = transformer.get_np_vectors(argument.embeddings)
         | 
| 168 160 |  | 
| 169 161 | 
             
                    np.savez(
         | 
| 170 162 | 
             
                        np_db_path,
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Metadata-Version: 2.4
         | 
| 2 2 | 
             
            Name: vectoriz
         | 
| 3 | 
            -
            Version: 0. | 
| 3 | 
            +
            Version: 0.1.0b0
         | 
| 4 4 | 
             
            Summary: Python library for creating vectorized data from text or files.
         | 
| 5 5 | 
             
            Home-page: https://github.com/PedroHenriqueDevBR/vectoriz
         | 
| 6 6 | 
             
            Author: PedroHenriqueDevBR
         | 
| @@ -25,6 +25,24 @@ Dynamic: summary | |
| 25 25 |  | 
| 26 26 | 
             
            # Vectoriz
         | 
| 27 27 |  | 
| 28 | 
            +
            [](https://pypi.org/project/vectoriz/)
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            [](https://github.com/PedroHenriqueDevBR/vectoriz/blob/main/LICENSE)
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            [](https://www.python.org/downloads/)
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            [](https://github.com/PedroHenriqueDevBR/vectoriz/issues)
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            [](https://github.com/PedroHenriqueDevBR/vectoriz/stargazers)
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            [](https://github.com/PedroHenriqueDevBR/vectoriz/network)
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            Vectoriz is available on PyPI and can be installed via pip:
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            ```bash
         | 
| 43 | 
            +
            pip install vectoriz
         | 
| 44 | 
            +
            ```
         | 
| 45 | 
            +
             | 
| 28 46 | 
             
            A tool for generating vector embeddings for Retrieval-Augmented Generation (RAG) applications.
         | 
| 29 47 |  | 
| 30 48 | 
             
            ## Overview
         | 
| @@ -63,7 +81,8 @@ files_features = FilesFeature() | |
| 63 81 | 
             
            argument = files_features.load_all_files_from_directory(directory_path)
         | 
| 64 82 |  | 
| 65 83 | 
             
            # Create the FAISS index to be used in queries
         | 
| 66 | 
            -
             | 
| 84 | 
            +
            token_data = transformer.create_index(argument.text_list)
         | 
| 85 | 
            +
            index = token_data.index
         | 
| 67 86 |  | 
| 68 87 | 
             
            # To load files from VectorDB use
         | 
| 69 88 | 
             
            vector_client = VectorDBClient()
         | 
| @@ -74,6 +93,12 @@ argument = vector_client.file_argument | |
| 74 93 | 
             
            # To save data on VectorDB use
         | 
| 75 94 | 
             
            vector_client = VectorDBClient(index, argument)
         | 
| 76 95 | 
             
            vector_client.save_data(index_db_path, np_db_path)
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            # To search for information in the index
         | 
| 98 | 
            +
            query = input(">>> ")
         | 
| 99 | 
            +
            amount_content = 1
         | 
| 100 | 
            +
            response = transformer.search(query, index, argument.text_list, amount_content)
         | 
| 101 | 
            +
            print(response)
         | 
| 77 102 | 
             
            ```
         | 
| 78 103 |  | 
| 79 104 | 
             
            ## Contributing
         | 
| @@ -0,0 +1,10 @@ | |
| 1 | 
            +
            tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 2 | 
            +
            tests/test_files.py,sha256=jNIcwdDrPGKv78zGJReb6s0kPZLr3mTvkwwl3cP6MlM,1622
         | 
| 3 | 
            +
            vectoriz/__init__.py,sha256=fnnle0EjVejiZQ8t243kvFiqcTTFh9dzmZbNwayjh4U,156
         | 
| 4 | 
            +
            vectoriz/files.py,sha256=VxqRlHgUVUm2Afq7LX5YMvEX5T1XgQW8he2r4NY9WPQ,9041
         | 
| 5 | 
            +
            vectoriz/token_transformer.py,sha256=1KIGL6EAiuqFhFzh3grkJcdEjjwTC3kH6RNqRkYMkQU,5811
         | 
| 6 | 
            +
            vectoriz/vector_db.py,sha256=xxQjhfLiZ4inV5XHYHWcr-RRbH2BcpSCqTli9lOv_g8,6835
         | 
| 7 | 
            +
            vectoriz-0.1.0b0.dist-info/METADATA,sha256=nohEcf7bCgvQDBM4fyhKfpLS7lLWAlWk9TRHX1TTR5o,3696
         | 
| 8 | 
            +
            vectoriz-0.1.0b0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
         | 
| 9 | 
            +
            vectoriz-0.1.0b0.dist-info/top_level.txt,sha256=Tcfk3kazBwJ_yySjjhlIhLoTWLQGSb5xV006X18O6Nk,15
         | 
| 10 | 
            +
            vectoriz-0.1.0b0.dist-info/RECORD,,
         | 
    
        vectoriz-0.0.4.dist-info/RECORD
    DELETED
    
    | @@ -1,10 +0,0 @@ | |
| 1 | 
            -
            tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 2 | 
            -
            tests/test_files.py,sha256=jNIcwdDrPGKv78zGJReb6s0kPZLr3mTvkwwl3cP6MlM,1622
         | 
| 3 | 
            -
            vectoriz/__init__.py,sha256=fnnle0EjVejiZQ8t243kvFiqcTTFh9dzmZbNwayjh4U,156
         | 
| 4 | 
            -
            vectoriz/files.py,sha256=dbieWMdYYWWc6XPse3Al-VQnAJyXTj_Kmi31zUtUyHY,9040
         | 
| 5 | 
            -
            vectoriz/token_transformer.py,sha256=mfn3UI8i1x108iQ5dV_FyA-IhZoSSlXl7yO39s0m7C0,5786
         | 
| 6 | 
            -
            vectoriz/vector_db.py,sha256=_2dcq-9F9_tmu543Uw5-lKJiLA00QpaBebRBtwzecxI,7052
         | 
| 7 | 
            -
            vectoriz-0.0.4.dist-info/METADATA,sha256=fP_HtDi58xT-zn9VxLwjfAyM7oWyDgI7Kma1-mdEoz8,2603
         | 
| 8 | 
            -
            vectoriz-0.0.4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
         | 
| 9 | 
            -
            vectoriz-0.0.4.dist-info/top_level.txt,sha256=Tcfk3kazBwJ_yySjjhlIhLoTWLQGSb5xV006X18O6Nk,15
         | 
| 10 | 
            -
            vectoriz-0.0.4.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         |