sunholo 0.114.2__py3-none-any.whl → 0.115.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/__init__.py +0 -2
- sunholo/chunker/message_data.py +1 -4
- sunholo/chunker/pdfs.py +1 -1
- sunholo/chunker/publish.py +1 -5
- sunholo/chunker/splitter.py +6 -0
- sunholo/cli/cli_init.py +3 -1
- sunholo/components/llm.py +1 -1
- sunholo/components/vectorstore.py +1 -1
- sunholo/embedder/embed_chunk.py +3 -0
- sunholo/senses/stream_voice.py +22 -7
- sunholo/streaming/content_buffer.py +6 -13
- sunholo/summarise/summarise.py +5 -5
- sunholo/types.py +52 -0
- sunholo/utils/config.py +4 -3
- sunholo/utils/config_class.py +20 -20
- sunholo/utils/gcp.py +0 -3
- sunholo/vertex/extensions_class.py +4 -4
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/METADATA +16 -12
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/RECORD +23 -26
- sunholo/patches/__init__.py +0 -0
- sunholo/patches/langchain/__init__.py +0 -0
- sunholo/patches/langchain/lancedb.py +0 -219
- sunholo/patches/langchain/vertexai.py +0 -506
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/WHEEL +0 -0
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.114.2.dist-info → sunholo-0.115.1.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
sunholo/__init__.py,sha256=
|
|
1
|
+
sunholo/__init__.py,sha256=Ap2yX2ITBVt_vkloYipUM8OwW14g6aor2NX7LWp0-mI,1133
|
|
2
2
|
sunholo/custom_logging.py,sha256=YfIN1oP3dOEkkYkyRBU8BGS3uJFGwUDsFCl8mIVbwvE,12225
|
|
3
|
+
sunholo/types.py,sha256=xdNNb4bR7O4jY9NTNzeeiX5VYjVxCGzYWy8hUXemxr0,1764
|
|
3
4
|
sunholo/agents/__init__.py,sha256=X2I3pPkGeKWjc3d0QgSpkTyqD8J8JtrEWqwrumf1MMc,391
|
|
4
5
|
sunholo/agents/chat_history.py,sha256=Gph_CdlP2otYnNdR1q1Umyyyvcad2F6K3LxU5yBQ9l0,5387
|
|
5
6
|
sunholo/agents/dispatch_to_qa.py,sha256=AwLS41oK6iS8xre-HuWjS4jj9dvU-evWI58EYfG65fg,8879
|
|
@@ -35,16 +36,16 @@ sunholo/chunker/doc_handling.py,sha256=t_lDazHfJbs4Q2Ruq2MvBBeJRfsjjQkzMxKuX8qQK
|
|
|
35
36
|
sunholo/chunker/encode_metadata.py,sha256=hxxd9KU35Xi0Z_EL8kt_oD66pKfBLhEjBImC16ew-Eo,1919
|
|
36
37
|
sunholo/chunker/images.py,sha256=id2PBu6XyGEOtgafq2v0c9_O6kxaC_pYFMnbsIitkSg,1868
|
|
37
38
|
sunholo/chunker/loaders.py,sha256=5NXrMxV-WdbFpxeLhFzccw0_zhf1UQ7yKFFeaMkc9Bc,11105
|
|
38
|
-
sunholo/chunker/message_data.py,sha256
|
|
39
|
-
sunholo/chunker/pdfs.py,sha256=
|
|
39
|
+
sunholo/chunker/message_data.py,sha256=iPrYUQRjjCGoVDvnqGEudkXi1PdbOwxey6v08_-GF2g,10826
|
|
40
|
+
sunholo/chunker/pdfs.py,sha256=DFZdsvAQt8qMvg7d-3dj29ULgKJZYXy4qGdORLmE790,2467
|
|
40
41
|
sunholo/chunker/process_chunker_data.py,sha256=uO-YOEHIjAOy0ZMJ0vea9OMNsQBISHfhbtgoyuHiP6s,3598
|
|
41
|
-
sunholo/chunker/publish.py,sha256=
|
|
42
|
+
sunholo/chunker/publish.py,sha256=Y-4w59iVGoXCKaun9jMWVR7Cqprb9mTZGiaVk5y_zrY,2933
|
|
42
43
|
sunholo/chunker/pubsub.py,sha256=48bhuAcszN7LGe3-ksPSLHHhq0uKxiXOrizck5qpcP0,1012
|
|
43
|
-
sunholo/chunker/splitter.py,sha256=
|
|
44
|
+
sunholo/chunker/splitter.py,sha256=RfekLPkjhCcNd1PFXIj_FxusJMJ8_3cyWl7bsYvtQ0g,7068
|
|
44
45
|
sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
46
|
sunholo/cli/chat_vac.py,sha256=sYPzUDwwwebJvIobv3GRW_xbQQ4BTy9G-WHdarGCHB0,23705
|
|
46
47
|
sunholo/cli/cli.py,sha256=Bhyrs8GEtJTbsvPYufEY184ra13eusATXAnJClJ_LGY,4474
|
|
47
|
-
sunholo/cli/cli_init.py,sha256=
|
|
48
|
+
sunholo/cli/cli_init.py,sha256=u6BZFtUyFMOKrXZ46-DfET0IpH3Tl2PlOz386rADtrw,8549
|
|
48
49
|
sunholo/cli/configs.py,sha256=QUM9DvKOdZmEQRM5uI3Nh887T0YDiSMr7O240zTLqws,4546
|
|
49
50
|
sunholo/cli/deploy.py,sha256=zxdwUsRTRMC8U5vyRv0JiKBLFn84Ug_Tc88-_h9hJSs,1609
|
|
50
51
|
sunholo/cli/embedder.py,sha256=v-FKiSPHaQzB6ctClclYueIf3bf3CqYtC1oRgPfT4dY,5566
|
|
@@ -54,9 +55,9 @@ sunholo/cli/sun_rich.py,sha256=UpMqeJ0C8i0pkue1AHnnyyX0bFJ9zZeJ7HBR6yhuA8A,54
|
|
|
54
55
|
sunholo/cli/swagger.py,sha256=absYKAU-7Yd2eiVNUY-g_WLl2zJfeRUNdWQ0oH8M_HM,1564
|
|
55
56
|
sunholo/cli/vertex.py,sha256=8130YCarxHL1UC3aqblNmUwGZTXbkdL4Y_FOnZJsWiI,2056
|
|
56
57
|
sunholo/components/__init__.py,sha256=IDoylb74zFKo6NIS3RQqUl0PDFBGVxM1dfUmO7OJ44U,176
|
|
57
|
-
sunholo/components/llm.py,sha256=
|
|
58
|
+
sunholo/components/llm.py,sha256=xIO_UPYwlpI9Bve76pQVhPgT1DLqCCo1626uKrvwFI4,13001
|
|
58
59
|
sunholo/components/retriever.py,sha256=Wmchv3huAM4w7DIS-a5Lp9Hi7M8pE6vZdxgseiT9SXk,7820
|
|
59
|
-
sunholo/components/vectorstore.py,sha256=
|
|
60
|
+
sunholo/components/vectorstore.py,sha256=k7GS1Y5c6ZGXSDAJvyCes6dTjhDAi0fjGbVLqpyfzBc,5918
|
|
60
61
|
sunholo/database/__init__.py,sha256=bpB5Nk21kwqYj-qdVnvNgXjLsbflnH4g-San7OHMqR4,283
|
|
61
62
|
sunholo/database/alloydb.py,sha256=x1zUMB-EVWbE2Zvp4nAs2Z-tB_kOZmS45H2lwVHdYnk,11678
|
|
62
63
|
sunholo/database/alloydb_client.py,sha256=q732tmRdSDutnUk7vRUPUPpi-yU5FK5rQko8co6yke0,19132
|
|
@@ -76,7 +77,7 @@ sunholo/discovery_engine/create_new.py,sha256=jWg5LW-QpFE8zq50ShaQJB3Wu8loiWB0P4
|
|
|
76
77
|
sunholo/discovery_engine/discovery_engine_client.py,sha256=jfIayVUOPM4svGF1S5Kk60rIG-xSo_e3zOHtBRg0nZA,22002
|
|
77
78
|
sunholo/discovery_engine/get_ai_search_chunks.py,sha256=hsFGOQugSeTMPEaQ16XTs_D45F8NABBm2IsAEdTk7kQ,4316
|
|
78
79
|
sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
|
|
79
|
-
sunholo/embedder/embed_chunk.py,sha256=
|
|
80
|
+
sunholo/embedder/embed_chunk.py,sha256=Vvvj3-H4pSb1a2sLik3-X3X459j2jrUq1dBNAsOcQLo,7156
|
|
80
81
|
sunholo/excel/__init__.py,sha256=AqTMN9K4qJYi4maEgoORc5oxDVGO_eqmwzDaVP37JgY,56
|
|
81
82
|
sunholo/excel/plugin.py,sha256=rl3FoECZ6Ts8KKExPrbPwr3u3CegZfsevmcjgUXAlhE,4033
|
|
82
83
|
sunholo/gcs/__init__.py,sha256=SZvbsMFDko40sIRHTHppA37IijvJTae54vrhooEF5-4,90
|
|
@@ -106,10 +107,6 @@ sunholo/llamaindex/llamaindex_class.py,sha256=PnpPoc7LpP7xvKIXYu-UvI4ehj67pGhE1E
|
|
|
106
107
|
sunholo/llamaindex/user_history.py,sha256=ZtkecWuF9ORduyGB8kF8gP66bm9DdvCI-ZiK6Kt-cSE,2265
|
|
107
108
|
sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
109
|
sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
|
|
109
|
-
sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
|
-
sunholo/patches/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
111
|
-
sunholo/patches/langchain/lancedb.py,sha256=KstVpYtI2E1s6_l_kq6js8ruIPJduO6nnvAXbqjoqCc,7507
|
|
112
|
-
sunholo/patches/langchain/vertexai.py,sha256=kX1IvC2D1kMgM3SaSzP9HEosbD6CUymLJd7w9eXo3eE,17677
|
|
113
110
|
sunholo/pubsub/__init__.py,sha256=DfTEk4zmCfqn6gFxRrqDO0pOrvXTDqH-medpgYO4PGw,117
|
|
114
111
|
sunholo/pubsub/process_pubsub.py,sha256=rN2N4WM6PZkMKDrdT8pnEfTvsXACRyJFqIHJQCbuxLs,3088
|
|
115
112
|
sunholo/pubsub/pubsub_manager.py,sha256=19w_N0LiG-wgVWvgJ13b8BUeN8ZzgSPXAhPmL1HRRSI,6966
|
|
@@ -117,14 +114,14 @@ sunholo/qna/__init__.py,sha256=F8q1uR_HreoSX0IfmKY1qoSwIgXhO2Q8kuDSxh9_-EE,28
|
|
|
117
114
|
sunholo/qna/parsers.py,sha256=YpOaK5S_LxJ6FbliSYDc3AVOJ62RVduayoNnzi_p8CM,2494
|
|
118
115
|
sunholo/qna/retry.py,sha256=yMw7RTkw-RXCzfENPJOt8c32mXlpvOR589EGkvK-6yI,2028
|
|
119
116
|
sunholo/senses/__init__.py,sha256=fbWqVwwzkV5uRSb8lQzo4pn0ja_VYVWbUYapurSowBs,39
|
|
120
|
-
sunholo/senses/stream_voice.py,sha256=
|
|
117
|
+
sunholo/senses/stream_voice.py,sha256=_r5rFZ30upLKzxPmgS0SHKxx0AZbbirn0gZfGUHVHtY,18696
|
|
121
118
|
sunholo/streaming/__init__.py,sha256=MpbydI2UYo_adttPQFkxNM33b-QRyNEbrKJx0C2AGPc,241
|
|
122
|
-
sunholo/streaming/content_buffer.py,sha256=
|
|
119
|
+
sunholo/streaming/content_buffer.py,sha256=bqPta3Q1tXI88Ngyj1kgPC-v4phhGm1nZURcuqQSGIQ,12537
|
|
123
120
|
sunholo/streaming/langserve.py,sha256=hi7q8WY8DPKrALl9m_dOMxWOdE-iEuk7YW05SVDFIX8,6514
|
|
124
121
|
sunholo/streaming/stream_lookup.py,sha256=hYg1DbdSE_QNJ8ZB-ynXJlWgvFjrGvwoUsGJu_E0pRQ,360
|
|
125
122
|
sunholo/streaming/streaming.py,sha256=gSxLuwK-5-t5D1AjcHf838BY-L4jvdkdn_xePl-DK3o,16635
|
|
126
123
|
sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
|
|
127
|
-
sunholo/summarise/summarise.py,sha256=
|
|
124
|
+
sunholo/summarise/summarise.py,sha256=UnycBVLLEXK1HitCOG2zW3XIyxMrw47xoVf6e2OC9A0,4150
|
|
128
125
|
sunholo/terraform/__init__.py,sha256=yixxEltc3n9UpZaVi05GlgS-YRq_DVGjUc37I9ajeP4,76
|
|
129
126
|
sunholo/terraform/tfvars_editor.py,sha256=-TBBWbALYb5HLFYwD2s70Kp27ys6fzIyreBFOT5kqqY,13142
|
|
130
127
|
sunholo/tools/__init__.py,sha256=5NuYpwwTX81qGUWvgwfItoSLXteNnp7KjgD7IPZUFjI,53
|
|
@@ -132,10 +129,10 @@ sunholo/tools/web_browser.py,sha256=8Gdf02F4zCOeSnijnfaL6jzk4oaSI0cj48o-esoWzwE,
|
|
|
132
129
|
sunholo/utils/__init__.py,sha256=Hv02T5L2zYWvCso5hzzwm8FQogwBq0OgtUbN_7Quzqc,89
|
|
133
130
|
sunholo/utils/api_key.py,sha256=Ct4bIAQZxzPEw14hP586LpVxBAVi_W9Serpy0BK-7KI,244
|
|
134
131
|
sunholo/utils/big_context.py,sha256=gJIP7_ZL-YSLhOMq8jmFTMqH1wq8eB1NK7oKPeZAq2s,5578
|
|
135
|
-
sunholo/utils/config.py,sha256=
|
|
136
|
-
sunholo/utils/config_class.py,sha256=
|
|
132
|
+
sunholo/utils/config.py,sha256=bz0ODJyqnoHQIsk4pmNpVxxq5WvwS0SfOq4cnCjQPJk,9105
|
|
133
|
+
sunholo/utils/config_class.py,sha256=7xp9jKzxej2SyFmtc74nf_rYjdRhGFBtY2cV9-4JFGY,9795
|
|
137
134
|
sunholo/utils/config_schema.py,sha256=Wv-ncitzljOhgbDaq9qnFqH5LCuxNv59dTGDWgd1qdk,4189
|
|
138
|
-
sunholo/utils/gcp.py,sha256=
|
|
135
|
+
sunholo/utils/gcp.py,sha256=lus1HH8YhFInw6QRKwfvKZq-Lz-2KQg4ips9v1I_3zE,4783
|
|
139
136
|
sunholo/utils/gcp_project.py,sha256=Fa0IhCX12bZ1ctF_PKN8PNYd7hihEUfb90kilBfUDjg,1411
|
|
140
137
|
sunholo/utils/mime.py,sha256=7_J1PnWOlvAPRoHWKESAncdRVVldVwRdKvuDvi9sRfE,2020
|
|
141
138
|
sunholo/utils/parsers.py,sha256=wES0fRn3GONoymRXOXt-z62HCoOiUvvFXa-MfKfjCls,6421
|
|
@@ -144,15 +141,15 @@ sunholo/utils/user_ids.py,sha256=SQd5_H7FE7vcTZp9AQuQDWBXd4FEEd7TeVMQe1H4Ny8,292
|
|
|
144
141
|
sunholo/utils/version.py,sha256=P1QAJQdZfT2cMqdTSmXmcxrD2PssMPEGM-WI6083Fck,237
|
|
145
142
|
sunholo/vertex/__init__.py,sha256=tMd7ysJ1uwBjfFSn8JL0uS3-s6h_X4GAUBz8AArZEF0,339
|
|
146
143
|
sunholo/vertex/extensions_call.py,sha256=QeQbL3aAHlc4_-SynOzooZ_3xkQWAlcgNmFBSwLNtN8,13816
|
|
147
|
-
sunholo/vertex/extensions_class.py,sha256=
|
|
144
|
+
sunholo/vertex/extensions_class.py,sha256=DqUovqIJgDhXRjCThzaVWM56u0cZHV0NRC6135Tllso,21032
|
|
148
145
|
sunholo/vertex/genai_functions.py,sha256=2z6grM9H0Z79Yzx88l8mE1wXck3bRa0TWvnqZZ9ifDc,2051
|
|
149
146
|
sunholo/vertex/init.py,sha256=1OQwcPBKZYBTDPdyU7IM4X4OmiXLdsNV30C-fee2scQ,2875
|
|
150
147
|
sunholo/vertex/memory_tools.py,sha256=tBZxqVZ4InTmdBvLlOYwoSEWu4-kGquc-gxDwZCC4FA,7667
|
|
151
148
|
sunholo/vertex/safety.py,sha256=S9PgQT1O_BQAkcqauWncRJaydiP8Q_Jzmu9gxYfy1VA,2482
|
|
152
149
|
sunholo/vertex/type_dict_to_json.py,sha256=uTzL4o9tJRao4u-gJOFcACgWGkBOtqACmb6ihvCErL8,4694
|
|
153
|
-
sunholo-0.
|
|
154
|
-
sunholo-0.
|
|
155
|
-
sunholo-0.
|
|
156
|
-
sunholo-0.
|
|
157
|
-
sunholo-0.
|
|
158
|
-
sunholo-0.
|
|
150
|
+
sunholo-0.115.1.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
|
|
151
|
+
sunholo-0.115.1.dist-info/METADATA,sha256=FcfcUP6qrlqE6kyj6uUVWyWMfYvZ03zRyJGBkllThS4,9134
|
|
152
|
+
sunholo-0.115.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
153
|
+
sunholo-0.115.1.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
|
|
154
|
+
sunholo-0.115.1.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
|
|
155
|
+
sunholo-0.115.1.dist-info/RECORD,,
|
sunholo/patches/__init__.py
DELETED
|
File without changes
|
|
File without changes
|
|
@@ -1,219 +0,0 @@
|
|
|
1
|
-
# from https://github.com/langchain-ai/langchain/blob/6c18f73ca56bb72cb964aaa668c3f8ac14237619/libs/community/langchain_community/vectorstores/lancedb.py
|
|
2
|
-
from __future__ import annotations
|
|
3
|
-
|
|
4
|
-
import uuid, time
|
|
5
|
-
from typing import Any, Iterable, List, Optional
|
|
6
|
-
|
|
7
|
-
from langchain_core.documents import Document
|
|
8
|
-
from langchain_core.embeddings import Embeddings
|
|
9
|
-
from langchain_core.vectorstores import VectorStore
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class LanceDB(VectorStore):
|
|
13
|
-
"""`LanceDB` vector store.
|
|
14
|
-
|
|
15
|
-
To use, you should have ``lancedb`` python package installed.
|
|
16
|
-
|
|
17
|
-
Example:
|
|
18
|
-
.. code-block:: python
|
|
19
|
-
|
|
20
|
-
db = lancedb.connect('./lancedb')
|
|
21
|
-
table = db.open_table('my_table')
|
|
22
|
-
vectorstore = LanceDB(table, embedding_function)
|
|
23
|
-
vectorstore.add_texts(['text1', 'text2'])
|
|
24
|
-
result = vectorstore.similarity_search('text1')
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
def __init__(
|
|
28
|
-
self,
|
|
29
|
-
connection: Optional[Any] = None,
|
|
30
|
-
embedding: Optional[Embeddings] = None,
|
|
31
|
-
vector_key: Optional[str] = "vector",
|
|
32
|
-
id_key: Optional[str] = "id",
|
|
33
|
-
text_key: Optional[str] = "text",
|
|
34
|
-
table_name: Optional[str] = "vectorstore",
|
|
35
|
-
):
|
|
36
|
-
"""Initialize with Lance DB connection"""
|
|
37
|
-
try:
|
|
38
|
-
import lancedb
|
|
39
|
-
except ImportError:
|
|
40
|
-
raise ImportError(
|
|
41
|
-
"Could not import lancedb python package. "
|
|
42
|
-
"Please install it with `pip install lancedb`."
|
|
43
|
-
)
|
|
44
|
-
if not isinstance(connection, lancedb.db.LanceTable):
|
|
45
|
-
raise ValueError(
|
|
46
|
-
"connection should be an instance of lancedb.db.LanceTable, ",
|
|
47
|
-
f"got {type(connection)}",
|
|
48
|
-
)
|
|
49
|
-
self.lancedb = lancedb
|
|
50
|
-
self._embedding = embedding
|
|
51
|
-
self._vector_key = vector_key
|
|
52
|
-
self._id_key = id_key
|
|
53
|
-
self._text_key = text_key
|
|
54
|
-
self._table_name = table_name
|
|
55
|
-
|
|
56
|
-
if self._embedding is None:
|
|
57
|
-
raise ValueError("embedding should be provided")
|
|
58
|
-
|
|
59
|
-
if connection is not None:
|
|
60
|
-
if not isinstance(connection, lancedb.db.LanceTable):
|
|
61
|
-
raise ValueError(
|
|
62
|
-
"connection should be an instance of lancedb.db.LanceTable, ",
|
|
63
|
-
f"got {type(connection)}",
|
|
64
|
-
)
|
|
65
|
-
self._connection = connection
|
|
66
|
-
else:
|
|
67
|
-
self._connection = self._init_table()
|
|
68
|
-
|
|
69
|
-
@property
|
|
70
|
-
def embeddings(self) -> Embeddings:
|
|
71
|
-
return self._embedding
|
|
72
|
-
|
|
73
|
-
def add_texts(
|
|
74
|
-
self,
|
|
75
|
-
texts: Iterable[str],
|
|
76
|
-
metadatas: Optional[List[dict]] = None,
|
|
77
|
-
ids: Optional[List[str]] = None,
|
|
78
|
-
**kwargs: Any,
|
|
79
|
-
) -> List[str]:
|
|
80
|
-
"""Turn texts into embedding and add it to the database
|
|
81
|
-
|
|
82
|
-
Args:
|
|
83
|
-
texts: Iterable of strings to add to the vectorstore.
|
|
84
|
-
metadatas: Optional list of metadatas associated with the texts.
|
|
85
|
-
ids: Optional list of ids to associate with the texts.
|
|
86
|
-
|
|
87
|
-
Returns:
|
|
88
|
-
List of ids of the added texts.
|
|
89
|
-
"""
|
|
90
|
-
# Embed texts and create documents
|
|
91
|
-
docs = []
|
|
92
|
-
ids = ids or [str(uuid.uuid4()) for _ in texts]
|
|
93
|
-
embeddings = self._embedding.embed_documents(list(texts))
|
|
94
|
-
for idx, text in enumerate(texts):
|
|
95
|
-
embedding = embeddings[idx]
|
|
96
|
-
metadata = metadatas[idx] if metadatas else {}
|
|
97
|
-
docs.append(
|
|
98
|
-
{
|
|
99
|
-
self._vector_key: embedding,
|
|
100
|
-
self._id_key: ids[idx],
|
|
101
|
-
self._text_key: text,
|
|
102
|
-
**metadata,
|
|
103
|
-
}
|
|
104
|
-
)
|
|
105
|
-
|
|
106
|
-
max_retries = 5
|
|
107
|
-
retry_delay = 1
|
|
108
|
-
for attempt in range(max_retries):
|
|
109
|
-
try:
|
|
110
|
-
self._connection.add(docs)
|
|
111
|
-
return ids # If success, return immediately
|
|
112
|
-
except OSError as e:
|
|
113
|
-
if "429 Too Many Requests" in str(e):
|
|
114
|
-
if attempt < max_retries - 1:
|
|
115
|
-
time.sleep(retry_delay)
|
|
116
|
-
retry_delay *= 2 # Exponential backoff
|
|
117
|
-
continue
|
|
118
|
-
else:
|
|
119
|
-
raise # Re-raise the exception if max retries are reached
|
|
120
|
-
else:
|
|
121
|
-
raise # Re-raise the exception if it's not a rate limit error
|
|
122
|
-
|
|
123
|
-
return ids
|
|
124
|
-
|
|
125
|
-
def similarity_search(self, query: str, k: int = 4, **kwargs: Any) -> List[Document]:
|
|
126
|
-
"""Return documents most similar to the query
|
|
127
|
-
|
|
128
|
-
Args:
|
|
129
|
-
query: String to query the vectorstore with.
|
|
130
|
-
k: Number of documents to return.
|
|
131
|
-
|
|
132
|
-
Returns:
|
|
133
|
-
List of documents most similar to the query.
|
|
134
|
-
"""
|
|
135
|
-
query_type = kwargs.get('query_type', None)
|
|
136
|
-
where_clause = kwargs.get('where', None)
|
|
137
|
-
|
|
138
|
-
if query_type == "hybrid":
|
|
139
|
-
# Hybrid search logic - requires table created with embedding function
|
|
140
|
-
# https://lancedb.github.io/lancedb/hybrid_search/hybrid_search/
|
|
141
|
-
search_query = self._connection.search(query, query_type="hybrid")
|
|
142
|
-
elif query_type == "text":
|
|
143
|
-
# requires import tantivy to be installed
|
|
144
|
-
try:
|
|
145
|
-
import tantivy # noqa: F401
|
|
146
|
-
except ImportError:
|
|
147
|
-
raise ImportError(
|
|
148
|
-
"Could not import tantivy python package. "
|
|
149
|
-
"Please install it with `pip install tantivy`."
|
|
150
|
-
)
|
|
151
|
-
search_query = self._connection.search(query)
|
|
152
|
-
else:
|
|
153
|
-
# Original search logic
|
|
154
|
-
embedding = self._embedding.embed_query(query)
|
|
155
|
-
search_query = self._connection.search(embedding, vector_column_name=self._vector_key)
|
|
156
|
-
|
|
157
|
-
if where_clause:
|
|
158
|
-
# Apply the where condition if specified
|
|
159
|
-
search_query = search_query.where(where_clause)
|
|
160
|
-
|
|
161
|
-
docs = search_query.limit(k).to_arrow()
|
|
162
|
-
|
|
163
|
-
columns = docs.schema.names
|
|
164
|
-
return [
|
|
165
|
-
Document(
|
|
166
|
-
page_content=docs[self._text_key][idx].as_py(),
|
|
167
|
-
metadata={
|
|
168
|
-
col: docs[col][idx].as_py()
|
|
169
|
-
for col in columns
|
|
170
|
-
if col != self._text_key
|
|
171
|
-
},
|
|
172
|
-
)
|
|
173
|
-
for idx in range(len(docs))
|
|
174
|
-
]
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
@classmethod
|
|
178
|
-
def from_texts(
|
|
179
|
-
cls,
|
|
180
|
-
texts: List[str],
|
|
181
|
-
embedding: Embeddings,
|
|
182
|
-
metadatas: Optional[List[dict]] = None,
|
|
183
|
-
connection: Any = None,
|
|
184
|
-
vector_key: Optional[str] = "vector",
|
|
185
|
-
id_key: Optional[str] = "id",
|
|
186
|
-
text_key: Optional[str] = "text",
|
|
187
|
-
**kwargs: Any,
|
|
188
|
-
) -> LanceDB:
|
|
189
|
-
instance = LanceDB(
|
|
190
|
-
connection,
|
|
191
|
-
embedding,
|
|
192
|
-
vector_key,
|
|
193
|
-
id_key,
|
|
194
|
-
text_key,
|
|
195
|
-
)
|
|
196
|
-
instance.add_texts(texts, metadatas=metadatas, **kwargs)
|
|
197
|
-
|
|
198
|
-
return instance
|
|
199
|
-
|
|
200
|
-
def _init_table(self) -> Any:
|
|
201
|
-
import pyarrow as pa
|
|
202
|
-
|
|
203
|
-
schema = pa.schema(
|
|
204
|
-
[
|
|
205
|
-
pa.field(
|
|
206
|
-
self._vector_key,
|
|
207
|
-
pa.list_(
|
|
208
|
-
pa.float32(),
|
|
209
|
-
len(self.embeddings.embed_query("test")), # type: ignore
|
|
210
|
-
),
|
|
211
|
-
),
|
|
212
|
-
pa.field(self._id_key, pa.string()),
|
|
213
|
-
pa.field(self._text_key, pa.string()),
|
|
214
|
-
]
|
|
215
|
-
)
|
|
216
|
-
db = self.lancedb.connect("/tmp/lancedb")
|
|
217
|
-
tbl = db.create_table(self._table_name, schema=schema, mode="overwrite")
|
|
218
|
-
return tbl
|
|
219
|
-
|