sunholo 0.78.5__py3-none-any.whl → 0.79.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/agents/chat_history.py +1 -1
- sunholo/agents/dispatch_to_qa.py +1 -1
- sunholo/agents/fastapi/base.py +1 -1
- sunholo/agents/fastapi/qna_routes.py +1 -1
- sunholo/agents/flask/qna_routes.py +1 -1
- sunholo/agents/flask/vac_routes.py +1 -1
- sunholo/agents/langserve.py +1 -1
- sunholo/agents/pubsub.py +1 -1
- sunholo/agents/route.py +1 -1
- sunholo/agents/special_commands.py +1 -1
- sunholo/agents/swagger.py +1 -1
- sunholo/archive/archive.py +1 -1
- sunholo/auth/refresh.py +1 -1
- sunholo/auth/run.py +1 -1
- sunholo/azure/auth.py +1 -1
- sunholo/azure/blobs.py +1 -1
- sunholo/azure/event_grid.py +1 -1
- sunholo/bots/discord.py +1 -1
- sunholo/chunker/azure.py +1 -1
- sunholo/chunker/doc_handling.py +5 -1
- sunholo/chunker/encode_metadata.py +1 -1
- sunholo/chunker/images.py +1 -1
- sunholo/chunker/loaders.py +1 -1
- sunholo/chunker/message_data.py +1 -1
- sunholo/chunker/pdfs.py +1 -1
- sunholo/chunker/process_chunker_data.py +1 -1
- sunholo/chunker/publish.py +1 -1
- sunholo/chunker/pubsub.py +1 -1
- sunholo/chunker/splitter.py +1 -1
- sunholo/cli/chat_vac.py +1 -1
- sunholo/cli/cli.py +4 -1
- sunholo/components/llm.py +1 -1
- sunholo/components/retriever.py +18 -12
- sunholo/components/vectorstore.py +13 -2
- sunholo/database/alloydb.py +41 -8
- sunholo/database/alloydb_client.py +1 -1
- sunholo/database/database.py +1 -1
- sunholo/database/lancedb.py +1 -1
- sunholo/database/static_dbs.py +1 -1
- sunholo/discovery_engine/chunker_handler.py +1 -1
- sunholo/discovery_engine/discovery_engine_client.py +1 -1
- sunholo/embedder/embed_chunk.py +1 -1
- sunholo/gcs/add_file.py +1 -1
- sunholo/gcs/download_folder.py +1 -1
- sunholo/gcs/download_url.py +1 -1
- sunholo/gcs/metadata.py +1 -1
- sunholo/invoke/direct_vac_func.py +1 -1
- sunholo/invoke/invoke_vac_utils.py +1 -1
- sunholo/langfuse/callback.py +1 -1
- sunholo/langfuse/prompts.py +1 -1
- sunholo/llamaindex/__init__.py +1 -0
- sunholo/llamaindex/import_files.py +24 -17
- sunholo/llamaindex/llamaindex_class.py +192 -0
- sunholo/pubsub/process_pubsub.py +1 -1
- sunholo/pubsub/pubsub_manager.py +1 -1
- sunholo/qna/parsers.py +1 -1
- sunholo/qna/retry.py +1 -1
- sunholo/streaming/content_buffer.py +1 -1
- sunholo/streaming/langserve.py +1 -1
- sunholo/streaming/stream_lookup.py +1 -1
- sunholo/streaming/streaming.py +1 -1
- sunholo/summarise/summarise.py +1 -1
- sunholo/tools/web_browser.py +1 -1
- sunholo/utils/config.py +5 -5
- sunholo/utils/config_class.py +8 -4
- sunholo/vertex/extensions_call.py +1 -1
- sunholo/vertex/extensions_class.py +1 -1
- sunholo/vertex/genai_functions.py +1 -1
- sunholo/vertex/init.py +1 -1
- sunholo/vertex/memory_tools.py +29 -14
- {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/METADATA +2 -2
- sunholo-0.79.1.dist-info/RECORD +140 -0
- {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/WHEEL +1 -1
- sunholo/llamaindex/generate.py +0 -22
- sunholo-0.78.5.dist-info/RECORD +0 -140
- /sunholo/{logging.py → custom_logging.py} +0 -0
- {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/entry_points.txt +0 -0
- {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/top_level.txt +0 -0
sunholo/agents/chat_history.py
CHANGED
sunholo/agents/dispatch_to_qa.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from ..utils import ConfigManager
|
|
16
16
|
from ..auth import get_header
|
|
17
17
|
import requests
|
sunholo/agents/fastapi/base.py
CHANGED
|
@@ -15,7 +15,7 @@ from ...agents import extract_chat_history, handle_special_commands
|
|
|
15
15
|
from ...qna.parsers import parse_output
|
|
16
16
|
from ...streaming import start_streaming_chat_async
|
|
17
17
|
from ...archive import archive_qa
|
|
18
|
-
from ...logging import log
|
|
18
|
+
from ...custom_logging import log
|
|
19
19
|
|
|
20
20
|
class VACRequest(BaseModel):
|
|
21
21
|
user_input: str
|
|
@@ -21,7 +21,7 @@ from ...agents import extract_chat_history, handle_special_commands
|
|
|
21
21
|
from ...qna.parsers import parse_output
|
|
22
22
|
from ...streaming import start_streaming_chat
|
|
23
23
|
from ...archive import archive_qa
|
|
24
|
-
from ...logging import log
|
|
24
|
+
from ...custom_logging import log
|
|
25
25
|
from ...utils.config import load_config
|
|
26
26
|
from ...utils.version import sunholo_version
|
|
27
27
|
import os
|
|
@@ -7,7 +7,7 @@ from ...agents import extract_chat_history, handle_special_commands
|
|
|
7
7
|
from ...qna.parsers import parse_output
|
|
8
8
|
from ...streaming import start_streaming_chat
|
|
9
9
|
from ...archive import archive_qa
|
|
10
|
-
from ...logging import log
|
|
10
|
+
from ...custom_logging import log
|
|
11
11
|
from ...utils.config import load_config
|
|
12
12
|
from ...utils.version import sunholo_version
|
|
13
13
|
import os
|
sunholo/agents/langserve.py
CHANGED
sunholo/agents/pubsub.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
import base64
|
sunholo/agents/route.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from ..utils import load_config, ConfigManager
|
|
16
16
|
|
|
17
17
|
def read_cloud_run_url(agent, cloud_run_urls_file='config/cloud_run_urls.json'):
|
sunholo/agents/special_commands.py
CHANGED
|
@@ -26,7 +26,7 @@ from ..utils.parsers import contains_url, extract_urls
|
|
|
26
26
|
from ..chunker.publish import publish_text
|
|
27
27
|
from ..gcs.add_file import add_file_to_gcs
|
|
28
28
|
from ..utils.config import load_config_key
|
|
29
|
-
from ..logging import log
|
|
29
|
+
from ..custom_logging import log
|
|
30
30
|
|
|
31
31
|
# config file?
|
|
32
32
|
command_descriptions = {
|
sunholo/agents/swagger.py
CHANGED
sunholo/archive/archive.py
CHANGED
sunholo/auth/refresh.py
CHANGED
sunholo/auth/run.py
CHANGED
|
@@ -6,7 +6,7 @@ from ..utils.config import load_config
|
|
|
6
6
|
from ..utils import ConfigManager
|
|
7
7
|
from ..utils.gcp import is_running_on_cloudrun
|
|
8
8
|
from ..utils.api_key import has_multivac_api_key, get_multivac_api_key
|
|
9
|
-
from ..logging import log
|
|
9
|
+
from ..custom_logging import log
|
|
10
10
|
from ..agents.route import route_vac
|
|
11
11
|
from .gcloud import get_local_gcloud_token
|
|
12
12
|
|
sunholo/azure/auth.py
CHANGED
sunholo/azure/blobs.py
CHANGED
sunholo/azure/event_grid.py
CHANGED
sunholo/bots/discord.py
CHANGED
sunholo/chunker/azure.py
CHANGED
sunholo/chunker/doc_handling.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from ..utils import load_config_key
|
|
2
|
-
from ..logging import log
|
|
2
|
+
from ..custom_logging import log
|
|
3
3
|
from ..database.alloydb import add_document_if_not_exists
|
|
4
4
|
from ..database.uuid import generate_uuid_from_object_id
|
|
5
5
|
from ..components.llm import llm_str_to_llm
|
|
@@ -175,6 +175,10 @@ Be careful not to add any speculation or any details that are not covered in the
|
|
|
175
175
|
bucket_name = os.getenv("DOC_BUCKET")
|
|
176
176
|
if not bucket_name:
|
|
177
177
|
raise ValueError("No DOC_BUCKET configured for summary")
|
|
178
|
+
|
|
179
|
+
if bucket_name.startswith("gs://"):
|
|
180
|
+
bucket_name = bucket_name[len("gs://"):]
|
|
181
|
+
|
|
178
182
|
with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
|
|
179
183
|
temp_file.write(summary)
|
|
180
184
|
temp_file.flush()
|
sunholo/chunker/images.py
CHANGED
sunholo/chunker/loaders.py
CHANGED
|
@@ -18,7 +18,7 @@ from langchain_community.document_loaders import UnstructuredURLLoader
|
|
|
18
18
|
from langchain_community.document_loaders import GitLoader
|
|
19
19
|
from langchain_community.document_loaders import GoogleDriveLoader
|
|
20
20
|
|
|
21
|
-
from ..logging import log
|
|
21
|
+
from ..custom_logging import log
|
|
22
22
|
from .pdfs import read_pdf_file
|
|
23
23
|
from ..utils.config import load_config
|
|
24
24
|
|
sunholo/chunker/message_data.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
|
|
16
16
|
import pathlib
|
|
17
17
|
import tempfile
|
sunholo/chunker/pdfs.py
CHANGED
sunholo/chunker/process_chunker_data.py
CHANGED
|
@@ -16,7 +16,7 @@ from .publish import process_docs_chunks_vector_name
|
|
|
16
16
|
from .splitter import chunk_doc_to_docs
|
|
17
17
|
from ..azure.blobs import is_azure_blob
|
|
18
18
|
|
|
19
|
-
from ..logging import log
|
|
19
|
+
from ..custom_logging import log
|
|
20
20
|
|
|
21
21
|
def process_chunker_data(message_data, metadata, vector_name):
|
|
22
22
|
|
sunholo/chunker/publish.py
CHANGED
sunholo/chunker/pubsub.py
CHANGED
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
from ..logging import log
|
|
15
|
+
from ..custom_logging import log
|
|
16
16
|
from ..pubsub import process_pubsub_message
|
|
17
17
|
from .process_chunker_data import process_chunker_data
|
|
18
18
|
|
sunholo/chunker/splitter.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from ..utils.parsers import remove_whitespace
|
|
16
16
|
from langchain.schema import Document
|
|
17
17
|
import langchain.text_splitter as text_splitter
|
sunholo/cli/chat_vac.py
CHANGED
|
@@ -3,7 +3,7 @@ from ..streaming import generate_proxy_stream, can_agent_stream
|
|
|
3
3
|
from ..utils.user_ids import generate_user_id
|
|
4
4
|
from ..utils import ConfigManager
|
|
5
5
|
from ..utils.api_key import has_multivac_api_key
|
|
6
|
-
from ..logging import log
|
|
6
|
+
from ..custom_logging import log
|
|
7
7
|
from ..qna.parsers import parse_output
|
|
8
8
|
from ..gcs.add_file import add_file_to_gcs
|
|
9
9
|
from .run_proxy import clean_proxy_list, start_proxy, stop_proxy
|
sunholo/cli/cli.py
CHANGED
|
@@ -10,10 +10,11 @@ from .chat_vac import setup_vac_subparser
|
|
|
10
10
|
from .embedder import setup_embedder_subparser
|
|
11
11
|
from .swagger import setup_swagger_subparser
|
|
12
12
|
from .vertex import setup_vertex_subparser
|
|
13
|
+
from ..llamaindex import setup_llamaindex_subparser
|
|
13
14
|
|
|
14
15
|
from ..utils import ConfigManager
|
|
15
16
|
|
|
16
|
-
from ..logging import log
|
|
17
|
+
from ..custom_logging import log
|
|
17
18
|
|
|
18
19
|
from .sun_rich import console
|
|
19
20
|
import sys
|
|
@@ -87,6 +88,8 @@ def main(args=None):
|
|
|
87
88
|
setup_swagger_subparser(subparsers)
|
|
88
89
|
# vertex
|
|
89
90
|
setup_vertex_subparser(subparsers)
|
|
91
|
+
# llamaindex
|
|
92
|
+
setup_llamaindex_subparser(subparsers)
|
|
90
93
|
|
|
91
94
|
#TODO: add database setup commands: alloydb and supabase
|
|
92
95
|
|
sunholo/components/llm.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from ..utils import load_config_key, ConfigManager
|
|
16
16
|
|
|
17
17
|
import os
|
sunholo/components/retriever.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from .vectorstore import pick_vectorstore
|
|
16
16
|
from ..utils import load_config_key, ConfigManager
|
|
17
17
|
from .llm import get_embeddings
|
|
@@ -41,9 +41,14 @@ def load_memories(vector_name:str=None, config:ConfigManager=None):
|
|
|
41
41
|
|
|
42
42
|
return memories
|
|
43
43
|
|
|
44
|
-
def pick_retriever(vector_name, embeddings=None):
|
|
44
|
+
def pick_retriever(vector_name:str=None, config:ConfigManager=None, embeddings=None):
|
|
45
45
|
|
|
46
|
-
|
|
46
|
+
if config is None:
|
|
47
|
+
if vector_name is None:
|
|
48
|
+
raise ValueError("vector_name and config were none")
|
|
49
|
+
config = ConfigManager(vector_name)
|
|
50
|
+
|
|
51
|
+
memories = load_memories(config=config)
|
|
47
52
|
|
|
48
53
|
retriever_list = []
|
|
49
54
|
for memory in memories: # Iterate over the list
|
|
@@ -58,7 +63,7 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
58
63
|
log.info(f"Skipped from_metadata_id for {vectorstore}")
|
|
59
64
|
continue
|
|
60
65
|
|
|
61
|
-
embeddings = embeddings or get_embeddings(
|
|
66
|
+
embeddings = embeddings or get_embeddings(config=config)
|
|
62
67
|
read_only = value.get('read_only')
|
|
63
68
|
try:
|
|
64
69
|
vectorstore = pick_vectorstore(vectorstore,
|
|
@@ -93,15 +98,15 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
93
98
|
log.info(f"No retrievers were created for {memories}")
|
|
94
99
|
return None
|
|
95
100
|
|
|
96
|
-
retriever = process_retrieval(retriever_list,
|
|
101
|
+
retriever = process_retrieval(retriever_list, config=config)
|
|
97
102
|
|
|
98
103
|
return retriever
|
|
99
104
|
|
|
100
|
-
def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=None):
|
|
105
|
+
def metadata_retriever(metadata: dict, key: str, config:ConfigManager, embeddings=None):
|
|
101
106
|
"""
|
|
102
107
|
Decides which vector_name to retrieve from metadata passed
|
|
103
108
|
"""
|
|
104
|
-
memories = load_memories(
|
|
109
|
+
memories = load_memories(config=config)
|
|
105
110
|
|
|
106
111
|
retriever_list = []
|
|
107
112
|
for memory in memories: # Iterate over the list
|
|
@@ -118,7 +123,7 @@ def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=Non
|
|
|
118
123
|
raise ValueError(f"Missing {key} in {metadata}")
|
|
119
124
|
the_id = metadata[key]
|
|
120
125
|
read_only = value.get('read_only')
|
|
121
|
-
embeddings = embeddings or get_embeddings(
|
|
126
|
+
embeddings = embeddings or get_embeddings(config=config)
|
|
122
127
|
vectorstore = pick_vectorstore(vectorstore,
|
|
123
128
|
vector_name=the_id,
|
|
124
129
|
embeddings=embeddings,
|
|
@@ -133,17 +138,18 @@ def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=Non
|
|
|
133
138
|
log.info(f"No retrievers were created for {memories}")
|
|
134
139
|
return None
|
|
135
140
|
|
|
136
|
-
retriever = process_retrieval(retriever_list,
|
|
141
|
+
retriever = process_retrieval(retriever_list, config=config)
|
|
137
142
|
|
|
138
143
|
return retriever
|
|
139
144
|
|
|
140
145
|
|
|
141
146
|
|
|
142
|
-
def process_retrieval(retriever_list: list,
|
|
143
|
-
k_override =
|
|
147
|
+
def process_retrieval(retriever_list: list, config: ConfigManager):
|
|
148
|
+
k_override = config.vacConfig('memory_k')
|
|
144
149
|
lotr = MergerRetriever(retrievers=retriever_list)
|
|
145
150
|
|
|
146
|
-
filter_embeddings = get_embeddings(
|
|
151
|
+
filter_embeddings = get_embeddings(config=config)
|
|
152
|
+
|
|
147
153
|
filter = EmbeddingsRedundantFilter(embeddings=filter_embeddings)
|
|
148
154
|
pipeline = DocumentCompressorPipeline(transformers=[filter])
|
|
149
155
|
retriever = ContextualCompressionRetriever(
|
sunholo/components/vectorstore.py
CHANGED
|
@@ -12,10 +12,21 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import os
|
|
15
|
-
from ..logging import log
|
|
15
|
+
from ..custom_logging import log
|
|
16
|
+
from ..utils import ConfigManager
|
|
16
17
|
|
|
17
|
-
def pick_vectorstore(vs_str: str, vector_name: str, embeddings, read_only=None):
|
|
18
|
+
def pick_vectorstore(vs_str: str, vector_name: str=None, embeddings=None, config:ConfigManager=None, read_only=None):
|
|
18
19
|
log.debug('Picking vectorstore')
|
|
20
|
+
|
|
21
|
+
if not embeddings:
|
|
22
|
+
raise ValueError("Requires embeddings")
|
|
23
|
+
|
|
24
|
+
if config is None:
|
|
25
|
+
if vector_name is None:
|
|
26
|
+
raise ValueError("vector_name and config were none")
|
|
27
|
+
config = ConfigManager(vector_name)
|
|
28
|
+
|
|
29
|
+
vector_name = config.vector_name
|
|
19
30
|
|
|
20
31
|
if vs_str == 'supabase':
|
|
21
32
|
from supabase import Client, create_client
|
sunholo/database/alloydb.py
CHANGED
|
@@ -11,7 +11,7 @@ except ImportError:
|
|
|
11
11
|
from .database import get_vector_size
|
|
12
12
|
from .alloydb_client import AlloyDBClient
|
|
13
13
|
|
|
14
|
-
from ..logging import log
|
|
14
|
+
from ..custom_logging import log
|
|
15
15
|
from ..utils.config import load_config_key
|
|
16
16
|
|
|
17
17
|
|
|
@@ -232,7 +232,7 @@ async def load_alloydb_sql_async(sql, vector_name):
|
|
|
232
232
|
return documents
|
|
233
233
|
|
|
234
234
|
def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
|
|
235
|
-
unique_sources = set(sources)
|
|
235
|
+
unique_sources = set(sources.split())
|
|
236
236
|
# Choose the delimiter based on the search_type argument
|
|
237
237
|
delimiter = ' AND ' if search_type.upper() == "AND" else ' OR '
|
|
238
238
|
|
|
@@ -240,14 +240,14 @@ def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
|
|
|
240
240
|
conditions = delimiter.join(f"TRIM(source) {operator} '%{source}%'" for source in unique_sources)
|
|
241
241
|
if not conditions:
|
|
242
242
|
log.warning("Alloydb doc query found no like_patterns")
|
|
243
|
-
return
|
|
243
|
+
return ""
|
|
244
244
|
|
|
245
245
|
return conditions
|
|
246
246
|
|
|
247
247
|
def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
|
|
248
248
|
if not sources:
|
|
249
249
|
log.warning("No sources found for alloydb fetch")
|
|
250
|
-
return
|
|
250
|
+
return ""
|
|
251
251
|
|
|
252
252
|
table_name = f"{vector_name}_docstore"
|
|
253
253
|
|
|
@@ -263,10 +263,37 @@ def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
|
|
|
263
263
|
|
|
264
264
|
return query
|
|
265
265
|
|
|
266
|
+
def _list_sources_from_docstore(sources, vector_name, search_type="OR"):
|
|
267
|
+
table_name = f"{vector_name}_docstore"
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
if sources:
|
|
271
|
+
conditions = and_or_ilike(sources, search_type=search_type)
|
|
272
|
+
query = f"""
|
|
273
|
+
SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
|
|
274
|
+
FROM {table_name}
|
|
275
|
+
WHERE {conditions}
|
|
276
|
+
ORDER BY langchain_metadata->>'objectId' ASC
|
|
277
|
+
LIMIT 500;
|
|
278
|
+
"""
|
|
279
|
+
else:
|
|
280
|
+
query = f"""
|
|
281
|
+
SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
|
|
282
|
+
FROM {table_name}
|
|
283
|
+
ORDER BY langchain_metadata->>'objectId' ASC
|
|
284
|
+
LIMIT 500;
|
|
285
|
+
"""
|
|
286
|
+
|
|
287
|
+
return query
|
|
266
288
|
|
|
267
|
-
|
|
289
|
+
|
|
290
|
+
async def get_sources_from_docstore_async(sources, vector_name, search_type="OR", just_source_name=False):
|
|
268
291
|
|
|
269
|
-
|
|
292
|
+
if just_source_name:
|
|
293
|
+
query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
|
|
294
|
+
else:
|
|
295
|
+
query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
|
|
296
|
+
|
|
270
297
|
if not query:
|
|
271
298
|
return []
|
|
272
299
|
|
|
@@ -274,9 +301,13 @@ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"
|
|
|
274
301
|
|
|
275
302
|
return documents
|
|
276
303
|
|
|
277
|
-
def get_sources_from_docstore(sources, vector_name, search_type="OR"):
|
|
304
|
+
def get_sources_from_docstore(sources, vector_name, search_type="OR", just_source_name=False):
|
|
278
305
|
|
|
279
|
-
|
|
306
|
+
if just_source_name:
|
|
307
|
+
query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
|
|
308
|
+
else:
|
|
309
|
+
query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
|
|
310
|
+
|
|
280
311
|
if not query:
|
|
281
312
|
return []
|
|
282
313
|
|
|
@@ -303,3 +334,5 @@ def delete_sources_from_alloydb(sources, vector_name):
|
|
|
303
334
|
DELETE FROM {vector_name}_vectorstore_{vector_length}
|
|
304
335
|
WHERE {conditions}
|
|
305
336
|
"""
|
|
337
|
+
|
|
338
|
+
return query
|
sunholo/database/database.py
CHANGED
sunholo/database/lancedb.py
CHANGED
sunholo/database/static_dbs.py
CHANGED
sunholo/embedder/embed_chunk.py
CHANGED
|
@@ -20,7 +20,7 @@ import uuid
|
|
|
20
20
|
from langchain.schema import Document
|
|
21
21
|
|
|
22
22
|
from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
|
|
23
|
-
from ..logging import log
|
|
23
|
+
from ..custom_logging import log
|
|
24
24
|
from ..database.uuid import generate_uuid_from_object_id
|
|
25
25
|
from ..utils import ConfigManager
|
|
26
26
|
|
sunholo/gcs/add_file.py
CHANGED
sunholo/gcs/download_folder.py
CHANGED
sunholo/gcs/download_url.py
CHANGED
|
@@ -10,7 +10,7 @@ try:
|
|
|
10
10
|
except ImportError:
|
|
11
11
|
storage = None
|
|
12
12
|
|
|
13
|
-
from ..logging import log
|
|
13
|
+
from ..custom_logging import log
|
|
14
14
|
from ..utils.gcp import is_running_on_gcp
|
|
15
15
|
from ..auth.refresh import refresh_credentials, get_default_creds, get_default_email
|
|
16
16
|
from io import BytesIO
|
sunholo/gcs/metadata.py
CHANGED