sunholo 0.78.5__py3-none-any.whl → 0.79.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. sunholo/agents/chat_history.py +1 -1
  2. sunholo/agents/dispatch_to_qa.py +1 -1
  3. sunholo/agents/fastapi/base.py +1 -1
  4. sunholo/agents/fastapi/qna_routes.py +1 -1
  5. sunholo/agents/flask/qna_routes.py +1 -1
  6. sunholo/agents/flask/vac_routes.py +1 -1
  7. sunholo/agents/langserve.py +1 -1
  8. sunholo/agents/pubsub.py +1 -1
  9. sunholo/agents/route.py +1 -1
  10. sunholo/agents/special_commands.py +1 -1
  11. sunholo/agents/swagger.py +1 -1
  12. sunholo/archive/archive.py +1 -1
  13. sunholo/auth/refresh.py +1 -1
  14. sunholo/auth/run.py +1 -1
  15. sunholo/azure/auth.py +1 -1
  16. sunholo/azure/blobs.py +1 -1
  17. sunholo/azure/event_grid.py +1 -1
  18. sunholo/bots/discord.py +1 -1
  19. sunholo/chunker/azure.py +1 -1
  20. sunholo/chunker/doc_handling.py +5 -1
  21. sunholo/chunker/encode_metadata.py +1 -1
  22. sunholo/chunker/images.py +1 -1
  23. sunholo/chunker/loaders.py +1 -1
  24. sunholo/chunker/message_data.py +1 -1
  25. sunholo/chunker/pdfs.py +1 -1
  26. sunholo/chunker/process_chunker_data.py +1 -1
  27. sunholo/chunker/publish.py +1 -1
  28. sunholo/chunker/pubsub.py +1 -1
  29. sunholo/chunker/splitter.py +1 -1
  30. sunholo/cli/chat_vac.py +1 -1
  31. sunholo/cli/cli.py +4 -1
  32. sunholo/components/llm.py +1 -1
  33. sunholo/components/retriever.py +18 -12
  34. sunholo/components/vectorstore.py +13 -2
  35. sunholo/database/alloydb.py +41 -8
  36. sunholo/database/alloydb_client.py +1 -1
  37. sunholo/database/database.py +1 -1
  38. sunholo/database/lancedb.py +1 -1
  39. sunholo/database/static_dbs.py +1 -1
  40. sunholo/discovery_engine/chunker_handler.py +1 -1
  41. sunholo/discovery_engine/discovery_engine_client.py +1 -1
  42. sunholo/embedder/embed_chunk.py +1 -1
  43. sunholo/gcs/add_file.py +1 -1
  44. sunholo/gcs/download_folder.py +1 -1
  45. sunholo/gcs/download_url.py +1 -1
  46. sunholo/gcs/metadata.py +1 -1
  47. sunholo/invoke/direct_vac_func.py +1 -1
  48. sunholo/invoke/invoke_vac_utils.py +1 -1
  49. sunholo/langfuse/callback.py +1 -1
  50. sunholo/langfuse/prompts.py +1 -1
  51. sunholo/llamaindex/__init__.py +1 -0
  52. sunholo/llamaindex/import_files.py +24 -17
  53. sunholo/llamaindex/llamaindex_class.py +192 -0
  54. sunholo/pubsub/process_pubsub.py +1 -1
  55. sunholo/pubsub/pubsub_manager.py +1 -1
  56. sunholo/qna/parsers.py +1 -1
  57. sunholo/qna/retry.py +1 -1
  58. sunholo/streaming/content_buffer.py +1 -1
  59. sunholo/streaming/langserve.py +1 -1
  60. sunholo/streaming/stream_lookup.py +1 -1
  61. sunholo/streaming/streaming.py +1 -1
  62. sunholo/summarise/summarise.py +1 -1
  63. sunholo/tools/web_browser.py +1 -1
  64. sunholo/utils/config.py +5 -5
  65. sunholo/utils/config_class.py +8 -4
  66. sunholo/vertex/extensions_call.py +1 -1
  67. sunholo/vertex/extensions_class.py +1 -1
  68. sunholo/vertex/genai_functions.py +1 -1
  69. sunholo/vertex/init.py +1 -1
  70. sunholo/vertex/memory_tools.py +29 -14
  71. {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/METADATA +2 -2
  72. sunholo-0.79.1.dist-info/RECORD +140 -0
  73. {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/WHEEL +1 -1
  74. sunholo/llamaindex/generate.py +0 -22
  75. sunholo-0.78.5.dist-info/RECORD +0 -140
  76. /sunholo/{logging.py → custom_logging.py} +0 -0
  77. {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/LICENSE.txt +0 -0
  78. {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/entry_points.txt +0 -0
  79. {sunholo-0.78.5.dist-info → sunholo-0.79.1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  import json
2
- from ..logging import log
2
+ from ..custom_logging import log
3
3
 
4
4
  def extract_chat_history(chat_history=None):
5
5
  """
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from ..utils import ConfigManager
16
16
  from ..auth import get_header
17
17
  import requests
@@ -11,7 +11,7 @@ try:
11
11
  except ImportError:
12
12
  FastAPI = None
13
13
 
14
- from ...logging import log
14
+ from ...custom_logging import log
15
15
 
16
16
  def create_fastapi_app(origins = ["*"],
17
17
  origin_regex = r"https://(.*\.)?sunholo\.com"):
@@ -15,7 +15,7 @@ from ...agents import extract_chat_history, handle_special_commands
15
15
  from ...qna.parsers import parse_output
16
16
  from ...streaming import start_streaming_chat_async
17
17
  from ...archive import archive_qa
18
- from ...logging import log
18
+ from ...custom_logging import log
19
19
 
20
20
  class VACRequest(BaseModel):
21
21
  user_input: str
@@ -21,7 +21,7 @@ from ...agents import extract_chat_history, handle_special_commands
21
21
  from ...qna.parsers import parse_output
22
22
  from ...streaming import start_streaming_chat
23
23
  from ...archive import archive_qa
24
- from ...logging import log
24
+ from ...custom_logging import log
25
25
  from ...utils.config import load_config
26
26
  from ...utils.version import sunholo_version
27
27
  import os
@@ -7,7 +7,7 @@ from ...agents import extract_chat_history, handle_special_commands
7
7
  from ...qna.parsers import parse_output
8
8
  from ...streaming import start_streaming_chat
9
9
  from ...archive import archive_qa
10
- from ...logging import log
10
+ from ...custom_logging import log
11
11
  from ...utils.config import load_config
12
12
  from ...utils.version import sunholo_version
13
13
  import os
@@ -1,5 +1,5 @@
1
1
  import requests
2
- from ..logging import log
2
+ from ..custom_logging import log
3
3
  from ..auth import get_header
4
4
 
5
5
  # Global cache for storing input schemas
sunholo/agents/pubsub.py CHANGED
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
 
16
16
 
17
17
  import base64
sunholo/agents/route.py CHANGED
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from ..utils import load_config, ConfigManager
16
16
 
17
17
  def read_cloud_run_url(agent, cloud_run_urls_file='config/cloud_run_urls.json'):
@@ -26,7 +26,7 @@ from ..utils.parsers import contains_url, extract_urls
26
26
  from ..chunker.publish import publish_text
27
27
  from ..gcs.add_file import add_file_to_gcs
28
28
  from ..utils.config import load_config_key
29
- from ..logging import log
29
+ from ..custom_logging import log
30
30
 
31
31
  # config file?
32
32
  command_descriptions = {
sunholo/agents/swagger.py CHANGED
@@ -1,7 +1,7 @@
1
1
  import copy
2
2
  from ..utils.config import load_all_configs
3
3
  from .route import route_vac
4
- from ..logging import log
4
+ from ..custom_logging import log
5
5
  from ruamel.yaml import YAML
6
6
  from io import StringIO
7
7
  # check it here:
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  from ..pubsub import PubSubManager
15
- from ..logging import log
15
+ from ..custom_logging import log
16
16
 
17
17
 
18
18
 
sunholo/auth/refresh.py CHANGED
@@ -6,7 +6,7 @@ from google.auth.transport import requests
6
6
  from ..utils.gcp import is_running_on_gcp
7
7
 
8
8
 
9
- from ..logging import log
9
+ from ..custom_logging import log
10
10
 
11
11
  def get_default_email():
12
12
  if not refresh_credentials():
sunholo/auth/run.py CHANGED
@@ -6,7 +6,7 @@ from ..utils.config import load_config
6
6
  from ..utils import ConfigManager
7
7
  from ..utils.gcp import is_running_on_cloudrun
8
8
  from ..utils.api_key import has_multivac_api_key, get_multivac_api_key
9
- from ..logging import log
9
+ from ..custom_logging import log
10
10
  from ..agents.route import route_vac
11
11
  from .gcloud import get_local_gcloud_token
12
12
 
sunholo/azure/auth.py CHANGED
@@ -5,7 +5,7 @@ except ImportError:
5
5
  DefaultAzureCredential = None
6
6
  ClientSecretCredential = None
7
7
 
8
- from ..logging import log
8
+ from ..custom_logging import log
9
9
 
10
10
  def azure_auth():
11
11
  """
sunholo/azure/blobs.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import re
2
- from ..logging import log
2
+ from ..custom_logging import log
3
3
 
4
4
 
5
5
  def is_azure_blob(message_data):
@@ -1,5 +1,5 @@
1
1
  # process_azure_blob_event.py
2
- from ..logging import log
2
+ from ..custom_logging import log
3
3
 
4
4
  def process_azure_blob_event(events: list) -> tuple:
5
5
  """
sunholo/bots/discord.py CHANGED
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
  import json, os
15
15
  import requests
16
- from ..logging import log
16
+ from ..custom_logging import log
17
17
 
18
18
 
19
19
 
sunholo/chunker/azure.py CHANGED
@@ -14,7 +14,7 @@
14
14
  import json
15
15
  from datetime import datetime, timezone
16
16
 
17
- from ..logging import log
17
+ from ..custom_logging import log
18
18
  from ..azure import process_azure_blob_event
19
19
 
20
20
  from ..invoke import invoke_vac
@@ -1,5 +1,5 @@
1
1
  from ..utils import load_config_key
2
- from ..logging import log
2
+ from ..custom_logging import log
3
3
  from ..database.alloydb import add_document_if_not_exists
4
4
  from ..database.uuid import generate_uuid_from_object_id
5
5
  from ..components.llm import llm_str_to_llm
@@ -175,6 +175,10 @@ Be careful not to add any speculation or any details that are not covered in the
175
175
  bucket_name = os.getenv("DOC_BUCKET")
176
176
  if not bucket_name:
177
177
  raise ValueError("No DOC_BUCKET configured for summary")
178
+
179
+ if bucket_name.startswith("gs://"):
180
+ bucket_name = bucket_name[len("gs://"):]
181
+
178
182
  with tempfile.NamedTemporaryFile(mode='w+', delete=False) as temp_file:
179
183
  temp_file.write(summary)
180
184
  temp_file.flush()
@@ -3,7 +3,7 @@ import base64
3
3
  import json
4
4
  from datetime import datetime, timezone
5
5
 
6
- from ..logging import log
6
+ from ..custom_logging import log
7
7
 
8
8
  def create_metadata(vac, metadata):
9
9
  now_utc = datetime.now(timezone.utc)
sunholo/chunker/images.py CHANGED
@@ -3,7 +3,7 @@ import datetime
3
3
  import tempfile
4
4
  import os
5
5
  from ..gcs.add_file import add_file_to_gcs, get_image_file_name
6
- from ..logging import log
6
+ from ..custom_logging import log
7
7
  from ..utils.gcp import is_running_on_gcp
8
8
 
9
9
 
@@ -18,7 +18,7 @@ from langchain_community.document_loaders import UnstructuredURLLoader
18
18
  from langchain_community.document_loaders import GitLoader
19
19
  from langchain_community.document_loaders import GoogleDriveLoader
20
20
 
21
- from ..logging import log
21
+ from ..custom_logging import log
22
22
  from .pdfs import read_pdf_file
23
23
  from ..utils.config import load_config
24
24
 
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
 
16
16
  import pathlib
17
17
  import tempfile
sunholo/chunker/pdfs.py CHANGED
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
  import os
15
15
  import pathlib
16
- from ..logging import log
16
+ from ..custom_logging import log
17
17
 
18
18
  def split_pdf_to_pages(pdf_path, temp_dir):
19
19
 
@@ -16,7 +16,7 @@ from .publish import process_docs_chunks_vector_name
16
16
  from .splitter import chunk_doc_to_docs
17
17
  from ..azure.blobs import is_azure_blob
18
18
 
19
- from ..logging import log
19
+ from ..custom_logging import log
20
20
 
21
21
  def process_chunker_data(message_data, metadata, vector_name):
22
22
 
@@ -1,4 +1,4 @@
1
- from ..logging import log
1
+ from ..custom_logging import log
2
2
  from ..pubsub import PubSubManager
3
3
  from ..utils.parsers import contains_url, extract_urls
4
4
  from ..utils.gcp_project import get_gcp_project
sunholo/chunker/pubsub.py CHANGED
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from ..logging import log
15
+ from ..custom_logging import log
16
16
  from ..pubsub import process_pubsub_message
17
17
  from .process_chunker_data import process_chunker_data
18
18
 
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from ..utils.parsers import remove_whitespace
16
16
  from langchain.schema import Document
17
17
  import langchain.text_splitter as text_splitter
sunholo/cli/chat_vac.py CHANGED
@@ -3,7 +3,7 @@ from ..streaming import generate_proxy_stream, can_agent_stream
3
3
  from ..utils.user_ids import generate_user_id
4
4
  from ..utils import ConfigManager
5
5
  from ..utils.api_key import has_multivac_api_key
6
- from ..logging import log
6
+ from ..custom_logging import log
7
7
  from ..qna.parsers import parse_output
8
8
  from ..gcs.add_file import add_file_to_gcs
9
9
  from .run_proxy import clean_proxy_list, start_proxy, stop_proxy
sunholo/cli/cli.py CHANGED
@@ -10,10 +10,11 @@ from .chat_vac import setup_vac_subparser
10
10
  from .embedder import setup_embedder_subparser
11
11
  from .swagger import setup_swagger_subparser
12
12
  from .vertex import setup_vertex_subparser
13
+ from ..llamaindex import setup_llamaindex_subparser
13
14
 
14
15
  from ..utils import ConfigManager
15
16
 
16
- from ..logging import log
17
+ from ..custom_logging import log
17
18
 
18
19
  from .sun_rich import console
19
20
  import sys
@@ -87,6 +88,8 @@ def main(args=None):
87
88
  setup_swagger_subparser(subparsers)
88
89
  # vertex
89
90
  setup_vertex_subparser(subparsers)
91
+ # llamaindex
92
+ setup_llamaindex_subparser(subparsers)
90
93
 
91
94
  #TODO: add database setup commands: alloydb and supabase
92
95
 
sunholo/components/llm.py CHANGED
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from ..utils import load_config_key, ConfigManager
16
16
 
17
17
  import os
@@ -11,7 +11,7 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from .vectorstore import pick_vectorstore
16
16
  from ..utils import load_config_key, ConfigManager
17
17
  from .llm import get_embeddings
@@ -41,9 +41,14 @@ def load_memories(vector_name:str=None, config:ConfigManager=None):
41
41
 
42
42
  return memories
43
43
 
44
- def pick_retriever(vector_name, embeddings=None):
44
+ def pick_retriever(vector_name:str=None, config:ConfigManager=None, embeddings=None):
45
45
 
46
- memories = load_memories(vector_name)
46
+ if config is None:
47
+ if vector_name is None:
48
+ raise ValueError("vector_name and config were none")
49
+ config = ConfigManager(vector_name)
50
+
51
+ memories = load_memories(config=config)
47
52
 
48
53
  retriever_list = []
49
54
  for memory in memories: # Iterate over the list
@@ -58,7 +63,7 @@ def pick_retriever(vector_name, embeddings=None):
58
63
  log.info(f"Skipped from_metadata_id for {vectorstore}")
59
64
  continue
60
65
 
61
- embeddings = embeddings or get_embeddings(vector_name)
66
+ embeddings = embeddings or get_embeddings(config=config)
62
67
  read_only = value.get('read_only')
63
68
  try:
64
69
  vectorstore = pick_vectorstore(vectorstore,
@@ -93,15 +98,15 @@ def pick_retriever(vector_name, embeddings=None):
93
98
  log.info(f"No retrievers were created for {memories}")
94
99
  return None
95
100
 
96
- retriever = process_retrieval(retriever_list, vector_name)
101
+ retriever = process_retrieval(retriever_list, config=config)
97
102
 
98
103
  return retriever
99
104
 
100
- def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=None):
105
+ def metadata_retriever(metadata: dict, key: str, config:ConfigManager, embeddings=None):
101
106
  """
102
107
  Decides which vector_name to retrieve from metadata passed
103
108
  """
104
- memories = load_memories(vector_name)
109
+ memories = load_memories(config=config)
105
110
 
106
111
  retriever_list = []
107
112
  for memory in memories: # Iterate over the list
@@ -118,7 +123,7 @@ def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=Non
118
123
  raise ValueError(f"Missing {key} in {metadata}")
119
124
  the_id = metadata[key]
120
125
  read_only = value.get('read_only')
121
- embeddings = embeddings or get_embeddings(vector_name)
126
+ embeddings = embeddings or get_embeddings(config=config)
122
127
  vectorstore = pick_vectorstore(vectorstore,
123
128
  vector_name=the_id,
124
129
  embeddings=embeddings,
@@ -133,17 +138,18 @@ def metadata_retriever(metadata: dict, key: str, vector_name:str, embeddings=Non
133
138
  log.info(f"No retrievers were created for {memories}")
134
139
  return None
135
140
 
136
- retriever = process_retrieval(retriever_list, vector_name)
141
+ retriever = process_retrieval(retriever_list, config=config)
137
142
 
138
143
  return retriever
139
144
 
140
145
 
141
146
 
142
- def process_retrieval(retriever_list: list, vector_name: str):
143
- k_override = load_config_key("memory_k", vector_name, kind="vacConfig")
147
+ def process_retrieval(retriever_list: list, config: ConfigManager):
148
+ k_override = config.vacConfig('memory_k')
144
149
  lotr = MergerRetriever(retrievers=retriever_list)
145
150
 
146
- filter_embeddings = get_embeddings(vector_name)
151
+ filter_embeddings = get_embeddings(config=config)
152
+
147
153
  filter = EmbeddingsRedundantFilter(embeddings=filter_embeddings)
148
154
  pipeline = DocumentCompressorPipeline(transformers=[filter])
149
155
  retriever = ContextualCompressionRetriever(
@@ -12,10 +12,21 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import os
15
- from ..logging import log
15
+ from ..custom_logging import log
16
+ from ..utils import ConfigManager
16
17
 
17
- def pick_vectorstore(vs_str: str, vector_name: str, embeddings, read_only=None):
18
+ def pick_vectorstore(vs_str: str, vector_name: str=None, embeddings=None, config:ConfigManager=None, read_only=None):
18
19
  log.debug('Picking vectorstore')
20
+
21
+ if not embeddings:
22
+ raise ValueError("Requires embeddings")
23
+
24
+ if config is None:
25
+ if vector_name is None:
26
+ raise ValueError("vector_name and config were none")
27
+ config = ConfigManager(vector_name)
28
+
29
+ vector_name = config.vector_name
19
30
 
20
31
  if vs_str == 'supabase':
21
32
  from supabase import Client, create_client
@@ -11,7 +11,7 @@ except ImportError:
11
11
  from .database import get_vector_size
12
12
  from .alloydb_client import AlloyDBClient
13
13
 
14
- from ..logging import log
14
+ from ..custom_logging import log
15
15
  from ..utils.config import load_config_key
16
16
 
17
17
 
@@ -232,7 +232,7 @@ async def load_alloydb_sql_async(sql, vector_name):
232
232
  return documents
233
233
 
234
234
  def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
235
- unique_sources = set(sources)
235
+ unique_sources = set(sources.split())
236
236
  # Choose the delimiter based on the search_type argument
237
237
  delimiter = ' AND ' if search_type.upper() == "AND" else ' OR '
238
238
 
@@ -240,14 +240,14 @@ def and_or_ilike(sources, search_type="OR", operator="ILIKE"):
240
240
  conditions = delimiter.join(f"TRIM(source) {operator} '%{source}%'" for source in unique_sources)
241
241
  if not conditions:
242
242
  log.warning("Alloydb doc query found no like_patterns")
243
- return []
243
+ return ""
244
244
 
245
245
  return conditions
246
246
 
247
247
  def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
248
248
  if not sources:
249
249
  log.warning("No sources found for alloydb fetch")
250
- return []
250
+ return ""
251
251
 
252
252
  table_name = f"{vector_name}_docstore"
253
253
 
@@ -263,10 +263,37 @@ def _get_sources_from_docstore(sources, vector_name, search_type="OR"):
263
263
 
264
264
  return query
265
265
 
266
+ def _list_sources_from_docstore(sources, vector_name, search_type="OR"):
267
+ table_name = f"{vector_name}_docstore"
268
+
269
+
270
+ if sources:
271
+ conditions = and_or_ilike(sources, search_type=search_type)
272
+ query = f"""
273
+ SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
274
+ FROM {table_name}
275
+ WHERE {conditions}
276
+ ORDER BY langchain_metadata->>'objectId' ASC
277
+ LIMIT 500;
278
+ """
279
+ else:
280
+ query = f"""
281
+ SELECT DISTINCT langchain_metadata->>'objectId' AS objectId
282
+ FROM {table_name}
283
+ ORDER BY langchain_metadata->>'objectId' ASC
284
+ LIMIT 500;
285
+ """
286
+
287
+ return query
266
288
 
267
- async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"):
289
+
290
+ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR", just_source_name=False):
268
291
 
269
- query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
292
+ if just_source_name:
293
+ query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
294
+ else:
295
+ query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
296
+
270
297
  if not query:
271
298
  return []
272
299
 
@@ -274,9 +301,13 @@ async def get_sources_from_docstore_async(sources, vector_name, search_type="OR"
274
301
 
275
302
  return documents
276
303
 
277
- def get_sources_from_docstore(sources, vector_name, search_type="OR"):
304
+ def get_sources_from_docstore(sources, vector_name, search_type="OR", just_source_name=False):
278
305
 
279
- query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
306
+ if just_source_name:
307
+ query = _list_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
308
+ else:
309
+ query = _get_sources_from_docstore(sources, vector_name=vector_name, search_type=search_type)
310
+
280
311
  if not query:
281
312
  return []
282
313
 
@@ -303,3 +334,5 @@ def delete_sources_from_alloydb(sources, vector_name):
303
334
  DELETE FROM {vector_name}_vectorstore_{vector_length}
304
335
  WHERE {conditions}
305
336
  """
337
+
338
+ return query
@@ -8,7 +8,7 @@ except ImportError:
8
8
  pass
9
9
 
10
10
  from .database import get_vector_size
11
- from ..logging import log
11
+ from ..custom_logging import log
12
12
 
13
13
  class AlloyDBClient:
14
14
  """
@@ -17,7 +17,7 @@ import time
17
17
  import math
18
18
 
19
19
  from ..utils.config import get_module_filepath
20
- from ..logging import log
20
+ from ..custom_logging import log
21
21
  from ..utils.config import load_config_key
22
22
 
23
23
 
@@ -1,4 +1,4 @@
1
- from ..logging import log
1
+ from ..custom_logging import log
2
2
 
3
3
  def create_lancedb_index(bucket, vector_name, num_partitions=256, num_sub_vectors=96, text_key="text"):
4
4
  try:
@@ -1,5 +1,5 @@
1
1
  import os
2
- from ..logging import log_folder_location
2
+ from ..custom_logging import log_folder_location
3
3
 
4
4
  def get_db_directory(db_dir='db'):
5
5
  current_script_directory = os.path.dirname(os.path.abspath(__file__))
@@ -1,4 +1,4 @@
1
- from ..logging import log
1
+ from ..custom_logging import log
2
2
  from ..utils.config import load_config_key
3
3
  from ..utils.gcp_project import get_gcp_project
4
4
  from ..components import load_memories
@@ -10,7 +10,7 @@ except ImportError:
10
10
  Chunk = None
11
11
  SearchResponse = None
12
12
 
13
- from ..logging import log
13
+ from ..custom_logging import log
14
14
  from typing import Optional, List
15
15
 
16
16
  class DiscoveryEngineClient:
@@ -20,7 +20,7 @@ import uuid
20
20
  from langchain.schema import Document
21
21
 
22
22
  from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
23
- from ..logging import log
23
+ from ..custom_logging import log
24
24
  from ..database.uuid import generate_uuid_from_object_id
25
25
  from ..utils import ConfigManager
26
26
 
sunholo/gcs/add_file.py CHANGED
@@ -21,7 +21,7 @@ try:
21
21
  except ImportError:
22
22
  storage = None
23
23
 
24
- from ..logging import log
24
+ from ..custom_logging import log
25
25
  from ..utils import load_config_key, ConfigManager
26
26
 
27
27
 
@@ -5,7 +5,7 @@ try:
5
5
  except ImportError:
6
6
  storage = None
7
7
 
8
- from ..logging import log
8
+ from ..custom_logging import log
9
9
 
10
10
  def download_files_from_gcs(bucket_name: str, source_folder: str, destination_folder: str=None):
11
11
  """
@@ -10,7 +10,7 @@ try:
10
10
  except ImportError:
11
11
  storage = None
12
12
 
13
- from ..logging import log
13
+ from ..custom_logging import log
14
14
  from ..utils.gcp import is_running_on_gcp
15
15
  from ..auth.refresh import refresh_credentials, get_default_creds, get_default_email
16
16
  from io import BytesIO
sunholo/gcs/metadata.py CHANGED
@@ -3,7 +3,7 @@ try:
3
3
  except ImportError:
4
4
  storage = None
5
5
 
6
- from ..logging import log
6
+ from ..custom_logging import log
7
7
 
8
8
 
9
9
  def get_object_metadata(bucket_name, object_name):
@@ -1,4 +1,4 @@
1
- from ..logging import log
1
+ from ..custom_logging import log
2
2
  from ..agents import send_to_qa
3
3
  from ..qna.parsers import parse_output
4
4
  from ..streaming import generate_proxy_stream