sunholo 0.113.3__tar.gz → 0.114.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168) hide show
  1. {sunholo-0.113.3 → sunholo-0.114.1}/PKG-INFO +7 -2
  2. {sunholo-0.113.3 → sunholo-0.114.1}/setup.py +7 -1
  3. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/doc_handling.py +14 -4
  4. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/loaders.py +51 -50
  5. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/message_data.py +4 -2
  6. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/publish.py +5 -2
  7. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/splitter.py +7 -2
  8. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/components/retriever.py +11 -7
  9. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/embedder/embed_chunk.py +5 -2
  10. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/process_funcs_cls.py +255 -205
  11. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/streaming/content_buffer.py +7 -2
  12. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/summarise/summarise.py +18 -8
  13. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/PKG-INFO +7 -2
  14. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/requires.txt +6 -0
  15. {sunholo-0.113.3 → sunholo-0.114.1}/LICENSE.txt +0 -0
  16. {sunholo-0.113.3 → sunholo-0.114.1}/MANIFEST.in +0 -0
  17. {sunholo-0.113.3 → sunholo-0.114.1}/README.md +0 -0
  18. {sunholo-0.113.3 → sunholo-0.114.1}/setup.cfg +0 -0
  19. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/__init__.py +0 -0
  20. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/__init__.py +0 -0
  21. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/chat_history.py +0 -0
  22. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/dispatch_to_qa.py +0 -0
  23. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/fastapi/__init__.py +0 -0
  24. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/fastapi/base.py +0 -0
  25. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/fastapi/qna_routes.py +0 -0
  26. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/flask/__init__.py +0 -0
  27. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/flask/base.py +0 -0
  28. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/flask/qna_routes.py +0 -0
  29. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/flask/vac_routes.py +0 -0
  30. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/langserve.py +0 -0
  31. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/pubsub.py +0 -0
  32. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/route.py +0 -0
  33. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/special_commands.py +0 -0
  34. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/agents/swagger.py +0 -0
  35. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/archive/__init__.py +0 -0
  36. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/archive/archive.py +0 -0
  37. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/auth/__init__.py +0 -0
  38. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/auth/gcloud.py +0 -0
  39. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/auth/refresh.py +0 -0
  40. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/auth/run.py +0 -0
  41. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/azure/__init__.py +0 -0
  42. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/azure/auth.py +0 -0
  43. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/azure/blobs.py +0 -0
  44. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/azure/event_grid.py +0 -0
  45. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/bots/__init__.py +0 -0
  46. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/bots/discord.py +0 -0
  47. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/bots/github_webhook.py +0 -0
  48. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/bots/webapp.py +0 -0
  49. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/__init__.py +0 -0
  50. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/azure.py +0 -0
  51. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/encode_metadata.py +0 -0
  52. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/images.py +0 -0
  53. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/pdfs.py +0 -0
  54. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/process_chunker_data.py +0 -0
  55. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/chunker/pubsub.py +0 -0
  56. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/__init__.py +0 -0
  57. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/chat_vac.py +0 -0
  58. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/cli.py +0 -0
  59. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/cli_init.py +0 -0
  60. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/configs.py +0 -0
  61. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/deploy.py +0 -0
  62. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/embedder.py +0 -0
  63. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/merge_texts.py +0 -0
  64. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/run_proxy.py +0 -0
  65. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/sun_rich.py +0 -0
  66. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/swagger.py +0 -0
  67. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/cli/vertex.py +0 -0
  68. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/components/__init__.py +0 -0
  69. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/components/llm.py +0 -0
  70. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/components/vectorstore.py +0 -0
  71. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/custom_logging.py +0 -0
  72. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/__init__.py +0 -0
  73. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/alloydb.py +0 -0
  74. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/alloydb_client.py +0 -0
  75. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/database.py +0 -0
  76. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/lancedb.py +0 -0
  77. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/create_function.sql +0 -0
  78. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  79. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/create_table.sql +0 -0
  80. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  81. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/return_sources.sql +0 -0
  82. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/sql/sb/setup.sql +0 -0
  83. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/static_dbs.py +0 -0
  84. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/database/uuid.py +0 -0
  85. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/discovery_engine/__init__.py +0 -0
  86. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/discovery_engine/chunker_handler.py +0 -0
  87. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/discovery_engine/create_new.py +0 -0
  88. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  89. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/discovery_engine/get_ai_search_chunks.py +0 -0
  90. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/embedder/__init__.py +0 -0
  91. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/excel/__init__.py +0 -0
  92. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/excel/plugin.py +0 -0
  93. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/__init__.py +0 -0
  94. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/add_file.py +0 -0
  95. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/download_folder.py +0 -0
  96. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/download_url.py +0 -0
  97. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/extract_and_sign.py +0 -0
  98. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/gcs/metadata.py +0 -0
  99. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/__init__.py +0 -0
  100. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/file_handling.py +0 -0
  101. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/images.py +0 -0
  102. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/init.py +0 -0
  103. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/genai/safety.py +0 -0
  104. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/invoke/__init__.py +0 -0
  105. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/invoke/async_class.py +0 -0
  106. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/invoke/direct_vac_func.py +0 -0
  107. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/invoke/invoke_vac_utils.py +0 -0
  108. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/langfuse/__init__.py +0 -0
  109. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/langfuse/callback.py +0 -0
  110. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/langfuse/evals.py +0 -0
  111. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/langfuse/prompts.py +0 -0
  112. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/llamaindex/__init__.py +0 -0
  113. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/llamaindex/get_files.py +0 -0
  114. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/llamaindex/import_files.py +0 -0
  115. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/llamaindex/llamaindex_class.py +0 -0
  116. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/llamaindex/user_history.py +0 -0
  117. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/lookup/__init__.py +0 -0
  118. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/lookup/model_lookup.yaml +0 -0
  119. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/patches/__init__.py +0 -0
  120. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/patches/langchain/__init__.py +0 -0
  121. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/patches/langchain/lancedb.py +0 -0
  122. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/patches/langchain/vertexai.py +0 -0
  123. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/pubsub/__init__.py +0 -0
  124. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/pubsub/process_pubsub.py +0 -0
  125. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/pubsub/pubsub_manager.py +0 -0
  126. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/qna/__init__.py +0 -0
  127. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/qna/parsers.py +0 -0
  128. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/qna/retry.py +0 -0
  129. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/senses/__init__.py +0 -0
  130. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/senses/stream_voice.py +0 -0
  131. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/streaming/__init__.py +0 -0
  132. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/streaming/langserve.py +0 -0
  133. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/streaming/stream_lookup.py +0 -0
  134. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/streaming/streaming.py +0 -0
  135. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/summarise/__init__.py +0 -0
  136. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/terraform/__init__.py +0 -0
  137. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/terraform/tfvars_editor.py +0 -0
  138. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/tools/__init__.py +0 -0
  139. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/tools/web_browser.py +0 -0
  140. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/__init__.py +0 -0
  141. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/api_key.py +0 -0
  142. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/big_context.py +0 -0
  143. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/config.py +0 -0
  144. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/config_class.py +0 -0
  145. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/config_schema.py +0 -0
  146. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/gcp.py +0 -0
  147. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/gcp_project.py +0 -0
  148. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/mime.py +0 -0
  149. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/parsers.py +0 -0
  150. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/timedelta.py +0 -0
  151. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/user_ids.py +0 -0
  152. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/utils/version.py +0 -0
  153. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/__init__.py +0 -0
  154. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/extensions_call.py +0 -0
  155. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/extensions_class.py +0 -0
  156. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/genai_functions.py +0 -0
  157. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/init.py +0 -0
  158. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/memory_tools.py +0 -0
  159. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/safety.py +0 -0
  160. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo/vertex/type_dict_to_json.py +0 -0
  161. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/SOURCES.txt +0 -0
  162. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/dependency_links.txt +0 -0
  163. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/entry_points.txt +0 -0
  164. {sunholo-0.113.3 → sunholo-0.114.1}/sunholo.egg-info/top_level.txt +0 -0
  165. {sunholo-0.113.3 → sunholo-0.114.1}/tests/test_async.py +0 -0
  166. {sunholo-0.113.3 → sunholo-0.114.1}/tests/test_chat_history.py +0 -0
  167. {sunholo-0.113.3 → sunholo-0.114.1}/tests/test_config.py +0 -0
  168. {sunholo-0.113.3 → sunholo-0.114.1}/tests/test_unstructured.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.113.3
3
+ Version: 0.114.1
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.113.3.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.114.1.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -81,6 +81,11 @@ Requires-Dist: tenacity; extra == "all"
81
81
  Requires-Dist: tiktoken; extra == "all"
82
82
  Requires-Dist: unstructured[all-docs,local-inference]; extra == "all"
83
83
  Requires-Dist: xlwings; extra == "all"
84
+ Provides-Extra: langchain
85
+ Requires-Dist: langchain==0.2.16; extra == "langchain"
86
+ Requires-Dist: langchain_experimental==0.0.65; extra == "langchain"
87
+ Requires-Dist: langchain-community==0.2.17; extra == "langchain"
88
+ Requires-Dist: langsmith==0.1.143; extra == "langchain"
84
89
  Provides-Extra: azure
85
90
  Requires-Dist: azure-identity; extra == "azure"
86
91
  Requires-Dist: azure-storage-blob; extra == "azure"
@@ -1,6 +1,6 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
- version = '0.113.3'
3
+ version = '0.114.1'
4
4
 
5
5
  setup(
6
6
  name='sunholo',
@@ -94,6 +94,12 @@ setup(
94
94
  "unstructured[local-inference,all-docs]",
95
95
  "xlwings"
96
96
  ],
97
+ 'langchain': [
98
+ "langchain==0.2.16",
99
+ "langchain_experimental==0.0.65",
100
+ "langchain-community==0.2.17",
101
+ "langsmith==0.1.143",
102
+ ],
97
103
  'azure': [
98
104
  "azure-identity",
99
105
  "azure-storage-blob"
@@ -12,10 +12,14 @@ import tempfile
12
12
  import traceback
13
13
  import json
14
14
  import os
15
- from langchain.docstore.document import Document
16
-
17
- from langchain.prompts import PromptTemplate
18
- from langchain_core.output_parsers import StrOutputParser
15
+ try:
16
+ from langchain.docstore.document import Document
17
+ from langchain.prompts import PromptTemplate
18
+ from langchain_core.output_parsers import StrOutputParser
19
+ except ImportError:
20
+ Document = None
21
+ PromptTemplate = None
22
+ StrOutputParser = None
19
23
 
20
24
  def send_doc_to_docstore(docs, vector_name):
21
25
 
@@ -53,6 +57,9 @@ def send_doc_to_docstore(docs, vector_name):
53
57
 
54
58
  def create_big_doc(docs):
55
59
 
60
+ if not Document:
61
+ raise ImportError("Document object requires langchain installed")
62
+
56
63
  if not docs:
57
64
  return None, None, None
58
65
 
@@ -107,6 +114,9 @@ def create_big_doc(docs):
107
114
 
108
115
  def summarise_docs(docs, vector_name, summary_threshold_default=10000, model_limit_default=25000):
109
116
 
117
+ if not PromptTemplate or not StrOutputParser:
118
+ raise ImportError("PromptTemplate and StrOutputParser requires langchain installed")
119
+
110
120
  if not docs:
111
121
  return None
112
122
 
@@ -13,11 +13,12 @@
13
13
  # limitations under the License.
14
14
  try:
15
15
  from langchain_unstructured import UnstructuredLoader
16
+ from langchain_community.document_loaders import GitLoader
17
+ from langchain_community.document_loaders import GoogleDriveLoader
16
18
  except ImportError:
17
19
  UnstructuredLoader = None
18
-
19
- from langchain_community.document_loaders import GitLoader
20
- from langchain_community.document_loaders import GoogleDriveLoader
20
+ GitLoader=None
21
+ GoogleDriveLoader=None
21
22
 
22
23
  from ..custom_logging import log
23
24
  from .pdfs import read_pdf_file
@@ -43,56 +44,56 @@ def convert_to_txt(file_path):
43
44
  shutil.copyfile(file_path, txt_file)
44
45
  return txt_file
45
46
 
47
+ if GoogleDriveLoader is not None:
48
+ class MyGoogleDriveLoader(GoogleDriveLoader):
49
+ url: Optional[str] = Field(None)
46
50
 
47
- class MyGoogleDriveLoader(GoogleDriveLoader):
48
- url: Optional[str] = Field(None)
49
-
50
- def __init__(self, url, *args, **kwargs):
51
- super().__init__(*args, **kwargs, file_ids=['dummy']) # Pass dummy value
52
- self.url = url
53
-
54
- def _extract_id(self, url):
55
- parsed_url = urlparse(unquote(url))
56
- path_parts = parsed_url.path.split('/')
57
-
58
- # Iterate over the parts
59
- for part in path_parts:
60
- # IDs are typically alphanumeric and at least a few characters long
61
- # So let's say that to be an ID, a part has to be at least 15 characters long
62
- if all(char.isalnum() or char in ['_', '-'] for char in part) and len(part) >= 15:
63
- return part
64
-
65
- # Return None if no ID was found
66
- return None
51
+ def __init__(self, url, *args, **kwargs):
52
+ super().__init__(*args, **kwargs, file_ids=['dummy']) # Pass dummy value
53
+ self.url = url
67
54
 
68
- def load_from_url(self, url: str):
69
- id = self._extract_id(url)
70
- from googleapiclient.errors import HttpError
71
- from googleapiclient.discovery import build
72
-
73
- # Identify type of URL
74
- try:
75
- service = build("drive", "v3", credentials=self._load_credentials())
76
- file = service.files().get(fileId=id).execute()
77
- except HttpError as err:
78
- log.error(f"Error loading file {url}: {str(err)}")
79
- raise
80
-
81
- mime_type = file["mimeType"]
82
-
83
- if "folder" in mime_type:
84
- # If it's a folder, load documents from the folder
85
- return self._load_documents_from_folder(id)
86
- else:
87
- # If it's not a folder, treat it as a single file
88
- if mime_type == "application/vnd.google-apps.document":
89
- return [self._load_document_from_id(id)]
90
- elif mime_type == "application/vnd.google-apps.spreadsheet":
91
- return self._load_sheet_from_id(id)
92
- elif mime_type == "application/pdf":
93
- return [self._load_file_from_id(id)]
55
+ def _extract_id(self, url):
56
+ parsed_url = urlparse(unquote(url))
57
+ path_parts = parsed_url.path.split('/')
58
+
59
+ # Iterate over the parts
60
+ for part in path_parts:
61
+ # IDs are typically alphanumeric and at least a few characters long
62
+ # So let's say that to be an ID, a part has to be at least 15 characters long
63
+ if all(char.isalnum() or char in ['_', '-'] for char in part) and len(part) >= 15:
64
+ return part
65
+
66
+ # Return None if no ID was found
67
+ return None
68
+
69
+ def load_from_url(self, url: str):
70
+ id = self._extract_id(url)
71
+ from googleapiclient.errors import HttpError
72
+ from googleapiclient.discovery import build
73
+
74
+ # Identify type of URL
75
+ try:
76
+ service = build("drive", "v3", credentials=self._load_credentials())
77
+ file = service.files().get(fileId=id).execute()
78
+ except HttpError as err:
79
+ log.error(f"Error loading file {url}: {str(err)}")
80
+ raise
81
+
82
+ mime_type = file["mimeType"]
83
+
84
+ if "folder" in mime_type:
85
+ # If it's a folder, load documents from the folder
86
+ return self._load_documents_from_folder(id)
94
87
  else:
95
- return []
88
+ # If it's not a folder, treat it as a single file
89
+ if mime_type == "application/vnd.google-apps.document":
90
+ return [self._load_document_from_id(id)]
91
+ elif mime_type == "application/vnd.google-apps.spreadsheet":
92
+ return self._load_sheet_from_id(id)
93
+ elif mime_type == "application/pdf":
94
+ return [self._load_file_from_id(id)]
95
+ else:
96
+ return []
96
97
 
97
98
  def ignore_files(filepath):
98
99
  """Returns True if the given path's file extension is found within
@@ -29,8 +29,10 @@ try:
29
29
  except ImportError:
30
30
  BlobServiceClient = None
31
31
 
32
- from langchain.schema import Document
33
-
32
+ try:
33
+ from langchain.schema import Document
34
+ except ImportError:
35
+ Document = None
34
36
 
35
37
  from .splitter import chunk_doc_to_docs
36
38
  from .pdfs import split_pdf_to_pages
@@ -3,8 +3,11 @@ from ..pubsub import PubSubManager
3
3
  from ..utils.parsers import contains_url, extract_urls
4
4
  from ..utils.gcp_project import get_gcp_project
5
5
 
6
- from langchain.schema import Document
7
-
6
+ try:
7
+ from langchain.schema import Document
8
+ except ImportError:
9
+ Document=None
10
+
8
11
  def publish_if_urls(the_content, vector_name):
9
12
  """
10
13
  Extracts URLs and puts them in a queue for processing on PubSub
@@ -13,12 +13,17 @@
13
13
  # limitations under the License.
14
14
  from ..custom_logging import log
15
15
  from ..utils.parsers import remove_whitespace
16
- from langchain.schema import Document
17
- import langchain.text_splitter as text_splitter
16
+
18
17
  from .images import upload_doc_images
19
18
  from .doc_handling import send_doc_to_docstore, summarise_docs
20
19
  from ..database.uuid import generate_uuid_from_object_id
21
20
 
21
+ try:
22
+ from langchain.schema import Document
23
+ import langchain.text_splitter as text_splitter
24
+ except ImportError:
25
+ Document=None
26
+ text_splitter=None
22
27
 
23
28
  def chunk_doc_to_docs(documents: list, extension: str = ".md", min_size: int = 800, vector_name=None, **kwargs):
24
29
  """Turns a Document object into a list of many Document chunks.
@@ -17,13 +17,17 @@ from ..utils import ConfigManager
17
17
  from .llm import get_embeddings
18
18
  from ..utils.gcp_project import get_gcp_project
19
19
 
20
- from langchain.retrievers import MergerRetriever
21
- from langchain_community.retrievers import GoogleCloudEnterpriseSearchRetriever
22
- # https://python.langchain.com/docs/integrations/retrievers/merger_retriever
23
- from langchain_community.document_transformers import EmbeddingsRedundantFilter
24
- from langchain.retrievers.document_compressors import DocumentCompressorPipeline
25
- from langchain.retrievers import ContextualCompressionRetriever
26
-
20
+ try:
21
+ from langchain.retrievers import MergerRetriever
22
+ # https://python.langchain.com/docs/integrations/retrievers/merger_retriever
23
+ from langchain_community.document_transformers import EmbeddingsRedundantFilter
24
+ from langchain.retrievers.document_compressors import DocumentCompressorPipeline
25
+ from langchain.retrievers import ContextualCompressionRetriever
26
+ except ImportError:
27
+ MergerRetriever=None
28
+ EmbeddingsRedundantFilter=None
29
+ DocumentCompressorPipeline=None
30
+ ContextualCompressionRetriever=None
27
31
 
28
32
 
29
33
  def load_memories(vector_name:str=None, config:ConfigManager=None):
@@ -17,8 +17,11 @@ import json
17
17
  import datetime
18
18
  import uuid
19
19
 
20
- from langchain.schema import Document
21
-
20
+ try:
21
+ from langchain.schema import Document
22
+ except ImportError:
23
+ Document = None
24
+
22
25
  from ..components import get_embeddings, pick_vectorstore, load_memories, pick_embedding
23
26
  from ..custom_logging import log
24
27
  from ..database.uuid import generate_uuid_from_object_id