sunholo 0.111.0__tar.gz → 0.112.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (168)
  1. {sunholo-0.111.0 → sunholo-0.112.3}/PKG-INFO +6 -4
  2. {sunholo-0.111.0 → sunholo-0.112.3}/setup.py +5 -3
  3. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/loaders.py +21 -12
  4. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/PKG-INFO +6 -4
  5. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/SOURCES.txt +2 -1
  6. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/requires.txt +4 -2
  7. sunholo-0.112.3/tests/test_unstructured.py +9 -0
  8. {sunholo-0.111.0 → sunholo-0.112.3}/LICENSE.txt +0 -0
  9. {sunholo-0.111.0 → sunholo-0.112.3}/MANIFEST.in +0 -0
  10. {sunholo-0.111.0 → sunholo-0.112.3}/README.md +0 -0
  11. {sunholo-0.111.0 → sunholo-0.112.3}/setup.cfg +0 -0
  12. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/__init__.py +0 -0
  13. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/__init__.py +0 -0
  14. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/chat_history.py +0 -0
  15. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/dispatch_to_qa.py +0 -0
  16. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/fastapi/__init__.py +0 -0
  17. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/fastapi/base.py +0 -0
  18. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/fastapi/qna_routes.py +0 -0
  19. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/flask/__init__.py +0 -0
  20. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/flask/base.py +0 -0
  21. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/flask/qna_routes.py +0 -0
  22. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/flask/vac_routes.py +0 -0
  23. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/langserve.py +0 -0
  24. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/pubsub.py +0 -0
  25. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/route.py +0 -0
  26. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/special_commands.py +0 -0
  27. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/agents/swagger.py +0 -0
  28. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/archive/__init__.py +0 -0
  29. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/archive/archive.py +0 -0
  30. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/auth/__init__.py +0 -0
  31. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/auth/gcloud.py +0 -0
  32. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/auth/refresh.py +0 -0
  33. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/auth/run.py +0 -0
  34. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/azure/__init__.py +0 -0
  35. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/azure/auth.py +0 -0
  36. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/azure/blobs.py +0 -0
  37. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/azure/event_grid.py +0 -0
  38. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/bots/__init__.py +0 -0
  39. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/bots/discord.py +0 -0
  40. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/bots/github_webhook.py +0 -0
  41. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/bots/webapp.py +0 -0
  42. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/__init__.py +0 -0
  43. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/azure.py +0 -0
  44. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/doc_handling.py +0 -0
  45. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/encode_metadata.py +0 -0
  46. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/images.py +0 -0
  47. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/message_data.py +0 -0
  48. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/pdfs.py +0 -0
  49. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/process_chunker_data.py +0 -0
  50. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/publish.py +0 -0
  51. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/pubsub.py +0 -0
  52. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/chunker/splitter.py +0 -0
  53. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/__init__.py +0 -0
  54. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/chat_vac.py +0 -0
  55. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/cli.py +0 -0
  56. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/cli_init.py +0 -0
  57. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/configs.py +0 -0
  58. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/deploy.py +0 -0
  59. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/embedder.py +0 -0
  60. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/merge_texts.py +0 -0
  61. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/run_proxy.py +0 -0
  62. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/sun_rich.py +0 -0
  63. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/swagger.py +0 -0
  64. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/cli/vertex.py +0 -0
  65. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/components/__init__.py +0 -0
  66. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/components/llm.py +0 -0
  67. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/components/retriever.py +0 -0
  68. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/components/vectorstore.py +0 -0
  69. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/custom_logging.py +0 -0
  70. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/__init__.py +0 -0
  71. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/alloydb.py +0 -0
  72. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/alloydb_client.py +0 -0
  73. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/database.py +0 -0
  74. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/lancedb.py +0 -0
  75. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/create_function.sql +0 -0
  76. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  77. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/create_table.sql +0 -0
  78. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  79. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/return_sources.sql +0 -0
  80. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/sql/sb/setup.sql +0 -0
  81. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/static_dbs.py +0 -0
  82. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/database/uuid.py +0 -0
  83. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/discovery_engine/__init__.py +0 -0
  84. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/discovery_engine/chunker_handler.py +0 -0
  85. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/discovery_engine/create_new.py +0 -0
  86. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/discovery_engine/discovery_engine_client.py +0 -0
  87. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/discovery_engine/get_ai_search_chunks.py +0 -0
  88. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/embedder/__init__.py +0 -0
  89. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/embedder/embed_chunk.py +0 -0
  90. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/excel/__init__.py +0 -0
  91. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/excel/plugin.py +0 -0
  92. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/__init__.py +0 -0
  93. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/add_file.py +0 -0
  94. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/download_folder.py +0 -0
  95. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/download_url.py +0 -0
  96. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/extract_and_sign.py +0 -0
  97. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/gcs/metadata.py +0 -0
  98. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/__init__.py +0 -0
  99. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/file_handling.py +0 -0
  100. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/images.py +0 -0
  101. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/init.py +0 -0
  102. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/process_funcs_cls.py +0 -0
  103. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/genai/safety.py +0 -0
  104. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/invoke/__init__.py +0 -0
  105. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/invoke/async_class.py +0 -0
  106. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/invoke/direct_vac_func.py +0 -0
  107. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/invoke/invoke_vac_utils.py +0 -0
  108. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/langfuse/__init__.py +0 -0
  109. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/langfuse/callback.py +0 -0
  110. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/langfuse/evals.py +0 -0
  111. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/langfuse/prompts.py +0 -0
  112. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/llamaindex/__init__.py +0 -0
  113. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/llamaindex/get_files.py +0 -0
  114. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/llamaindex/import_files.py +0 -0
  115. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/llamaindex/llamaindex_class.py +0 -0
  116. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/llamaindex/user_history.py +0 -0
  117. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/lookup/__init__.py +0 -0
  118. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/lookup/model_lookup.yaml +0 -0
  119. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/patches/__init__.py +0 -0
  120. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/patches/langchain/__init__.py +0 -0
  121. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/patches/langchain/lancedb.py +0 -0
  122. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/patches/langchain/vertexai.py +0 -0
  123. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/pubsub/__init__.py +0 -0
  124. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/pubsub/process_pubsub.py +0 -0
  125. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/pubsub/pubsub_manager.py +0 -0
  126. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/qna/__init__.py +0 -0
  127. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/qna/parsers.py +0 -0
  128. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/qna/retry.py +0 -0
  129. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/senses/__init__.py +0 -0
  130. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/senses/stream_voice.py +0 -0
  131. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/streaming/__init__.py +0 -0
  132. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/streaming/content_buffer.py +0 -0
  133. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/streaming/langserve.py +0 -0
  134. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/streaming/stream_lookup.py +0 -0
  135. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/streaming/streaming.py +0 -0
  136. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/summarise/__init__.py +0 -0
  137. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/summarise/summarise.py +0 -0
  138. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/terraform/__init__.py +0 -0
  139. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/terraform/tfvars_editor.py +0 -0
  140. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/tools/__init__.py +0 -0
  141. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/tools/web_browser.py +0 -0
  142. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/__init__.py +0 -0
  143. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/api_key.py +0 -0
  144. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/big_context.py +0 -0
  145. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/config.py +0 -0
  146. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/config_class.py +0 -0
  147. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/config_schema.py +0 -0
  148. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/gcp.py +0 -0
  149. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/gcp_project.py +0 -0
  150. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/mime.py +0 -0
  151. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/parsers.py +0 -0
  152. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/timedelta.py +0 -0
  153. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/user_ids.py +0 -0
  154. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/utils/version.py +0 -0
  155. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/__init__.py +0 -0
  156. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/extensions_call.py +0 -0
  157. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/extensions_class.py +0 -0
  158. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/genai_functions.py +0 -0
  159. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/init.py +0 -0
  160. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/memory_tools.py +0 -0
  161. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/safety.py +0 -0
  162. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo/vertex/type_dict_to_json.py +0 -0
  163. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/dependency_links.txt +0 -0
  164. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/entry_points.txt +0 -0
  165. {sunholo-0.111.0 → sunholo-0.112.3}/sunholo.egg-info/top_level.txt +0 -0
  166. {sunholo-0.111.0 → sunholo-0.112.3}/tests/test_async.py +0 -0
  167. {sunholo-0.111.0 → sunholo-0.112.3}/tests/test_chat_history.py +0 -0
  168. {sunholo-0.111.0 → sunholo-0.112.3}/tests/test_config.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.111.0
3
+ Version: 0.112.3
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.111.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.112.3.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -59,6 +59,7 @@ Requires-Dist: langchain-google-genai==1.0.10; extra == "all"
59
59
  Requires-Dist: langchain_google_alloydb_pg; extra == "all"
60
60
  Requires-Dist: langchain-anthropic==0.1.23; extra == "all"
61
61
  Requires-Dist: langchain-google-vertexai; extra == "all"
62
+ Requires-Dist: langchain-unstructured; extra == "all"
62
63
  Requires-Dist: langfuse; extra == "all"
63
64
  Requires-Dist: numpy; extra == "all"
64
65
  Requires-Dist: pg8000; extra == "all"
@@ -78,7 +79,7 @@ Requires-Dist: tabulate; extra == "all"
78
79
  Requires-Dist: tantivy; extra == "all"
79
80
  Requires-Dist: tenacity; extra == "all"
80
81
  Requires-Dist: tiktoken; extra == "all"
81
- Requires-Dist: unstructured[local-inference]==0.14.9; extra == "all"
82
+ Requires-Dist: unstructured[all-docs,local-inference]; extra == "all"
82
83
  Requires-Dist: xlwings; extra == "all"
83
84
  Provides-Extra: azure
84
85
  Requires-Dist: azure-identity; extra == "azure"
@@ -98,11 +99,12 @@ Requires-Dist: tantivy; extra == "database"
98
99
  Provides-Extra: pipeline
99
100
  Requires-Dist: GitPython; extra == "pipeline"
100
101
  Requires-Dist: lark; extra == "pipeline"
102
+ Requires-Dist: langchain-unstructured; extra == "pipeline"
101
103
  Requires-Dist: psutil; extra == "pipeline"
102
104
  Requires-Dist: pypdf; extra == "pipeline"
103
105
  Requires-Dist: pytesseract; extra == "pipeline"
104
106
  Requires-Dist: tabulate; extra == "pipeline"
105
- Requires-Dist: unstructured[local-inference]==0.14.9; extra == "pipeline"
107
+ Requires-Dist: unstructured[all-docs,local-inference]; extra == "pipeline"
106
108
  Provides-Extra: gcp
107
109
  Requires-Dist: anthropic[vertex]; extra == "gcp"
108
110
  Requires-Dist: google-api-python-client; extra == "gcp"
@@ -1,6 +1,6 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
- version = '0.111.0'
3
+ version = '0.112.3'
4
4
 
5
5
  setup(
6
6
  name='sunholo',
@@ -71,6 +71,7 @@ setup(
71
71
  "langchain_google_alloydb_pg",
72
72
  "langchain-anthropic==0.1.23",
73
73
  "langchain-google-vertexai",
74
+ "langchain-unstructured",
74
75
  "langfuse",
75
76
  "numpy",
76
77
  "pg8000",
@@ -90,7 +91,7 @@ setup(
90
91
  "tantivy",
91
92
  "tenacity",
92
93
  "tiktoken",
93
- "unstructured[local-inference]==0.14.9",
94
+ "unstructured[local-inference,all-docs]",
94
95
  "xlwings"
95
96
  ],
96
97
  'azure': [
@@ -114,11 +115,12 @@ setup(
114
115
  'pipeline': [
115
116
  "GitPython",
116
117
  "lark",
118
+ "langchain-unstructured",
117
119
  "psutil",
118
120
  "pypdf",
119
121
  "pytesseract",
120
122
  "tabulate",
121
- "unstructured[local-inference]==0.14.9",
123
+ "unstructured[local-inference,all-docs]"
122
124
  ],
123
125
  'gcp': [
124
126
  "anthropic[vertex]",
@@ -11,9 +11,10 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
- from langchain_community.document_loaders import UnstructuredFileLoader
15
- from langchain_community.document_loaders import UnstructuredAPIFileLoader
16
- from langchain_community.document_loaders import UnstructuredURLLoader
14
+ try:
15
+ from langchain_unstructured import UnstructuredLoader
16
+ except ImportError:
17
+ UnstructuredLoader = None
17
18
 
18
19
  from langchain_community.document_loaders import GitLoader
19
20
  from langchain_community.document_loaders import GoogleDriveLoader
@@ -159,10 +160,12 @@ def read_gdrive_to_document(url: str, metadata: dict = None):
159
160
 
160
161
  def read_url_to_document(url: str, metadata: dict = None):
161
162
 
163
+ if not UnstructuredLoader:
164
+ raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
162
165
  unstructured_kwargs = {"pdf_infer_table_structure": True,
163
166
  "extract_image_block_types": ["Image", "Table"]
164
167
  }
165
- loader = UnstructuredURLLoader(urls=[url], mode="elements", unstructured_kwargs=unstructured_kwargs)
168
+ loader = UnstructuredLoader(web_url=url, mode="elements", unstructured_kwargs=unstructured_kwargs)
166
169
  docs = loader.load()
167
170
  if metadata is not None:
168
171
  for doc in docs:
@@ -170,7 +173,7 @@ def read_url_to_document(url: str, metadata: dict = None):
170
173
  if not doc.metadata.get("source") and doc.metadata.get("url"):
171
174
  doc.metadata["source"] = doc.metadata["url"]
172
175
 
173
- log.info(f"UnstructuredURLLoader docs: {docs}")
176
+ log.info(f"UnstructuredLoader docs: {docs}")
174
177
 
175
178
  return docs
176
179
 
@@ -184,18 +187,21 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
184
187
  log.info(f"Already uploaded to bucket, skipping {pdf_path}")
185
188
  return []
186
189
 
187
- log.info(f"Sending {pdf_path} to UnstructuredAPIFileLoader")
190
+ log.info(f"Sending {pdf_path} to UnstructuredLoader")
188
191
  UNSTRUCTURED_URL = os.getenv("UNSTRUCTURED_URL")
189
192
  unstructured_kwargs = {"pdf_infer_table_structure": True,
190
193
  "extract_image_block_types": ["Image", "Table"]
191
194
  }
192
195
 
196
+ if not UnstructuredLoader:
197
+ raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
198
+
193
199
  if UNSTRUCTURED_URL:
194
200
  log.debug(f"Found UNSTRUCTURED_URL: {UNSTRUCTURED_URL}")
195
201
  the_endpoint = f"{UNSTRUCTURED_URL}/general/v0/general"
196
202
  try:
197
- loader = UnstructuredAPIFileLoader(
198
- pdf_path,
203
+ loader = UnstructuredLoader(
204
+ file_path=pdf_path,
199
205
  url=the_endpoint,
200
206
  mode="elements",
201
207
  **unstructured_kwargs)
@@ -206,8 +212,8 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
206
212
  else:
207
213
  raise err
208
214
  else:
209
- loader = UnstructuredAPIFileLoader(
210
- pdf_path,
215
+ loader = UnstructuredLoader(
216
+ file_path=pdf_path,
211
217
  api_key=UNSTRUCTURED_KEY,
212
218
  mode="elements",
213
219
  **unstructured_kwargs)
@@ -216,7 +222,7 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
216
222
  try:
217
223
  docs = loader.load() # this takes a long time 30m+ for big PDF files
218
224
  except ValueError as e:
219
- log.info(f"Error for {gs_file} from UnstructuredAPIFileLoader: {str(e)}")
225
+ log.info(f"Error for {gs_file} from UnstructuredLoader: {str(e)}")
220
226
  pdf_path = pathlib.Path(gs_file)
221
227
  if pdf_path.suffix == ".pdf":
222
228
  local_doc = read_pdf_file(pdf_path, metadata=metadata)
@@ -262,13 +268,16 @@ def read_file_to_documents(gs_file: pathlib.Path, metadata: dict = None):
262
268
 
263
269
  def convert_to_txt_and_extract(gs_file, split=False):
264
270
 
271
+ if not UnstructuredLoader:
272
+ raise ImportError("UnstructuredLoader requires 'langchain_unstructured' to be installed")
273
+
265
274
  log.info("trying file parsing locally via .txt conversion")
266
275
  txt_file = None
267
276
  docs = []
268
277
  try:
269
278
  # Convert the file to .txt and try again
270
279
  txt_file = convert_to_txt(gs_file)
271
- loader = UnstructuredFileLoader(
280
+ loader = UnstructuredLoader(
272
281
  txt_file,
273
282
  mode="elements")
274
283
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.111.0
3
+ Version: 0.112.3
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.111.0.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.112.3.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -59,6 +59,7 @@ Requires-Dist: langchain-google-genai==1.0.10; extra == "all"
59
59
  Requires-Dist: langchain_google_alloydb_pg; extra == "all"
60
60
  Requires-Dist: langchain-anthropic==0.1.23; extra == "all"
61
61
  Requires-Dist: langchain-google-vertexai; extra == "all"
62
+ Requires-Dist: langchain-unstructured; extra == "all"
62
63
  Requires-Dist: langfuse; extra == "all"
63
64
  Requires-Dist: numpy; extra == "all"
64
65
  Requires-Dist: pg8000; extra == "all"
@@ -78,7 +79,7 @@ Requires-Dist: tabulate; extra == "all"
78
79
  Requires-Dist: tantivy; extra == "all"
79
80
  Requires-Dist: tenacity; extra == "all"
80
81
  Requires-Dist: tiktoken; extra == "all"
81
- Requires-Dist: unstructured[local-inference]==0.14.9; extra == "all"
82
+ Requires-Dist: unstructured[all-docs,local-inference]; extra == "all"
82
83
  Requires-Dist: xlwings; extra == "all"
83
84
  Provides-Extra: azure
84
85
  Requires-Dist: azure-identity; extra == "azure"
@@ -98,11 +99,12 @@ Requires-Dist: tantivy; extra == "database"
98
99
  Provides-Extra: pipeline
99
100
  Requires-Dist: GitPython; extra == "pipeline"
100
101
  Requires-Dist: lark; extra == "pipeline"
102
+ Requires-Dist: langchain-unstructured; extra == "pipeline"
101
103
  Requires-Dist: psutil; extra == "pipeline"
102
104
  Requires-Dist: pypdf; extra == "pipeline"
103
105
  Requires-Dist: pytesseract; extra == "pipeline"
104
106
  Requires-Dist: tabulate; extra == "pipeline"
105
- Requires-Dist: unstructured[local-inference]==0.14.9; extra == "pipeline"
107
+ Requires-Dist: unstructured[all-docs,local-inference]; extra == "pipeline"
106
108
  Provides-Extra: gcp
107
109
  Requires-Dist: anthropic[vertex]; extra == "gcp"
108
110
  Requires-Dist: google-api-python-client; extra == "gcp"
@@ -163,4 +163,5 @@ sunholo/vertex/safety.py
163
163
  sunholo/vertex/type_dict_to_json.py
164
164
  tests/test_async.py
165
165
  tests/test_chat_history.py
166
- tests/test_config.py
166
+ tests/test_config.py
167
+ tests/test_unstructured.py
@@ -40,6 +40,7 @@ langchain-google-genai==1.0.10
40
40
  langchain_google_alloydb_pg
41
41
  langchain-anthropic==0.1.23
42
42
  langchain-google-vertexai
43
+ langchain-unstructured
43
44
  langfuse
44
45
  numpy
45
46
  pg8000
@@ -59,7 +60,7 @@ tabulate
59
60
  tantivy
60
61
  tenacity
61
62
  tiktoken
62
- unstructured[local-inference]==0.14.9
63
+ unstructured[all-docs,local-inference]
63
64
  xlwings
64
65
 
65
66
  [anthropic]
@@ -130,11 +131,12 @@ tiktoken
130
131
  [pipeline]
131
132
  GitPython
132
133
  lark
134
+ langchain-unstructured
133
135
  psutil
134
136
  pypdf
135
137
  pytesseract
136
138
  tabulate
137
- unstructured[local-inference]==0.14.9
139
+ unstructured[all-docs,local-inference]
138
140
 
139
141
  [tools]
140
142
  openapi-spec-validator
@@ -0,0 +1,9 @@
1
+ def main():
2
+ from sunholo.chunker.loaders import read_file_to_documents
3
+
4
+ result = read_file_to_documents("README.md")
5
+ print(result)
6
+
7
+
8
+ if __name__ == "__main__":
9
+ main()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes