sunholo 0.57.2__tar.gz → 0.58.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sunholo-0.57.2/sunholo.egg-info → sunholo-0.58.2}/PKG-INFO +29 -2
- {sunholo-0.57.2 → sunholo-0.58.2}/setup.py +29 -1
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/__init__.py +1 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/qna_routes.py +0 -1
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/data_to_embed_pubsub.py +8 -8
- sunholo-0.58.2/sunholo/cli/cli.py +39 -0
- sunholo-0.58.2/sunholo/cli/cli_init.py +80 -0
- sunholo-0.58.2/sunholo/cli/configs.py +29 -0
- sunholo-0.58.2/sunholo/cli/deploy.py +43 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/retriever.py +8 -3
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/embedder/embed_chunk.py +9 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/import_files.py +24 -53
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/logging.py +9 -1
- sunholo-0.58.2/sunholo/utils/big_context.py +144 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/config.py +3 -0
- sunholo-0.58.2/sunholo/vertex/__init__.py +0 -0
- sunholo-0.58.2/sunholo/vertex/init_vertex.py +43 -0
- {sunholo-0.57.2 → sunholo-0.58.2/sunholo.egg-info}/PKG-INFO +29 -2
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/SOURCES.txt +6 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/requires.txt +28 -0
- sunholo-0.57.2/sunholo/cli/cli.py +0 -82
- {sunholo-0.57.2 → sunholo-0.58.2}/LICENSE.txt +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/MANIFEST.in +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/README.md +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/setup.cfg +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/chat_history.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/dispatch_to_qa.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/base.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/qna_routes.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/base.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/langserve.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/pubsub.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/route.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/special_commands.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/test_chat_history.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/archive/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/archive/archive.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/auth/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/auth/run.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/discord.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/webapp.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/doc_handling.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/images.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/loaders.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/message_data.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/pdfs.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/publish.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/splitter.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/cli/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/llm.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/prompt.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/vectorstore.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/alloydb.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/database.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/lancedb.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_function.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_function_time.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_table.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/return_sources.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/setup.sql +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/static_dbs.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/uuid.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/embedder/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/add_file.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/download_url.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/metadata.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/callback.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/prompts.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/generate.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/lookup/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/lookup/model_lookup.yaml +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/lancedb.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/vertexai.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/process_pubsub.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/pubsub_manager.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/parsers.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/retry.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/content_buffer.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/langserve.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/streaming.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/summarise/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/summarise/summarise.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/__init__.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/gcp.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/parsers.py +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/dependency_links.txt +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/entry_points.txt +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/top_level.txt +0 -0
- {sunholo-0.57.2 → sunholo-0.58.2}/test/test_dispatch_to_qa.py +0 -0
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sunholo
|
|
3
|
-
Version: 0.57.2
|
|
3
|
+
Version: 0.58.2
|
|
4
4
|
Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
|
|
5
5
|
Home-page: https://github.com/sunholo-data/sunholo-py
|
|
6
|
-
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.57.2.tar.gz
|
|
6
|
+
Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.58.2.tar.gz
|
|
7
7
|
Author: Holosun ApS
|
|
8
8
|
Author-email: multivac@sunholo.com
|
|
9
9
|
License: Apache License, Version 2.0
|
|
@@ -24,13 +24,19 @@ Requires-Dist: langchain-community
|
|
|
24
24
|
Provides-Extra: all
|
|
25
25
|
Requires-Dist: asyncpg; extra == "all"
|
|
26
26
|
Requires-Dist: flask; extra == "all"
|
|
27
|
+
Requires-Dist: google-auth; extra == "all"
|
|
28
|
+
Requires-Dist: google-auth-httplib2; extra == "all"
|
|
29
|
+
Requires-Dist: google-auth-oauthlib; extra == "all"
|
|
27
30
|
Requires-Dist: google-cloud-aiplatform; extra == "all"
|
|
28
31
|
Requires-Dist: google-api-python-client; extra == "all"
|
|
29
32
|
Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "all"
|
|
33
|
+
Requires-Dist: google-cloud-bigquery; extra == "all"
|
|
30
34
|
Requires-Dist: google-cloud-build; extra == "all"
|
|
31
35
|
Requires-Dist: google-cloud-logging; extra == "all"
|
|
32
36
|
Requires-Dist: google-cloud-storage; extra == "all"
|
|
33
37
|
Requires-Dist: google-cloud-pubsub; extra == "all"
|
|
38
|
+
Requires-Dist: google-cloud-discoveryengine; extra == "all"
|
|
39
|
+
Requires-Dist: google-generativeai; extra == "all"
|
|
34
40
|
Requires-Dist: gunicorn; extra == "all"
|
|
35
41
|
Requires-Dist: httpcore; extra == "all"
|
|
36
42
|
Requires-Dist: httpx; extra == "all"
|
|
@@ -44,25 +50,45 @@ Requires-Dist: langchain_google_alloydb_pg; extra == "all"
|
|
|
44
50
|
Requires-Dist: langchain-anthropic; extra == "all"
|
|
45
51
|
Requires-Dist: langfuse; extra == "all"
|
|
46
52
|
Requires-Dist: pg8000; extra == "all"
|
|
53
|
+
Requires-Dist: pgvector; extra == "all"
|
|
54
|
+
Requires-Dist: psycopg2-binary; extra == "all"
|
|
55
|
+
Requires-Dist: pypdf; extra == "all"
|
|
47
56
|
Requires-Dist: fastapi; extra == "all"
|
|
57
|
+
Requires-Dist: supabase; extra == "all"
|
|
58
|
+
Requires-Dist: tiktoken; extra == "all"
|
|
48
59
|
Requires-Dist: python-socketio; extra == "all"
|
|
49
60
|
Provides-Extra: database
|
|
50
61
|
Requires-Dist: asyncpg; extra == "database"
|
|
62
|
+
Requires-Dist: supabase; extra == "database"
|
|
51
63
|
Requires-Dist: sqlalchemy; extra == "database"
|
|
52
64
|
Requires-Dist: pg8000; extra == "database"
|
|
65
|
+
Requires-Dist: pgvector; extra == "database"
|
|
66
|
+
Requires-Dist: psycopg2-binary; extra == "database"
|
|
53
67
|
Requires-Dist: lancedb; extra == "database"
|
|
68
|
+
Provides-Extra: pipeline
|
|
69
|
+
Requires-Dist: GitPython; extra == "pipeline"
|
|
70
|
+
Requires-Dist: lark; extra == "pipeline"
|
|
71
|
+
Requires-Dist: pypdf; extra == "pipeline"
|
|
72
|
+
Requires-Dist: tabulate; extra == "pipeline"
|
|
54
73
|
Provides-Extra: gcp
|
|
74
|
+
Requires-Dist: google-auth; extra == "gcp"
|
|
75
|
+
Requires-Dist: google-auth-httplib2; extra == "gcp"
|
|
76
|
+
Requires-Dist: google-auth-oauthlib; extra == "gcp"
|
|
55
77
|
Requires-Dist: google-cloud-aiplatform; extra == "gcp"
|
|
78
|
+
Requires-Dist: google-cloud-bigquery; extra == "gcp"
|
|
56
79
|
Requires-Dist: google-cloud-build; extra == "gcp"
|
|
57
80
|
Requires-Dist: google-cloud-storage; extra == "gcp"
|
|
58
81
|
Requires-Dist: google-cloud-logging; extra == "gcp"
|
|
59
82
|
Requires-Dist: google-cloud-pubsub; extra == "gcp"
|
|
83
|
+
Requires-Dist: google-cloud-discoveryengine; extra == "gcp"
|
|
84
|
+
Requires-Dist: google-generativeai; extra == "gcp"
|
|
60
85
|
Requires-Dist: langchain-google-genai; extra == "gcp"
|
|
61
86
|
Requires-Dist: langchain_google_alloydb_pg; extra == "gcp"
|
|
62
87
|
Requires-Dist: google-api-python-client; extra == "gcp"
|
|
63
88
|
Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "gcp"
|
|
64
89
|
Provides-Extra: openai
|
|
65
90
|
Requires-Dist: langchain-openai; extra == "openai"
|
|
91
|
+
Requires-Dist: tiktoken; extra == "openai"
|
|
66
92
|
Provides-Extra: anthropic
|
|
67
93
|
Requires-Dist: langchain-anthropic; extra == "anthropic"
|
|
68
94
|
Provides-Extra: http
|
|
@@ -73,6 +99,7 @@ Requires-Dist: httpcore; extra == "http"
|
|
|
73
99
|
Requires-Dist: httpx; extra == "http"
|
|
74
100
|
Requires-Dist: langfuse; extra == "http"
|
|
75
101
|
Requires-Dist: python-socketio; extra == "http"
|
|
102
|
+
Requires-Dist: requests; extra == "http"
|
|
76
103
|
|
|
77
104
|
## Introduction
|
|
78
105
|
This is the Sunholo Python project, a comprehensive toolkit for working with language models and vector stores on Google Cloud Platform. It provides a wide range of functionalities and utilities to facilitate the development and deployment of language model applications.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from setuptools import setup, find_packages
|
|
2
2
|
|
|
3
3
|
# Define your base version
|
|
4
|
-
version = '0.57.2'
|
|
4
|
+
version = '0.58.2'
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name='sunholo',
|
|
@@ -37,13 +37,19 @@ setup(
|
|
|
37
37
|
'all': [
|
|
38
38
|
"asyncpg",
|
|
39
39
|
"flask",
|
|
40
|
+
"google-auth",
|
|
41
|
+
"google-auth-httplib2",
|
|
42
|
+
"google-auth-oauthlib",
|
|
40
43
|
"google-cloud-aiplatform",
|
|
41
44
|
"google-api-python-client",
|
|
42
45
|
"google-cloud-alloydb-connector[pg8000]",
|
|
46
|
+
"google-cloud-bigquery",
|
|
43
47
|
"google-cloud-build",
|
|
44
48
|
"google-cloud-logging",
|
|
45
49
|
"google-cloud-storage",
|
|
46
50
|
"google-cloud-pubsub",
|
|
51
|
+
"google-cloud-discoveryengine",
|
|
52
|
+
"google-generativeai",
|
|
47
53
|
"gunicorn",
|
|
48
54
|
"httpcore",
|
|
49
55
|
"httpx",
|
|
@@ -57,21 +63,41 @@ setup(
|
|
|
57
63
|
"langchain-anthropic",
|
|
58
64
|
"langfuse",
|
|
59
65
|
"pg8000",
|
|
66
|
+
"pgvector",
|
|
67
|
+
"psycopg2-binary",
|
|
68
|
+
"pypdf",
|
|
60
69
|
"fastapi",
|
|
70
|
+
"supabase",
|
|
71
|
+
"tiktoken",
|
|
61
72
|
"python-socketio"
|
|
62
73
|
],
|
|
63
74
|
'database': [
|
|
64
75
|
"asyncpg",
|
|
76
|
+
"supabase",
|
|
65
77
|
"sqlalchemy",
|
|
66
78
|
"pg8000",
|
|
79
|
+
"pgvector",
|
|
80
|
+
"psycopg2-binary",
|
|
67
81
|
"lancedb",
|
|
68
82
|
],
|
|
83
|
+
'pipeline': [
|
|
84
|
+
"GitPython",
|
|
85
|
+
"lark",
|
|
86
|
+
"pypdf",
|
|
87
|
+
"tabulate",
|
|
88
|
+
],
|
|
69
89
|
'gcp': [
|
|
90
|
+
"google-auth",
|
|
91
|
+
"google-auth-httplib2",
|
|
92
|
+
"google-auth-oauthlib",
|
|
70
93
|
"google-cloud-aiplatform",
|
|
94
|
+
"google-cloud-bigquery",
|
|
71
95
|
"google-cloud-build",
|
|
72
96
|
"google-cloud-storage",
|
|
73
97
|
"google-cloud-logging",
|
|
74
98
|
"google-cloud-pubsub",
|
|
99
|
+
"google-cloud-discoveryengine",
|
|
100
|
+
"google-generativeai",
|
|
75
101
|
"langchain-google-genai",
|
|
76
102
|
"langchain_google_alloydb_pg",
|
|
77
103
|
"google-api-python-client",
|
|
@@ -79,6 +105,7 @@ setup(
|
|
|
79
105
|
],
|
|
80
106
|
'openai': [
|
|
81
107
|
"langchain-openai",
|
|
108
|
+
"tiktoken"
|
|
82
109
|
],
|
|
83
110
|
'anthropic': [
|
|
84
111
|
"langchain-anthropic",
|
|
@@ -91,6 +118,7 @@ setup(
|
|
|
91
118
|
"httpx",
|
|
92
119
|
"langfuse",
|
|
93
120
|
"python-socketio",
|
|
121
|
+
"requests"
|
|
94
122
|
]
|
|
95
123
|
},
|
|
96
124
|
classifiers=[
|
|
@@ -63,14 +63,14 @@ def data_to_embed_pubsub(data: dict):
|
|
|
63
63
|
if metadata:
|
|
64
64
|
metadata["vector_name"] = vector_name
|
|
65
65
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
66
|
+
if metadata.get("return_chunks"):
|
|
67
|
+
log.info("attributes.return_chunks=True detected, skipping process chunks queue")
|
|
68
|
+
output_list = []
|
|
69
|
+
if chunks:
|
|
70
|
+
for chunk in chunks:
|
|
71
|
+
output_list.append({"page_content": chunk.page_content, "metadata": chunk.metadata})
|
|
72
|
+
|
|
73
|
+
return output_list
|
|
74
74
|
|
|
75
75
|
process_docs_chunks_vector_name(chunks, vector_name, metadata)
|
|
76
76
|
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
|
|
3
|
+
from .configs import setup_list_configs_subparser
|
|
4
|
+
from .deploy import setup_deploy_subparser
|
|
5
|
+
from .cli_init import setup_init_subparser
|
|
6
|
+
|
|
7
|
+
def main(args=None):
|
|
8
|
+
"""
|
|
9
|
+
Entry point for the sunholo console script. This function parses command line arguments
|
|
10
|
+
and invokes the appropriate functionality based on the user input.
|
|
11
|
+
|
|
12
|
+
Example commands:
|
|
13
|
+
```bash
|
|
14
|
+
sunholo deploy --config_path . --gcs_bucket your-gcs-bucket --lancedb_bucket your-lancedb-bucket
|
|
15
|
+
```
|
|
16
|
+
"""
|
|
17
|
+
parser = argparse.ArgumentParser(description="sunholo CLI tool for deploying GenAI VACs")
|
|
18
|
+
subparsers = parser.add_subparsers(title='commands',
|
|
19
|
+
description='Valid commands',
|
|
20
|
+
help='Commands',
|
|
21
|
+
dest='command',
|
|
22
|
+
required=True)
|
|
23
|
+
|
|
24
|
+
# Setup deploy command
|
|
25
|
+
setup_deploy_subparser(subparsers)
|
|
26
|
+
# Setup list-configs command
|
|
27
|
+
setup_list_configs_subparser(subparsers)
|
|
28
|
+
# init
|
|
29
|
+
setup_init_subparser(subparsers)
|
|
30
|
+
|
|
31
|
+
args = parser.parse_args(args)
|
|
32
|
+
|
|
33
|
+
if hasattr(args, 'func'):
|
|
34
|
+
args.func(args)
|
|
35
|
+
else:
|
|
36
|
+
parser.print_help()
|
|
37
|
+
|
|
38
|
+
if __name__ == "__main__":
|
|
39
|
+
main()
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from ..utils.config import get_module_filepath
|
|
4
|
+
|
|
5
|
+
def init_project(args):
|
|
6
|
+
"""
|
|
7
|
+
Initializes a new sunholo project with a basic configuration file and directory structure.
|
|
8
|
+
|
|
9
|
+
**Explanation:**
|
|
10
|
+
|
|
11
|
+
1. **Import Necessary Modules:**
|
|
12
|
+
- `os` for file system operations.
|
|
13
|
+
- `shutil` for copying files and directories.
|
|
14
|
+
- `log` from `sunholo.logging` for logging messages.
|
|
15
|
+
- `get_module_filepath` from `sunholo.utils.config` to get the absolute path of template files.
|
|
16
|
+
|
|
17
|
+
2. **`init_project` Function:**
|
|
18
|
+
- Takes an `args` object from argparse, containing the `project_name`.
|
|
19
|
+
- Creates the project directory using `os.makedirs`.
|
|
20
|
+
- Copies template files from the `templates/project` directory to the new project directory using `shutil.copy` and `shutil.copytree`.
|
|
21
|
+
- Logs informative messages about the initialization process.
|
|
22
|
+
|
|
23
|
+
3. **`setup_init_subparser` Function:**
|
|
24
|
+
- Sets up the `init` subcommand for the `sunholo` CLI.
|
|
25
|
+
- Adds an argument `project_name` to specify the name of the new project.
|
|
26
|
+
- Sets the `func` attribute to `init_project`, so the parser knows which function to call when the `init` command is used.
|
|
27
|
+
|
|
28
|
+
**Template Files (`templates/project`):**
|
|
29
|
+
|
|
30
|
+
You'll need to create a `templates/project` directory within your `sunholo` package and place the following template files in it:
|
|
31
|
+
|
|
32
|
+
* **`config/llm_config.yaml`:** A basic configuration file with placeholders for LLM settings, vector stores, etc.
|
|
33
|
+
* **`config/cloud_run_urls.json`:** A template for Cloud Run URLs.
|
|
34
|
+
* **`app.py`:** A basic Flask app that can be customized for the project.
|
|
35
|
+
* **`.gitignore`:** A gitignore file to exclude unnecessary files from version control.
|
|
36
|
+
* **`README.md`:** A README file with instructions for setting up and running the project.
|
|
37
|
+
|
|
38
|
+
**Usage:**
|
|
39
|
+
|
|
40
|
+
After adding this code to your `cli.py` and creating the template files, users can initialize a new project using the following command:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
sunholo init my_genai_project
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
This will create a new directory named `my_genai_project` with the template files, allowing users to start building their GenAI application.
|
|
47
|
+
|
|
48
|
+
"""
|
|
49
|
+
project_name = args.project_name
|
|
50
|
+
project_dir = os.path.join(os.getcwd(), project_name)
|
|
51
|
+
|
|
52
|
+
print(f"Initializing project: {project_name} in directory: {project_dir}")
|
|
53
|
+
|
|
54
|
+
# Create project directory
|
|
55
|
+
if os.path.exists(project_dir):
|
|
56
|
+
print(f"Directory {project_dir} already exists. Please choose a different project name.")
|
|
57
|
+
return
|
|
58
|
+
|
|
59
|
+
os.makedirs(project_dir)
|
|
60
|
+
|
|
61
|
+
# Copy template files
|
|
62
|
+
template_dir = get_module_filepath("templates/project")
|
|
63
|
+
for filename in os.listdir(template_dir):
|
|
64
|
+
src_path = os.path.join(template_dir, filename)
|
|
65
|
+
dest_path = os.path.join(project_dir, filename)
|
|
66
|
+
if os.path.isfile(src_path):
|
|
67
|
+
shutil.copy(src_path, dest_path)
|
|
68
|
+
elif os.path.isdir(src_path):
|
|
69
|
+
shutil.copytree(src_path, dest_path)
|
|
70
|
+
|
|
71
|
+
print(f"Project {project_name} initialized successfully.")
|
|
72
|
+
print(f"Navigate to {project_dir} and customize the configuration files in the 'config' directory.")
|
|
73
|
+
|
|
74
|
+
def setup_init_subparser(subparsers):
|
|
75
|
+
"""
|
|
76
|
+
Sets up an argparse subparser for the 'init' command.
|
|
77
|
+
"""
|
|
78
|
+
init_parser = subparsers.add_parser('init', help='Initializes a new sunholo project.')
|
|
79
|
+
init_parser.add_argument('project_name', help='The name of the new project.')
|
|
80
|
+
init_parser.set_defaults(func=init_project)
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from ..utils.config import load_all_configs
|
|
2
|
+
|
|
3
|
+
from pprint import pprint
|
|
4
|
+
|
|
5
|
+
def list_configs(args):
|
|
6
|
+
"""
|
|
7
|
+
Lists configuration files, filtered by kind if specified.
|
|
8
|
+
"""
|
|
9
|
+
print("Listing configuration files")
|
|
10
|
+
configs = load_all_configs()
|
|
11
|
+
|
|
12
|
+
if args.kind:
|
|
13
|
+
if args.kind in configs:
|
|
14
|
+
print(f"## Config kind: {args.kind}")
|
|
15
|
+
pprint(configs[args.kind])
|
|
16
|
+
else:
|
|
17
|
+
print(f"No configurations found for kind: {args.kind}")
|
|
18
|
+
else:
|
|
19
|
+
for kind, config in configs.items():
|
|
20
|
+
pprint(f"## Config kind: {kind}")
|
|
21
|
+
pprint(config)
|
|
22
|
+
|
|
23
|
+
def setup_list_configs_subparser(subparsers):
|
|
24
|
+
"""
|
|
25
|
+
Sets up an argparse subparser for the 'list-configs' command.
|
|
26
|
+
"""
|
|
27
|
+
list_configs_parser = subparsers.add_parser('list-configs', help='Lists all configuration files and their details.')
|
|
28
|
+
list_configs_parser.add_argument('--kind', help='Filter configurations by kind.')
|
|
29
|
+
list_configs_parser.set_defaults(func=list_configs)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from subprocess import Popen
|
|
3
|
+
from ..utils.config import load_all_configs
|
|
4
|
+
|
|
5
|
+
def deploy_vac(args):
|
|
6
|
+
"""
|
|
7
|
+
Deploys the VAC by running a Flask app locally.
|
|
8
|
+
"""
|
|
9
|
+
print(f"Deploying VAC: {args.vac_name} locally")
|
|
10
|
+
|
|
11
|
+
# Load the vacConfig
|
|
12
|
+
configs_by_kind = load_all_configs()
|
|
13
|
+
vac_config = configs_by_kind.get('vacConfig', {}).get('vac', {}).get(args.vac_name)
|
|
14
|
+
|
|
15
|
+
if not vac_config:
|
|
16
|
+
raise ValueError(f"No configuration found for VAC: {args.vac_name}")
|
|
17
|
+
|
|
18
|
+
# Assuming the Flask app is in 'app.py' within the config path
|
|
19
|
+
app_path = os.path.join(args.config_path, 'app.py')
|
|
20
|
+
if not os.path.exists(app_path):
|
|
21
|
+
raise ValueError(f"app.py not found in {args.config_path}")
|
|
22
|
+
|
|
23
|
+
print(f"Running Flask app from {app_path}")
|
|
24
|
+
|
|
25
|
+
# Run the Flask app
|
|
26
|
+
command = ["python", app_path]
|
|
27
|
+
print(f"Running Flask app with command: {' '.join(command)}")
|
|
28
|
+
process = Popen(command)
|
|
29
|
+
process.communicate()
|
|
30
|
+
|
|
31
|
+
def setup_deploy_subparser(subparsers):
|
|
32
|
+
"""
|
|
33
|
+
Sets up an argparse subparser for the 'deploy' command.
|
|
34
|
+
|
|
35
|
+
Example command:
|
|
36
|
+
```bash
|
|
37
|
+
sunholo deploy "vac_name" --config_path .
|
|
38
|
+
```
|
|
39
|
+
"""
|
|
40
|
+
deploy_parser = subparsers.add_parser('deploy', help='Triggers a deployment of a VAC.')
|
|
41
|
+
deploy_parser.add_argument('vac_name', help='The name of the VAC to deploy.')
|
|
42
|
+
deploy_parser.add_argument('--config_path', default='.', help='Path to the directory containing the config folder `config/` and Flask app `app.py`, defaults to current directory. Set _CONFIG_FOLDER env var to change config location.')
|
|
43
|
+
deploy_parser.set_defaults(func=deploy_vac)
|
|
@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def load_memories(vector_name):
|
|
30
|
-
memories = load_config_key("memory", vector_name,
|
|
30
|
+
memories = load_config_key("memory", vector_name, type="vacConfig")
|
|
31
31
|
log.info(f"Found memory settings for {vector_name}: {memories}")
|
|
32
32
|
if len(memories) == 0:
|
|
33
33
|
log.info(f"No memory settings found for {vector_name}")
|
|
@@ -49,7 +49,8 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
49
49
|
if embeddings is None:
|
|
50
50
|
embeddings = get_embeddings(vector_name)
|
|
51
51
|
vectorstore = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
|
|
52
|
-
|
|
52
|
+
k_override = value.get('k', 3)
|
|
53
|
+
vs_retriever = vectorstore.as_retriever(search_kwargs=dict(k=k_override))
|
|
53
54
|
retriever_list.append(vs_retriever)
|
|
54
55
|
|
|
55
56
|
if value.get('provider') == "GoogleCloudEnterpriseSearchRetriever":
|
|
@@ -68,6 +69,10 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
68
69
|
if len(retriever_list) == 0:
|
|
69
70
|
log.info(f"No retrievers were created for {memories}")
|
|
70
71
|
return None
|
|
72
|
+
|
|
73
|
+
k_override = load_config_key("memory_k", vector_name, type="vacConfig")
|
|
74
|
+
if not k_override:
|
|
75
|
+
k_override = 3
|
|
71
76
|
|
|
72
77
|
lotr = MergerRetriever(retrievers=retriever_list)
|
|
73
78
|
|
|
@@ -76,6 +81,6 @@ def pick_retriever(vector_name, embeddings=None):
|
|
|
76
81
|
pipeline = DocumentCompressorPipeline(transformers=[filter])
|
|
77
82
|
retriever = ContextualCompressionRetriever(
|
|
78
83
|
base_compressor=pipeline, base_retriever=lotr,
|
|
79
|
-
k=
|
|
84
|
+
k=k_override)
|
|
80
85
|
|
|
81
86
|
return retriever
|
|
@@ -115,6 +115,15 @@ def embed_pubsub_chunk(data: dict):
|
|
|
115
115
|
embed_llm = value.get('llm')
|
|
116
116
|
if embed_llm:
|
|
117
117
|
embeddings = pick_embedding(embed_llm)
|
|
118
|
+
# check if read only
|
|
119
|
+
read_only = value.get('readonly')
|
|
120
|
+
if read_only:
|
|
121
|
+
continue
|
|
122
|
+
# read from a different vector_name
|
|
123
|
+
vector_name_other = value.get('vector_name')
|
|
124
|
+
if vector_name_other:
|
|
125
|
+
log.warning(f"Using different vector_name for vectorstore: {vector_name_other} overriding {vector_name}")
|
|
126
|
+
vector_name = vector_name_other
|
|
118
127
|
vectorstore_obj = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
|
|
119
128
|
vs_retriever = vectorstore_obj.as_retriever(search_kwargs=dict(k=3))
|
|
120
129
|
vectorstore_list.append(vs_retriever)
|
|
@@ -1,54 +1,14 @@
|
|
|
1
|
-
|
|
2
1
|
try:
|
|
3
2
|
from vertexai.preview import rag
|
|
4
|
-
from vertexai.preview.generative_models import GenerativeModel, Tool
|
|
5
|
-
import vertexai
|
|
6
3
|
except ImportError:
|
|
7
4
|
rag = None
|
|
8
5
|
|
|
9
6
|
from ..logging import log
|
|
10
7
|
from ..utils.config import load_config_key
|
|
8
|
+
from ..vertex import init_vertex
|
|
11
9
|
|
|
12
10
|
# Create a RAG Corpus, Import Files
|
|
13
11
|
|
|
14
|
-
def init_vertex(gcp_config):
|
|
15
|
-
"""
|
|
16
|
-
Initializes the Vertex AI environment using the provided Google Cloud Platform configuration.
|
|
17
|
-
|
|
18
|
-
This function configures the Vertex AI API session with specified project and location details
|
|
19
|
-
from the gcp_config dictionary. It is essential to call this function at the beginning of a session
|
|
20
|
-
before performing any operations related to Vertex AI.
|
|
21
|
-
|
|
22
|
-
Parameters:
|
|
23
|
-
gcp_config (dict): A dictionary containing the Google Cloud Platform configuration with keys:
|
|
24
|
-
- 'project_id': The Google Cloud project ID to configure for Vertex AI.
|
|
25
|
-
- 'location': The Google Cloud region to configure for Vertex AI.
|
|
26
|
-
|
|
27
|
-
Raises:
|
|
28
|
-
KeyError: If the necessary keys ('project_id' or 'location') are missing in the gcp_config dictionary.
|
|
29
|
-
ModuleNotFoundError: If the Vertex AI module is not installed and needs to be installed via pip.
|
|
30
|
-
|
|
31
|
-
Example:
|
|
32
|
-
```python
|
|
33
|
-
gcp_config = {
|
|
34
|
-
'project_id': 'your-project-id',
|
|
35
|
-
'location': 'us-central1'
|
|
36
|
-
}
|
|
37
|
-
init_vertex(gcp_config)
|
|
38
|
-
# This will initialize the Vertex AI session with the provided project ID and location.
|
|
39
|
-
|
|
40
|
-
Note:
|
|
41
|
-
Ensure that the 'vertexai' module is installed and correctly configured before calling this function.
|
|
42
|
-
The function assumes that the required 'vertexai' library is available and that the logging setup is already in place.
|
|
43
|
-
"""
|
|
44
|
-
if not rag:
|
|
45
|
-
log.error("Need to install vertexai module via `pip install google-cloud-aiplatform`")
|
|
46
|
-
|
|
47
|
-
# Initialize Vertex AI API once per session
|
|
48
|
-
project_id = gcp_config.get('project_id')
|
|
49
|
-
location = gcp_config.get('location')
|
|
50
|
-
vertexai.init(project=project_id, location=location)
|
|
51
|
-
|
|
52
12
|
def get_corpus(gcp_config):
|
|
53
13
|
"""
|
|
54
14
|
Retrieves a LlamaIndex corpus from Vertex AI based on the provided Google Cloud configuration.
|
|
@@ -86,6 +46,9 @@ def get_corpus(gcp_config):
|
|
|
86
46
|
print("Error fetching corpus:", str(e))
|
|
87
47
|
```
|
|
88
48
|
"""
|
|
49
|
+
if not rag:
|
|
50
|
+
raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
|
|
51
|
+
|
|
89
52
|
project_id = gcp_config.get('project_id')
|
|
90
53
|
location = gcp_config.get('location')
|
|
91
54
|
rag_id = gcp_config.get('rag_id')
|
|
@@ -136,7 +99,10 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
136
99
|
# Imported file to corpus: {'status': 'success'}
|
|
137
100
|
```
|
|
138
101
|
"""
|
|
139
|
-
|
|
102
|
+
if not rag:
|
|
103
|
+
raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
|
|
104
|
+
|
|
105
|
+
gcp_config = load_config_key("gcp_config", vector_name=vector_name, type="vacConfig")
|
|
140
106
|
if not gcp_config:
|
|
141
107
|
raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
|
|
142
108
|
|
|
@@ -154,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
154
120
|
log.info(f"Found llamaindex corpus: {corpus}")
|
|
155
121
|
|
|
156
122
|
# native support for cloud storage and drive links
|
|
157
|
-
chunker_config = load_config_key("chunker", vector_name=vector_name,
|
|
123
|
+
chunker_config = load_config_key("chunker", vector_name=vector_name, type="vacConfig")
|
|
158
124
|
|
|
159
125
|
if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
|
|
160
126
|
log.info(f"rag.import_files for {message_data}")
|
|
@@ -193,12 +159,8 @@ def do_llamaindex(message_data, metadata, vector_name):
|
|
|
193
159
|
# description=description,
|
|
194
160
|
#)
|
|
195
161
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
# llamaindex handles its own chunking/embedding
|
|
199
|
-
memories = load_config_key("memory", vector_name=vector_name, filename = "config/llm_config.yaml")
|
|
200
|
-
total_memories = len(memories)
|
|
201
|
-
llama = None
|
|
162
|
+
def check_llamaindex_in_memory(vector_name):
|
|
163
|
+
memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
|
|
202
164
|
for memory in memories: # Iterate over the list
|
|
203
165
|
for key, value in memory.items(): # Now iterate over the dictionary
|
|
204
166
|
log.info(f"Found memory {key}")
|
|
@@ -206,10 +168,19 @@ def llamaindex_chunker_check(message_data, metadata, vector_name):
|
|
|
206
168
|
if vectorstore:
|
|
207
169
|
log.info(f"Found vectorstore {vectorstore}")
|
|
208
170
|
if vectorstore == "llamaindex":
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
171
|
+
|
|
172
|
+
return True
|
|
173
|
+
|
|
174
|
+
return False
|
|
175
|
+
|
|
176
|
+
def llamaindex_chunker_check(message_data, metadata, vector_name):
|
|
177
|
+
# llamaindex handles its own chunking/embedding
|
|
178
|
+
memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
|
|
179
|
+
total_memories = len(memories)
|
|
180
|
+
llama = None
|
|
181
|
+
if check_llamaindex_in_memory(vector_name):
|
|
182
|
+
llama = do_llamaindex(message_data, metadata, vector_name)
|
|
183
|
+
log.info(f"Processed llamaindex: {llama}")
|
|
213
184
|
|
|
214
185
|
# If llamaindex is the only entry, return
|
|
215
186
|
if llama and total_memories == 1:
|
|
@@ -249,4 +249,12 @@ def log_folder_location(folder_name):
|
|
|
249
249
|
else:
|
|
250
250
|
logging.warning(f"The folder '{folder_name}' does not exist in the current working directory: {current_working_directory}")
|
|
251
251
|
|
|
252
|
-
|
|
252
|
+
# lazy eval
|
|
253
|
+
_logger = None
|
|
254
|
+
def get_logger():
|
|
255
|
+
global _logger
|
|
256
|
+
if _logger is None:
|
|
257
|
+
_logger = setup_logging("sunholo")
|
|
258
|
+
return _logger
|
|
259
|
+
|
|
260
|
+
log = get_logger()
|