sunholo 0.57.2__tar.gz → 0.58.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. {sunholo-0.57.2/sunholo.egg-info → sunholo-0.58.2}/PKG-INFO +29 -2
  2. {sunholo-0.57.2 → sunholo-0.58.2}/setup.py +29 -1
  3. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/__init__.py +1 -0
  4. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/qna_routes.py +0 -1
  5. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/data_to_embed_pubsub.py +8 -8
  6. sunholo-0.58.2/sunholo/cli/cli.py +39 -0
  7. sunholo-0.58.2/sunholo/cli/cli_init.py +80 -0
  8. sunholo-0.58.2/sunholo/cli/configs.py +29 -0
  9. sunholo-0.58.2/sunholo/cli/deploy.py +43 -0
  10. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/retriever.py +8 -3
  11. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/embedder/embed_chunk.py +9 -0
  12. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/import_files.py +24 -53
  13. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/logging.py +9 -1
  14. sunholo-0.58.2/sunholo/utils/big_context.py +144 -0
  15. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/config.py +3 -0
  16. sunholo-0.58.2/sunholo/vertex/__init__.py +0 -0
  17. sunholo-0.58.2/sunholo/vertex/init_vertex.py +43 -0
  18. {sunholo-0.57.2 → sunholo-0.58.2/sunholo.egg-info}/PKG-INFO +29 -2
  19. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/SOURCES.txt +6 -0
  20. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/requires.txt +28 -0
  21. sunholo-0.57.2/sunholo/cli/cli.py +0 -82
  22. {sunholo-0.57.2 → sunholo-0.58.2}/LICENSE.txt +0 -0
  23. {sunholo-0.57.2 → sunholo-0.58.2}/MANIFEST.in +0 -0
  24. {sunholo-0.57.2 → sunholo-0.58.2}/README.md +0 -0
  25. {sunholo-0.57.2 → sunholo-0.58.2}/setup.cfg +0 -0
  26. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/__init__.py +0 -0
  27. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/chat_history.py +0 -0
  28. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/dispatch_to_qa.py +0 -0
  29. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/__init__.py +0 -0
  30. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/base.py +0 -0
  31. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/fastapi/qna_routes.py +0 -0
  32. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/__init__.py +0 -0
  33. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/flask/base.py +0 -0
  34. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/langserve.py +0 -0
  35. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/pubsub.py +0 -0
  36. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/route.py +0 -0
  37. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/special_commands.py +0 -0
  38. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/agents/test_chat_history.py +0 -0
  39. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/archive/__init__.py +0 -0
  40. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/archive/archive.py +0 -0
  41. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/auth/__init__.py +0 -0
  42. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/auth/run.py +0 -0
  43. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/__init__.py +0 -0
  44. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/discord.py +0 -0
  45. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/bots/webapp.py +0 -0
  46. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/__init__.py +0 -0
  47. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/doc_handling.py +0 -0
  48. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/images.py +0 -0
  49. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/loaders.py +0 -0
  50. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/message_data.py +0 -0
  51. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/pdfs.py +0 -0
  52. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/publish.py +0 -0
  53. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/chunker/splitter.py +0 -0
  54. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/cli/__init__.py +0 -0
  55. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/__init__.py +0 -0
  56. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/llm.py +0 -0
  57. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/prompt.py +0 -0
  58. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/components/vectorstore.py +0 -0
  59. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/__init__.py +0 -0
  60. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/alloydb.py +0 -0
  61. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/database.py +0 -0
  62. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/lancedb.py +0 -0
  63. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_function.sql +0 -0
  64. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_function_time.sql +0 -0
  65. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/create_table.sql +0 -0
  66. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/delete_source_row.sql +0 -0
  67. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/return_sources.sql +0 -0
  68. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/sql/sb/setup.sql +0 -0
  69. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/static_dbs.py +0 -0
  70. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/database/uuid.py +0 -0
  71. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/embedder/__init__.py +0 -0
  72. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/__init__.py +0 -0
  73. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/add_file.py +0 -0
  74. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/download_url.py +0 -0
  75. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/gcs/metadata.py +0 -0
  76. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/__init__.py +0 -0
  77. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/callback.py +0 -0
  78. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/langfuse/prompts.py +0 -0
  79. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/__init__.py +0 -0
  80. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/llamaindex/generate.py +0 -0
  81. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/lookup/__init__.py +0 -0
  82. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/lookup/model_lookup.yaml +0 -0
  83. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/__init__.py +0 -0
  84. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/__init__.py +0 -0
  85. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/lancedb.py +0 -0
  86. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/patches/langchain/vertexai.py +0 -0
  87. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/__init__.py +0 -0
  88. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/process_pubsub.py +0 -0
  89. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/pubsub/pubsub_manager.py +0 -0
  90. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/__init__.py +0 -0
  91. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/parsers.py +0 -0
  92. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/qna/retry.py +0 -0
  93. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/__init__.py +0 -0
  94. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/content_buffer.py +0 -0
  95. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/langserve.py +0 -0
  96. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/streaming/streaming.py +0 -0
  97. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/summarise/__init__.py +0 -0
  98. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/summarise/summarise.py +0 -0
  99. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/__init__.py +0 -0
  100. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/gcp.py +0 -0
  101. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo/utils/parsers.py +0 -0
  102. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/dependency_links.txt +0 -0
  103. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/entry_points.txt +0 -0
  104. {sunholo-0.57.2 → sunholo-0.58.2}/sunholo.egg-info/top_level.txt +0 -0
  105. {sunholo-0.57.2 → sunholo-0.58.2}/test/test_dispatch_to_qa.py +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: sunholo
3
- Version: 0.57.2
3
+ Version: 0.58.2
4
4
  Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
5
5
  Home-page: https://github.com/sunholo-data/sunholo-py
6
- Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.57.2.tar.gz
6
+ Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.58.2.tar.gz
7
7
  Author: Holosun ApS
8
8
  Author-email: multivac@sunholo.com
9
9
  License: Apache License, Version 2.0
@@ -24,13 +24,19 @@ Requires-Dist: langchain-community
24
24
  Provides-Extra: all
25
25
  Requires-Dist: asyncpg; extra == "all"
26
26
  Requires-Dist: flask; extra == "all"
27
+ Requires-Dist: google-auth; extra == "all"
28
+ Requires-Dist: google-auth-httplib2; extra == "all"
29
+ Requires-Dist: google-auth-oauthlib; extra == "all"
27
30
  Requires-Dist: google-cloud-aiplatform; extra == "all"
28
31
  Requires-Dist: google-api-python-client; extra == "all"
29
32
  Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "all"
33
+ Requires-Dist: google-cloud-bigquery; extra == "all"
30
34
  Requires-Dist: google-cloud-build; extra == "all"
31
35
  Requires-Dist: google-cloud-logging; extra == "all"
32
36
  Requires-Dist: google-cloud-storage; extra == "all"
33
37
  Requires-Dist: google-cloud-pubsub; extra == "all"
38
+ Requires-Dist: google-cloud-discoveryengine; extra == "all"
39
+ Requires-Dist: google-generativeai; extra == "all"
34
40
  Requires-Dist: gunicorn; extra == "all"
35
41
  Requires-Dist: httpcore; extra == "all"
36
42
  Requires-Dist: httpx; extra == "all"
@@ -44,25 +50,45 @@ Requires-Dist: langchain_google_alloydb_pg; extra == "all"
44
50
  Requires-Dist: langchain-anthropic; extra == "all"
45
51
  Requires-Dist: langfuse; extra == "all"
46
52
  Requires-Dist: pg8000; extra == "all"
53
+ Requires-Dist: pgvector; extra == "all"
54
+ Requires-Dist: psycopg2-binary; extra == "all"
55
+ Requires-Dist: pypdf; extra == "all"
47
56
  Requires-Dist: fastapi; extra == "all"
57
+ Requires-Dist: supabase; extra == "all"
58
+ Requires-Dist: tiktoken; extra == "all"
48
59
  Requires-Dist: python-socketio; extra == "all"
49
60
  Provides-Extra: database
50
61
  Requires-Dist: asyncpg; extra == "database"
62
+ Requires-Dist: supabase; extra == "database"
51
63
  Requires-Dist: sqlalchemy; extra == "database"
52
64
  Requires-Dist: pg8000; extra == "database"
65
+ Requires-Dist: pgvector; extra == "database"
66
+ Requires-Dist: psycopg2-binary; extra == "database"
53
67
  Requires-Dist: lancedb; extra == "database"
68
+ Provides-Extra: pipeline
69
+ Requires-Dist: GitPython; extra == "pipeline"
70
+ Requires-Dist: lark; extra == "pipeline"
71
+ Requires-Dist: pypdf; extra == "pipeline"
72
+ Requires-Dist: tabulate; extra == "pipeline"
54
73
  Provides-Extra: gcp
74
+ Requires-Dist: google-auth; extra == "gcp"
75
+ Requires-Dist: google-auth-httplib2; extra == "gcp"
76
+ Requires-Dist: google-auth-oauthlib; extra == "gcp"
55
77
  Requires-Dist: google-cloud-aiplatform; extra == "gcp"
78
+ Requires-Dist: google-cloud-bigquery; extra == "gcp"
56
79
  Requires-Dist: google-cloud-build; extra == "gcp"
57
80
  Requires-Dist: google-cloud-storage; extra == "gcp"
58
81
  Requires-Dist: google-cloud-logging; extra == "gcp"
59
82
  Requires-Dist: google-cloud-pubsub; extra == "gcp"
83
+ Requires-Dist: google-cloud-discoveryengine; extra == "gcp"
84
+ Requires-Dist: google-generativeai; extra == "gcp"
60
85
  Requires-Dist: langchain-google-genai; extra == "gcp"
61
86
  Requires-Dist: langchain_google_alloydb_pg; extra == "gcp"
62
87
  Requires-Dist: google-api-python-client; extra == "gcp"
63
88
  Requires-Dist: google-cloud-alloydb-connector[pg8000]; extra == "gcp"
64
89
  Provides-Extra: openai
65
90
  Requires-Dist: langchain-openai; extra == "openai"
91
+ Requires-Dist: tiktoken; extra == "openai"
66
92
  Provides-Extra: anthropic
67
93
  Requires-Dist: langchain-anthropic; extra == "anthropic"
68
94
  Provides-Extra: http
@@ -73,6 +99,7 @@ Requires-Dist: httpcore; extra == "http"
73
99
  Requires-Dist: httpx; extra == "http"
74
100
  Requires-Dist: langfuse; extra == "http"
75
101
  Requires-Dist: python-socketio; extra == "http"
102
+ Requires-Dist: requests; extra == "http"
76
103
 
77
104
  ## Introduction
78
105
  This is the Sunholo Python project, a comprehensive toolkit for working with language models and vector stores on Google Cloud Platform. It provides a wide range of functionalities and utilities to facilitate the development and deployment of language model applications.
@@ -1,7 +1,7 @@
1
1
  from setuptools import setup, find_packages
2
2
 
3
3
  # Define your base version
4
- version = '0.57.2'
4
+ version = '0.58.2'
5
5
 
6
6
  setup(
7
7
  name='sunholo',
@@ -37,13 +37,19 @@ setup(
37
37
  'all': [
38
38
  "asyncpg",
39
39
  "flask",
40
+ "google-auth",
41
+ "google-auth-httplib2",
42
+ "google-auth-oauthlib",
40
43
  "google-cloud-aiplatform",
41
44
  "google-api-python-client",
42
45
  "google-cloud-alloydb-connector[pg8000]",
46
+ "google-cloud-bigquery",
43
47
  "google-cloud-build",
44
48
  "google-cloud-logging",
45
49
  "google-cloud-storage",
46
50
  "google-cloud-pubsub",
51
+ "google-cloud-discoveryengine",
52
+ "google-generativeai",
47
53
  "gunicorn",
48
54
  "httpcore",
49
55
  "httpx",
@@ -57,21 +63,41 @@ setup(
57
63
  "langchain-anthropic",
58
64
  "langfuse",
59
65
  "pg8000",
66
+ "pgvector",
67
+ "psycopg2-binary",
68
+ "pypdf",
60
69
  "fastapi",
70
+ "supabase",
71
+ "tiktoken",
61
72
  "python-socketio"
62
73
  ],
63
74
  'database': [
64
75
  "asyncpg",
76
+ "supabase",
65
77
  "sqlalchemy",
66
78
  "pg8000",
79
+ "pgvector",
80
+ "psycopg2-binary",
67
81
  "lancedb",
68
82
  ],
83
+ 'pipeline': [
84
+ "GitPython",
85
+ "lark",
86
+ "pypdf",
87
+ "tabulate",
88
+ ],
69
89
  'gcp': [
90
+ "google-auth",
91
+ "google-auth-httplib2",
92
+ "google-auth-oauthlib",
70
93
  "google-cloud-aiplatform",
94
+ "google-cloud-bigquery",
71
95
  "google-cloud-build",
72
96
  "google-cloud-storage",
73
97
  "google-cloud-logging",
74
98
  "google-cloud-pubsub",
99
+ "google-cloud-discoveryengine",
100
+ "google-generativeai",
75
101
  "langchain-google-genai",
76
102
  "langchain_google_alloydb_pg",
77
103
  "google-api-python-client",
@@ -79,6 +105,7 @@ setup(
79
105
  ],
80
106
  'openai': [
81
107
  "langchain-openai",
108
+ "tiktoken"
82
109
  ],
83
110
  'anthropic': [
84
111
  "langchain-anthropic",
@@ -91,6 +118,7 @@ setup(
91
118
  "httpx",
92
119
  "langfuse",
93
120
  "python-socketio",
121
+ "requests"
94
122
  ]
95
123
  },
96
124
  classifiers=[
@@ -16,6 +16,7 @@ from . import pubsub
16
16
  from . import qna
17
17
  from . import streaming
18
18
  from . import utils
19
+ from . import vertex
19
20
  import logging
20
21
 
21
22
  __all__ = ['agents',
@@ -61,7 +61,6 @@ def register_qna_routes(app, stream_interpreter, vac_interpreter):
61
61
  model=vac_config.get("model") or vac_config.get("llm")
62
62
  )
63
63
 
64
- @observe()
65
64
  def generate_response_content():
66
65
 
67
66
  for chunk in start_streaming_chat(question=all_input["user_input"],
@@ -63,14 +63,14 @@ def data_to_embed_pubsub(data: dict):
63
63
  if metadata:
64
64
  metadata["vector_name"] = vector_name
65
65
 
66
- if metadata.get("return_chunks"):
67
- log.info("attributes.return_chunks=True detected, skipping process chunks queue")
68
- output_list = []
69
- if chunks:
70
- for chunk in chunks:
71
- output_list.append({"page_content": chunk.page_content, "metadata": chunk.metadata})
72
-
73
- return output_list
66
+ if metadata.get("return_chunks"):
67
+ log.info("attributes.return_chunks=True detected, skipping process chunks queue")
68
+ output_list = []
69
+ if chunks:
70
+ for chunk in chunks:
71
+ output_list.append({"page_content": chunk.page_content, "metadata": chunk.metadata})
72
+
73
+ return output_list
74
74
 
75
75
  process_docs_chunks_vector_name(chunks, vector_name, metadata)
76
76
 
@@ -0,0 +1,39 @@
1
+ import argparse
2
+
3
+ from .configs import setup_list_configs_subparser
4
+ from .deploy import setup_deploy_subparser
5
+ from .cli_init import setup_init_subparser
6
+
7
def main(args=None):
    """
    Entry point for the sunholo console script.

    Builds the top-level argument parser, registers the `deploy`,
    `list-configs` and `init` subcommands, parses the command line and calls
    the handler that the selected subcommand registered through
    `set_defaults(func=...)`.

    Args:
        args: Optional list of argument strings to parse. When None, argparse
            falls back to the process command line.

    Example commands:
    ```bash
    sunholo deploy my_vac --config_path .
    sunholo list-configs --kind vacConfig
    sunholo init my_genai_project
    ```
    """
    parser = argparse.ArgumentParser(description="sunholo CLI tool for deploying GenAI VACs")
    subparsers = parser.add_subparsers(title='commands',
                                       description='Valid commands',
                                       help='Commands',
                                       dest='command',
                                       required=True)

    # Each helper adds one subcommand and attaches its handler as `func`
    setup_deploy_subparser(subparsers)
    setup_list_configs_subparser(subparsers)
    setup_init_subparser(subparsers)

    args = parser.parse_args(args)

    # Defensive fallback: show usage if a subcommand did not register a handler
    if hasattr(args, 'func'):
        args.func(args)
    else:
        parser.print_help()
+
38
+ if __name__ == "__main__":
39
+ main()
@@ -0,0 +1,80 @@
1
+ import os
2
+ import shutil
3
+ from ..utils.config import get_module_filepath
4
+
5
def init_project(args):
    """
    Initializes a new sunholo project from the packaged project template.

    Creates a directory named after the project inside the current working
    directory and copies every file and sub-directory found in the package's
    `templates/project` folder into it. If the target directory already
    exists, a message is printed and nothing is created or overwritten.

    Args:
        args: Parsed argparse namespace; only `args.project_name` is read.

    Raises:
        FileNotFoundError: If the template directory cannot be found. This is
            checked before the project directory is created, so a failed
            initialization does not leave an empty directory behind.

    Usage:
    ```bash
    sunholo init my_genai_project
    ```
    """
    project_name = args.project_name
    project_dir = os.path.join(os.getcwd(), project_name)

    print(f"Initializing project: {project_name} in directory: {project_dir}")

    # Refuse to touch an existing path
    if os.path.exists(project_dir):
        print(f"Directory {project_dir} already exists. Please choose a different project name.")
        return

    # Validate the template source first so nothing is created on failure
    # NOTE(review): assumes get_module_filepath returns a filesystem path string - confirm
    template_dir = get_module_filepath("templates/project")
    if not os.path.isdir(template_dir):
        raise FileNotFoundError(f"Project template directory not found: {template_dir}")

    os.makedirs(project_dir)

    # Copy template files (plain files individually, directories recursively)
    for filename in os.listdir(template_dir):
        src_path = os.path.join(template_dir, filename)
        dest_path = os.path.join(project_dir, filename)
        if os.path.isfile(src_path):
            shutil.copy(src_path, dest_path)
        elif os.path.isdir(src_path):
            shutil.copytree(src_path, dest_path)

    print(f"Project {project_name} initialized successfully.")
    print(f"Navigate to {project_dir} and customize the configuration files in the 'config' directory.")
73
+
74
def setup_init_subparser(subparsers):
    """
    Registers the `init` subcommand on the given argparse subparsers object.

    The subcommand takes one positional `project_name` argument and is
    dispatched to `init_project`.
    """
    parser = subparsers.add_parser('init', help='Initializes a new sunholo project.')
    parser.set_defaults(func=init_project)
    parser.add_argument('project_name', help='The name of the new project.')
@@ -0,0 +1,29 @@
1
+ from ..utils.config import load_all_configs
2
+
3
+ from pprint import pprint
4
+
5
def list_configs(args):
    """
    Lists configuration files, filtered by kind if specified.

    Args:
        args: Parsed argparse namespace; `args.kind` optionally names a
            single config kind to show. When it is empty, every loaded kind
            is printed.
    """
    print("Listing configuration files")
    configs = load_all_configs()

    if args.kind:
        if args.kind in configs:
            print(f"## Config kind: {args.kind}")
            pprint(configs[args.kind])
        else:
            print(f"No configurations found for kind: {args.kind}")
        return

    for kind, config in configs.items():
        # Headings use print: pprint would emit the quoted repr of the string
        print(f"## Config kind: {kind}")
        pprint(config)
22
+
23
def setup_list_configs_subparser(subparsers):
    """
    Registers the `list-configs` subcommand on the given argparse subparsers object.

    The subcommand accepts an optional `--kind` filter and is dispatched to
    `list_configs`.
    """
    parser = subparsers.add_parser('list-configs', help='Lists all configuration files and their details.')
    parser.set_defaults(func=list_configs)
    parser.add_argument('--kind', help='Filter configurations by kind.')
@@ -0,0 +1,43 @@
1
+ import os
2
+ from subprocess import Popen
3
+ from ..utils.config import load_all_configs
4
+
5
def deploy_vac(args):
    """
    Deploys the VAC by running a Flask app locally.

    Looks up the named VAC under the `vacConfig` kind of the loaded
    configuration, checks that `app.py` exists in `args.config_path`, then
    runs that file in a child process and waits for it to exit.

    Args:
        args: Parsed argparse namespace providing `vac_name` and
            `config_path`.

    Raises:
        ValueError: If no configuration exists for the VAC, or `app.py` is
            not found in `args.config_path`.
    """
    import sys  # local import: only needed to locate the running interpreter

    print(f"Deploying VAC: {args.vac_name} locally")

    # Load the vacConfig
    configs_by_kind = load_all_configs()
    vac_config = configs_by_kind.get('vacConfig', {}).get('vac', {}).get(args.vac_name)

    if not vac_config:
        raise ValueError(f"No configuration found for VAC: {args.vac_name}")

    # Assuming the Flask app is in 'app.py' within the config path
    app_path = os.path.join(args.config_path, 'app.py')
    if not os.path.exists(app_path):
        raise ValueError(f"app.py not found in {args.config_path}")

    print(f"Running Flask app from {app_path}")

    # Use the interpreter running this CLI rather than whatever "python"
    # resolves to on PATH, so the app runs in the same environment.
    # Fall back to "python" if the interpreter path cannot be determined.
    command = [sys.executable or "python", app_path]
    print(f"Running Flask app with command: {' '.join(command)}")
    process = Popen(command)
    # Block until the app process exits
    process.communicate()
30
+
31
def setup_deploy_subparser(subparsers):
    """
    Registers the `deploy` subcommand on the given argparse subparsers object.

    The subcommand takes a positional `vac_name` plus an optional
    `--config_path` (default `.`) and is dispatched to `deploy_vac`.

    Example command:
    ```bash
    sunholo deploy "vac_name" --config_path .
    ```
    """
    parser = subparsers.add_parser('deploy', help='Triggers a deployment of a VAC.')
    parser.set_defaults(func=deploy_vac)
    parser.add_argument('vac_name', help='The name of the VAC to deploy.')
    parser.add_argument(
        '--config_path',
        default='.',
        help='Path to the directory containing the config folder `config/` and Flask app `app.py`, defaults to current directory. Set _CONFIG_FOLDER env var to change config location.',
    )
@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
27
27
 
28
28
 
29
29
  def load_memories(vector_name):
30
- memories = load_config_key("memory", vector_name, filename="config/llm_config.yaml")
30
+ memories = load_config_key("memory", vector_name, type="vacConfig")
31
31
  log.info(f"Found memory settings for {vector_name}: {memories}")
32
32
  if len(memories) == 0:
33
33
  log.info(f"No memory settings found for {vector_name}")
@@ -49,7 +49,8 @@ def pick_retriever(vector_name, embeddings=None):
49
49
  if embeddings is None:
50
50
  embeddings = get_embeddings(vector_name)
51
51
  vectorstore = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
52
- vs_retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
52
+ k_override = value.get('k', 3)
53
+ vs_retriever = vectorstore.as_retriever(search_kwargs=dict(k=k_override))
53
54
  retriever_list.append(vs_retriever)
54
55
 
55
56
  if value.get('provider') == "GoogleCloudEnterpriseSearchRetriever":
@@ -68,6 +69,10 @@ def pick_retriever(vector_name, embeddings=None):
68
69
  if len(retriever_list) == 0:
69
70
  log.info(f"No retrievers were created for {memories}")
70
71
  return None
72
+
73
+ k_override = load_config_key("memory_k", vector_name, type="vacConfig")
74
+ if not k_override:
75
+ k_override = 3
71
76
 
72
77
  lotr = MergerRetriever(retrievers=retriever_list)
73
78
 
@@ -76,6 +81,6 @@ def pick_retriever(vector_name, embeddings=None):
76
81
  pipeline = DocumentCompressorPipeline(transformers=[filter])
77
82
  retriever = ContextualCompressionRetriever(
78
83
  base_compressor=pipeline, base_retriever=lotr,
79
- k=3)
84
+ k=k_override)
80
85
 
81
86
  return retriever
@@ -115,6 +115,15 @@ def embed_pubsub_chunk(data: dict):
115
115
  embed_llm = value.get('llm')
116
116
  if embed_llm:
117
117
  embeddings = pick_embedding(embed_llm)
118
+ # check if read only
119
+ read_only = value.get('readonly')
120
+ if read_only:
121
+ continue
122
+ # read from a different vector_name
123
+ vector_name_other = value.get('vector_name')
124
+ if vector_name_other:
125
+ log.warning(f"Using different vector_name for vectorstore: {vector_name_other} overriding {vector_name}")
126
+ vector_name = vector_name_other
118
127
  vectorstore_obj = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
119
128
  vs_retriever = vectorstore_obj.as_retriever(search_kwargs=dict(k=3))
120
129
  vectorstore_list.append(vs_retriever)
@@ -1,54 +1,14 @@
1
-
2
1
  try:
3
2
  from vertexai.preview import rag
4
- from vertexai.preview.generative_models import GenerativeModel, Tool
5
- import vertexai
6
3
  except ImportError:
7
4
  rag = None
8
5
 
9
6
  from ..logging import log
10
7
  from ..utils.config import load_config_key
8
+ from ..vertex import init_vertex
11
9
 
12
10
  # Create a RAG Corpus, Import Files
13
11
 
14
- def init_vertex(gcp_config):
15
- """
16
- Initializes the Vertex AI environment using the provided Google Cloud Platform configuration.
17
-
18
- This function configures the Vertex AI API session with specified project and location details
19
- from the gcp_config dictionary. It is essential to call this function at the beginning of a session
20
- before performing any operations related to Vertex AI.
21
-
22
- Parameters:
23
- gcp_config (dict): A dictionary containing the Google Cloud Platform configuration with keys:
24
- - 'project_id': The Google Cloud project ID to configure for Vertex AI.
25
- - 'location': The Google Cloud region to configure for Vertex AI.
26
-
27
- Raises:
28
- KeyError: If the necessary keys ('project_id' or 'location') are missing in the gcp_config dictionary.
29
- ModuleNotFoundError: If the Vertex AI module is not installed and needs to be installed via pip.
30
-
31
- Example:
32
- ```python
33
- gcp_config = {
34
- 'project_id': 'your-project-id',
35
- 'location': 'us-central1'
36
- }
37
- init_vertex(gcp_config)
38
- # This will initialize the Vertex AI session with the provided project ID and location.
39
-
40
- Note:
41
- Ensure that the 'vertexai' module is installed and correctly configured before calling this function.
42
- The function assumes that the required 'vertexai' library is available and that the logging setup is already in place.
43
- """
44
- if not rag:
45
- log.error("Need to install vertexai module via `pip install google-cloud-aiplatform`")
46
-
47
- # Initialize Vertex AI API once per session
48
- project_id = gcp_config.get('project_id')
49
- location = gcp_config.get('location')
50
- vertexai.init(project=project_id, location=location)
51
-
52
12
  def get_corpus(gcp_config):
53
13
  """
54
14
  Retrieves a LlamaIndex corpus from Vertex AI based on the provided Google Cloud configuration.
@@ -86,6 +46,9 @@ def get_corpus(gcp_config):
86
46
  print("Error fetching corpus:", str(e))
87
47
  ```
88
48
  """
49
+ if not rag:
50
+ raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
51
+
89
52
  project_id = gcp_config.get('project_id')
90
53
  location = gcp_config.get('location')
91
54
  rag_id = gcp_config.get('rag_id')
@@ -136,7 +99,10 @@ def do_llamaindex(message_data, metadata, vector_name):
136
99
  # Imported file to corpus: {'status': 'success'}
137
100
  ```
138
101
  """
139
- gcp_config = load_config_key("gcp_config", vector_name=vector_name, filename = "config/llm_config.yaml")
102
+ if not rag:
103
+ raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
104
+
105
+ gcp_config = load_config_key("gcp_config", vector_name=vector_name, type="vacConfig")
140
106
  if not gcp_config:
141
107
  raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
142
108
 
@@ -154,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
154
120
  log.info(f"Found llamaindex corpus: {corpus}")
155
121
 
156
122
  # native support for cloud storage and drive links
157
- chunker_config = load_config_key("chunker", vector_name=vector_name, filename="config/llm_config.yaml")
123
+ chunker_config = load_config_key("chunker", vector_name=vector_name, type="vacConfig")
158
124
 
159
125
  if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
160
126
  log.info(f"rag.import_files for {message_data}")
@@ -193,12 +159,8 @@ def do_llamaindex(message_data, metadata, vector_name):
193
159
  # description=description,
194
160
  #)
195
161
 
196
-
197
- def llamaindex_chunker_check(message_data, metadata, vector_name):
198
- # llamaindex handles its own chunking/embedding
199
- memories = load_config_key("memory", vector_name=vector_name, filename = "config/llm_config.yaml")
200
- total_memories = len(memories)
201
- llama = None
162
+ def check_llamaindex_in_memory(vector_name):
163
+ memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
202
164
  for memory in memories: # Iterate over the list
203
165
  for key, value in memory.items(): # Now iterate over the dictionary
204
166
  log.info(f"Found memory {key}")
@@ -206,10 +168,19 @@ def llamaindex_chunker_check(message_data, metadata, vector_name):
206
168
  if vectorstore:
207
169
  log.info(f"Found vectorstore {vectorstore}")
208
170
  if vectorstore == "llamaindex":
209
- # https://cloud.google.com/vertex-ai/generative-ai/docs/llamaindex-on-vertexai
210
- log.info(f"llamaindex on vertex indexing for {vector_name}")
211
- llama = do_llamaindex(message_data, metadata, vector_name)
212
- log.info(f"Processed llamaindex: {llama}")
171
+
172
+ return True
173
+
174
+ return False
175
+
176
+ def llamaindex_chunker_check(message_data, metadata, vector_name):
177
+ # llamaindex handles its own chunking/embedding
178
+ memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
179
+ total_memories = len(memories)
180
+ llama = None
181
+ if check_llamaindex_in_memory(vector_name):
182
+ llama = do_llamaindex(message_data, metadata, vector_name)
183
+ log.info(f"Processed llamaindex: {llama}")
213
184
 
214
185
  # If llamaindex is the only entry, return
215
186
  if llama and total_memories == 1:
@@ -249,4 +249,12 @@ def log_folder_location(folder_name):
249
249
  else:
250
250
  logging.warning(f"The folder '{folder_name}' does not exist in the current working directory: {current_working_directory}")
251
251
 
252
- log = setup_logging("sunholo")
252
# Cache for the package logger; filled in by the first get_logger() call
_logger = None


def get_logger():
    """
    Returns the shared "sunholo" logger.

    The logger is built with `setup_logging("sunholo")` on the first call and
    the same object is returned on every later call.
    """
    global _logger
    if _logger is not None:
        return _logger
    _logger = setup_logging("sunholo")
    return _logger
259
+
260
+ log = get_logger()