sunholo 0.57.2__py3-none-any.whl → 0.58.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sunholo/__init__.py +1 -0
- sunholo/agents/flask/qna_routes.py +0 -1
- sunholo/chunker/data_to_embed_pubsub.py +8 -8
- sunholo/cli/cli.py +14 -57
- sunholo/cli/cli_init.py +80 -0
- sunholo/cli/configs.py +29 -0
- sunholo/cli/deploy.py +43 -0
- sunholo/components/retriever.py +8 -3
- sunholo/embedder/embed_chunk.py +9 -0
- sunholo/llamaindex/import_files.py +24 -53
- sunholo/logging.py +9 -1
- sunholo/utils/big_context.py +144 -0
- sunholo/utils/config.py +3 -0
- sunholo/vertex/__init__.py +0 -0
- sunholo/vertex/init_vertex.py +43 -0
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/METADATA +29 -2
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/RECORD +21 -15
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/LICENSE.txt +0 -0
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/WHEEL +0 -0
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/entry_points.txt +0 -0
- {sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/top_level.txt +0 -0
sunholo/chunker/data_to_embed_pubsub.py
CHANGED

@@ -63,14 +63,14 @@ def data_to_embed_pubsub(data: dict):
     if metadata:
         metadata["vector_name"] = vector_name
 
-
-
-
-
-
-
-
-
+    if metadata.get("return_chunks"):
+        log.info("attributes.return_chunks=True detected, skipping process chunks queue")
+        output_list = []
+        if chunks:
+            for chunk in chunks:
+                output_list.append({"page_content": chunk.page_content, "metadata": chunk.metadata})
+
+        return output_list
 
     process_docs_chunks_vector_name(chunks, vector_name, metadata)
 
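The new `return_chunks` branch hands the parsed chunks straight back to the caller instead of pushing them onto the embedding queue. A rough sketch of the returned shape, assuming LangChain-style `Document` chunks; the Pub/Sub payload handling earlier in the function is not shown in this diff:

```python
from langchain.schema import Document

# Illustrative values only; in practice these come from the Pub/Sub message.
chunks = [Document(page_content="First chunk...", metadata={"source": "gs://bucket/doc.md"})]
metadata = {"vector_name": "my_vac", "return_chunks": True}

# With return_chunks set, data_to_embed_pubsub returns plain dicts like:
output_list = [{"page_content": c.page_content, "metadata": c.metadata} for c in chunks]
# [{'page_content': 'First chunk...', 'metadata': {'source': 'gs://bucket/doc.md'}}]
```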
sunholo/cli/cli.py
CHANGED

@@ -1,59 +1,8 @@
 import argparse
-try:
-    from google.cloud.devtools import cloudbuild_v1
-except ImportError:
-    cloudbuild_v1 = None
 
-from
-
-
-    """
-    Triggers a Google Cloud Build using an existing build trigger configured in GCP.
-
-    Args:
-        args: argparse.Namespace containing the command line arguments specified for the 'deploy' command.
-
-    Example:
-        trigger_build(args) where args contains project_id, trigger_id, repo_name, and branch_name.
-    """
-    if not cloudbuild_v1:
-        log.warning("Can't deploy - google-cloud-build not installed, enable via `pip install sunholo[gcp]")
-
-        return None
-
-    client = cloudbuild_v1.CloudBuildClient()
-    # Assuming the build source uses the path to the cloudbuild.yaml if specified.
-    source = cloudbuild_v1.RepoSource(
-        project_id=args.project_id,
-        repo_name=args.repo_name,
-        branch_name=args.branch_name,
-        substitutions=args.substitutions,
-        dir=args.config_path  # Path to directory containing cloudbuild.yaml
-    )
-    request = cloudbuild_v1.RunBuildTriggerRequest(
-        project_id=args.project_id,
-        trigger_id=args.trigger_id,
-        source=source
-    )
-    operation = client.run_build_trigger(request)
-    print(f"Triggered build with id: {operation.metadata.build.id}")
-
-def setup_deploy_subparser(subparsers):
-    """
-    Sets up an argparse subparser for the 'deploy' command.
-
-    Example command:
-    ```bash
-    sunholo deploy --project_id "my-gcp-project" --trigger_id "my-trigger-id" --repo_name "my-repo"
-    ```
-    """
-    deploy_parser = subparsers.add_parser('deploy', help='Triggers a deployment using an existing Google Cloud Build trigger.')
-    deploy_parser.add_argument('--project_id', required=True, help='Google Cloud Project ID required for deployment.')
-    deploy_parser.add_argument('--trigger_id', required=True, help='Google Cloud Build Trigger ID required for deployment.')
-    deploy_parser.add_argument('--repo_name', required=True, help='Name of the linked repository in Google Cloud Source Repositories required for deployment.')
-    deploy_parser.add_argument('--branch_name', default='dev', help='Branch name to trigger the build from, defaults to "dev".')
-    deploy_parser.add_argument('--config_path', default='.', help='Path to the directory containing the cloudbuild.yaml file, defaults to current directory.')
-    deploy_parser.set_defaults(func=trigger_build)
+from .configs import setup_list_configs_subparser
+from .deploy import setup_deploy_subparser
+from .cli_init import setup_init_subparser
 
 def main(args=None):
     """

@@ -62,14 +11,22 @@ def main(args=None):
 
     Example commands:
     ```bash
-    sunholo deploy --
+    sunholo deploy --config_path . --gcs_bucket your-gcs-bucket --lancedb_bucket your-lancedb-bucket
     ```
     """
-    parser = argparse.ArgumentParser(description="sunholo CLI tool for deploying
-    subparsers = parser.add_subparsers(title='commands',
+    parser = argparse.ArgumentParser(description="sunholo CLI tool for deploying GenAI VACs")
+    subparsers = parser.add_subparsers(title='commands',
+                                       description='Valid commands',
+                                       help='Commands',
+                                       dest='command',
+                                       required=True)
 
     # Setup deploy command
     setup_deploy_subparser(subparsers)
+    # Setup list-configs command
+    setup_list_configs_subparser(subparsers)
+    # init
+    setup_init_subparser(subparsers)
 
     args = parser.parse_args(args)
 
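Because `main()` accepts an optional `args` list and passes it to `parser.parse_args(args)`, the new subcommands can also be driven from Python, which is handy for smoke tests. A rough sketch, assuming `main()` dispatches to the selected subcommand's `func` after parsing (that tail of `main()` is not shown in this diff):

```python
from sunholo.cli.cli import main

# Equivalent to `sunholo list-configs --kind vacConfig` on the command line.
main(["list-configs", "--kind", "vacConfig"])

# Equivalent to `sunholo init my_genai_project`.
main(["init", "my_genai_project"])
```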
sunholo/cli/cli_init.py
ADDED

@@ -0,0 +1,80 @@
+import os
+import shutil
+from ..utils.config import get_module_filepath
+
+def init_project(args):
+    """
+    Initializes a new sunholo project with a basic configuration file and directory structure.
+
+    **Explanation:**
+
+    1. **Import Necessary Modules:**
+       - `os` for file system operations.
+       - `shutil` for copying files and directories.
+       - `log` from `sunholo.logging` for logging messages.
+       - `get_module_filepath` from `sunholo.utils.config` to get the absolute path of template files.
+
+    2. **`init_project` Function:**
+       - Takes an `args` object from argparse, containing the `project_name`.
+       - Creates the project directory using `os.makedirs`.
+       - Copies template files from the `templates/project` directory to the new project directory using `shutil.copy` and `shutil.copytree`.
+       - Logs informative messages about the initialization process.
+
+    3. **`setup_init_subparser` Function:**
+       - Sets up the `init` subcommand for the `sunholo` CLI.
+       - Adds an argument `project_name` to specify the name of the new project.
+       - Sets the `func` attribute to `init_project`, so the parser knows which function to call when the `init` command is used.
+
+    **Template Files (`templates/project`):**
+
+    You'll need to create a `templates/project` directory within your `sunholo` package and place the following template files in it:
+
+    * **`config/llm_config.yaml`:** A basic configuration file with placeholders for LLM settings, vector stores, etc.
+    * **`config/cloud_run_urls.json`:** A template for Cloud Run URLs.
+    * **`app.py`:** A basic Flask app that can be customized for the project.
+    * **`.gitignore`:** A gitignore file to exclude unnecessary files from version control.
+    * **`README.md`:** A README file with instructions for setting up and running the project.
+
+    **Usage:**
+
+    After adding this code to your `cli.py` and creating the template files, users can initialize a new project using the following command:
+
+    ```bash
+    sunholo init my_genai_project
+    ```
+
+    This will create a new directory named `my_genai_project` with the template files, allowing users to start building their GenAI application.
+
+    """
+    project_name = args.project_name
+    project_dir = os.path.join(os.getcwd(), project_name)
+
+    print(f"Initializing project: {project_name} in directory: {project_dir}")
+
+    # Create project directory
+    if os.path.exists(project_dir):
+        print(f"Directory {project_dir} already exists. Please choose a different project name.")
+        return
+
+    os.makedirs(project_dir)
+
+    # Copy template files
+    template_dir = get_module_filepath("templates/project")
+    for filename in os.listdir(template_dir):
+        src_path = os.path.join(template_dir, filename)
+        dest_path = os.path.join(project_dir, filename)
+        if os.path.isfile(src_path):
+            shutil.copy(src_path, dest_path)
+        elif os.path.isdir(src_path):
+            shutil.copytree(src_path, dest_path)
+
+    print(f"Project {project_name} initialized successfully.")
+    print(f"Navigate to {project_dir} and customize the configuration files in the 'config' directory.")
+
+def setup_init_subparser(subparsers):
+    """
+    Sets up an argparse subparser for the 'init' command.
+    """
+    init_parser = subparsers.add_parser('init', help='Initializes a new sunholo project.')
+    init_parser.add_argument('project_name', help='The name of the new project.')
+    init_parser.set_defaults(func=init_project)
sunholo/cli/configs.py
ADDED

@@ -0,0 +1,29 @@
+from ..utils.config import load_all_configs
+
+from pprint import pprint
+
+def list_configs(args):
+    """
+    Lists configuration files, filtered by kind if specified.
+    """
+    print("Listing configuration files")
+    configs = load_all_configs()
+
+    if args.kind:
+        if args.kind in configs:
+            print(f"## Config kind: {args.kind}")
+            pprint(configs[args.kind])
+        else:
+            print(f"No configurations found for kind: {args.kind}")
+    else:
+        for kind, config in configs.items():
+            pprint(f"## Config kind: {kind}")
+            pprint(config)
+
+def setup_list_configs_subparser(subparsers):
+    """
+    Sets up an argparse subparser for the 'list-configs' command.
+    """
+    list_configs_parser = subparsers.add_parser('list-configs', help='Lists all configuration files and their details.')
+    list_configs_parser.add_argument('--kind', help='Filter configurations by kind.')
+    list_configs_parser.set_defaults(func=list_configs)
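`list_configs` leans on `load_all_configs()`, which walks the config folder and groups files by kind (see the `config.py` change further down). It can also be called without the CLI wrapper; a sketch, assuming a local `config/` folder (or `_CONFIG_FOLDER`) containing yaml/json configs:

```python
import argparse
from sunholo.cli.configs import list_configs

# Filter to one kind, mirroring `sunholo list-configs --kind vacConfig`.
list_configs(argparse.Namespace(kind="vacConfig"))

# No filter: print every config kind that load_all_configs() found.
list_configs(argparse.Namespace(kind=None))
```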
sunholo/cli/deploy.py
ADDED

@@ -0,0 +1,43 @@
+import os
+from subprocess import Popen
+from ..utils.config import load_all_configs
+
+def deploy_vac(args):
+    """
+    Deploys the VAC by running a Flask app locally.
+    """
+    print(f"Deploying VAC: {args.vac_name} locally")
+
+    # Load the vacConfig
+    configs_by_kind = load_all_configs()
+    vac_config = configs_by_kind.get('vacConfig', {}).get('vac', {}).get(args.vac_name)
+
+    if not vac_config:
+        raise ValueError(f"No configuration found for VAC: {args.vac_name}")
+
+    # Assuming the Flask app is in 'app.py' within the config path
+    app_path = os.path.join(args.config_path, 'app.py')
+    if not os.path.exists(app_path):
+        raise ValueError(f"app.py not found in {args.config_path}")
+
+    print(f"Running Flask app from {app_path}")
+
+    # Run the Flask app
+    command = ["python", app_path]
+    print(f"Running Flask app with command: {' '.join(command)}")
+    process = Popen(command)
+    process.communicate()
+
+def setup_deploy_subparser(subparsers):
+    """
+    Sets up an argparse subparser for the 'deploy' command.
+
+    Example command:
+    ```bash
+    sunholo deploy "vac_name" --config_path .
+    ```
+    """
+    deploy_parser = subparsers.add_parser('deploy', help='Triggers a deployment of a VAC.')
+    deploy_parser.add_argument('vac_name', help='The name of the VAC to deploy.')
+    deploy_parser.add_argument('--config_path', default='.', help='Path to the directory containing the config folder `config/` and Flask app `app.py`, defaults to current directory. Set _CONFIG_FOLDER env var to change config location.')
+    deploy_parser.set_defaults(func=deploy_vac)
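The reworked `deploy` command runs the VAC's local Flask `app.py` rather than triggering Cloud Build (the old `trigger_build` path removed from `cli.py` above). Called directly it only needs a namespace with `vac_name` and `config_path`; a sketch, assuming a `vacConfig` entry named `my_vac` and an `app.py` in the current directory:

```python
import argparse
from sunholo.cli.deploy import deploy_vac

# Mirrors `sunholo deploy my_vac --config_path .` on the command line.
args = argparse.Namespace(vac_name="my_vac", config_path=".")
deploy_vac(args)  # raises ValueError if the VAC config or app.py is missing
```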
sunholo/components/retriever.py
CHANGED

@@ -27,7 +27,7 @@ from langchain.retrievers import ContextualCompressionRetriever
 
 
 def load_memories(vector_name):
-    memories = load_config_key("memory", vector_name,
+    memories = load_config_key("memory", vector_name, type="vacConfig")
     log.info(f"Found memory settings for {vector_name}: {memories}")
     if len(memories) == 0:
         log.info(f"No memory settings found for {vector_name}")

@@ -49,7 +49,8 @@ def pick_retriever(vector_name, embeddings=None):
            if embeddings is None:
                embeddings = get_embeddings(vector_name)
            vectorstore = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
-
+            k_override = value.get('k', 3)
+            vs_retriever = vectorstore.as_retriever(search_kwargs=dict(k=k_override))
            retriever_list.append(vs_retriever)
 
        if value.get('provider') == "GoogleCloudEnterpriseSearchRetriever":

@@ -68,6 +69,10 @@ def pick_retriever(vector_name, embeddings=None):
    if len(retriever_list) == 0:
        log.info(f"No retrievers were created for {memories}")
        return None
+
+    k_override = load_config_key("memory_k", vector_name, type="vacConfig")
+    if not k_override:
+        k_override = 3
 
    lotr = MergerRetriever(retrievers=retriever_list)
 

@@ -76,6 +81,6 @@ def pick_retriever(vector_name, embeddings=None):
    pipeline = DocumentCompressorPipeline(transformers=[filter])
    retriever = ContextualCompressionRetriever(
        base_compressor=pipeline, base_retriever=lotr,
-        k=
+        k=k_override)
 
    return retriever
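Two retrieval-size overrides appear here: a per-vectorstore `k` inside each memory entry (defaulting to 3) and a vac-level `memory_k` read via `load_config_key`, which feeds the merged `ContextualCompressionRetriever`. A hedged sketch of how a config might use them; the YAML layout shown in the comments is illustrative, not taken from this diff:

```python
from sunholo.components.retriever import pick_retriever

# Hypothetical vacConfig snippet (shown as comments only):
#   kind: vacConfig
#   vac:
#     my_vac:
#       memory_k: 5            # overrides k for the merged/compressed retriever
#       memory:
#         - lancedb-vectorstore:
#             vectorstore: lancedb
#             k: 10            # per-vectorstore override, defaults to 3
retriever = pick_retriever("my_vac")
if retriever:
    docs = retriever.get_relevant_documents("What changed in sunholo 0.58.2?")
```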
sunholo/embedder/embed_chunk.py
CHANGED

@@ -115,6 +115,15 @@ def embed_pubsub_chunk(data: dict):
        embed_llm = value.get('llm')
        if embed_llm:
            embeddings = pick_embedding(embed_llm)
+        # check if read only
+        read_only = value.get('readonly')
+        if read_only:
+            continue
+        # read from a different vector_name
+        vector_name_other = value.get('vector_name')
+        if vector_name_other:
+            log.warning(f"Using different vector_name for vectorstore: {vector_name_other} overriding {vector_name}")
+            vector_name = vector_name_other
        vectorstore_obj = pick_vectorstore(vectorstore, vector_name=vector_name, embeddings=embeddings)
        vs_retriever = vectorstore_obj.as_retriever(search_kwargs=dict(k=3))
        vectorstore_list.append(vs_retriever)
sunholo/llamaindex/import_files.py
CHANGED

@@ -1,54 +1,14 @@
-
 try:
     from vertexai.preview import rag
-    from vertexai.preview.generative_models import GenerativeModel, Tool
-    import vertexai
 except ImportError:
     rag = None
 
 from ..logging import log
 from ..utils.config import load_config_key
+from ..vertex import init_vertex
 
 # Create a RAG Corpus, Import Files
 
-def init_vertex(gcp_config):
-    """
-    Initializes the Vertex AI environment using the provided Google Cloud Platform configuration.
-
-    This function configures the Vertex AI API session with specified project and location details
-    from the gcp_config dictionary. It is essential to call this function at the beginning of a session
-    before performing any operations related to Vertex AI.
-
-    Parameters:
-        gcp_config (dict): A dictionary containing the Google Cloud Platform configuration with keys:
-            - 'project_id': The Google Cloud project ID to configure for Vertex AI.
-            - 'location': The Google Cloud region to configure for Vertex AI.
-
-    Raises:
-        KeyError: If the necessary keys ('project_id' or 'location') are missing in the gcp_config dictionary.
-        ModuleNotFoundError: If the Vertex AI module is not installed and needs to be installed via pip.
-
-    Example:
-    ```python
-    gcp_config = {
-        'project_id': 'your-project-id',
-        'location': 'us-central1'
-    }
-    init_vertex(gcp_config)
-    # This will initialize the Vertex AI session with the provided project ID and location.
-
-    Note:
-        Ensure that the 'vertexai' module is installed and correctly configured before calling this function.
-        The function assumes that the required 'vertexai' library is available and that the logging setup is already in place.
-    """
-    if not rag:
-        log.error("Need to install vertexai module via `pip install google-cloud-aiplatform`")
-
-    # Initialize Vertex AI API once per session
-    project_id = gcp_config.get('project_id')
-    location = gcp_config.get('location')
-    vertexai.init(project=project_id, location=location)
-
 def get_corpus(gcp_config):
     """
     Retrieves a LlamaIndex corpus from Vertex AI based on the provided Google Cloud configuration.

@@ -86,6 +46,9 @@ def get_corpus(gcp_config):
        print("Error fetching corpus:", str(e))
    ```
    """
+    if not rag:
+        raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
+
    project_id = gcp_config.get('project_id')
    location = gcp_config.get('location')
    rag_id = gcp_config.get('rag_id')

@@ -136,7 +99,10 @@ def do_llamaindex(message_data, metadata, vector_name):
    # Imported file to corpus: {'status': 'success'}
    ```
    """
-
+    if not rag:
+        raise ValueError("Need to install vertexai module via `pip install sunholo[gcp]`")
+
+    gcp_config = load_config_key("gcp_config", vector_name=vector_name, type="vacConfig")
    if not gcp_config:
        raise ValueError(f"Need config.{vector_name}.gcp_config to configure llamaindex on VertexAI")
 

@@ -154,7 +120,7 @@ def do_llamaindex(message_data, metadata, vector_name):
    log.info(f"Found llamaindex corpus: {corpus}")
 
    # native support for cloud storage and drive links
-    chunker_config = load_config_key("chunker", vector_name=vector_name,
+    chunker_config = load_config_key("chunker", vector_name=vector_name, type="vacConfig")
 
    if message_data.startswith("gs://") or message_data.startswith("https://drive.google.com"):
        log.info(f"rag.import_files for {message_data}")

@@ -193,12 +159,8 @@ def do_llamaindex(message_data, metadata, vector_name):
    # description=description,
    #)
 
-
-
-    # llamaindex handles its own chunking/embedding
-    memories = load_config_key("memory", vector_name=vector_name, filename = "config/llm_config.yaml")
-    total_memories = len(memories)
-    llama = None
+def check_llamaindex_in_memory(vector_name):
+    memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
    for memory in memories:  # Iterate over the list
        for key, value in memory.items():  # Now iterate over the dictionary
            log.info(f"Found memory {key}")

@@ -206,10 +168,19 @@ def llamaindex_chunker_check(message_data, metadata, vector_name):
            if vectorstore:
                log.info(f"Found vectorstore {vectorstore}")
                if vectorstore == "llamaindex":
-
-
-
-
+
+                    return True
+
+    return False
+
+def llamaindex_chunker_check(message_data, metadata, vector_name):
+    # llamaindex handles its own chunking/embedding
+    memories = load_config_key("memory", vector_name=vector_name, type="vacConfig")
+    total_memories = len(memories)
+    llama = None
+    if check_llamaindex_in_memory(vector_name):
+        llama = do_llamaindex(message_data, metadata, vector_name)
+        log.info(f"Processed llamaindex: {llama}")
 
    # If llamaindex is the only entry, return
    if llama and total_memories == 1:
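The llamaindex detection is now factored into `check_llamaindex_in_memory()`, which scans the VAC's configured memories for a `llamaindex` vectorstore before `llamaindex_chunker_check()` decides whether to call `do_llamaindex()`. A sketch of using the helper directly, assuming a VAC whose vacConfig includes a llamaindex memory and that the gcp extra is installed:

```python
from sunholo.llamaindex.import_files import check_llamaindex_in_memory, do_llamaindex

vector_name = "my_vac"  # illustrative VAC name

if check_llamaindex_in_memory(vector_name):
    # do_llamaindex now raises ValueError if the vertexai module is missing,
    # so `pip install sunholo[gcp]` is needed before this call.
    result = do_llamaindex("gs://my-bucket/docs/report.pdf", {}, vector_name)
```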
sunholo/logging.py
CHANGED

@@ -249,4 +249,12 @@ def log_folder_location(folder_name):
    else:
        logging.warning(f"The folder '{folder_name}' does not exist in the current working directory: {current_working_directory}")
 
-
+# lazy eval
+_logger = None
+def get_logger():
+    global _logger
+    if _logger is None:
+        _logger = setup_logging("sunholo")
+    return _logger
+
+log = get_logger()
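The module-level `log` object is now created lazily through `get_logger()`, which memoises a single `setup_logging("sunholo")` call, so repeated imports reuse the same logger. Typical use elsewhere in the package stays unchanged:

```python
from sunholo.logging import log

# `log` is initialised on first import via get_logger() and reused afterwards.
log.info("hello from the lazily-initialised sunholo logger")
```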
sunholo/utils/big_context.py
ADDED

@@ -0,0 +1,144 @@
+import os
+
+def has_text_extension(file_path):
+    """
+    Check if a file has a common text or code file extension.
+
+    Args:
+        file_path (str): The path to the file.
+
+    Returns:
+        bool: True if the file has a text extension, False otherwise.
+
+    Examples:
+    >>> has_text_extension("example.txt")
+    True
+    >>> has_text_extension("example.pdf")
+    False
+    """
+    # Define a set of common text and code file extensions
+    text_extensions = {
+        '.txt', '.md', '.py', '.json', '.xml', '.csv', '.html', '.htm',
+        '.css', '.js', '.java', '.c', '.cpp', '.h', '.hpp', '.r', '.sh',
+        '.bat', '.ini', '.yaml', '.yml', '.toml', '.pl', '.rb', '.go',
+        '.ts', '.tsx', '.rs', '.swift', '.kt', '.kts', '.scala', '.sql'
+    }
+    # Get the file extension and check if it's in the set
+    _, ext = os.path.splitext(file_path)
+    return ext.lower() in text_extensions
+
+def load_gitignore_patterns(gitignore_path):
+    """
+    Load .gitignore file and compile ignore patterns.
+
+    Args:
+        gitignore_path (str): The path to the .gitignore file.
+
+    Returns:
+        list: A list of patterns to ignore.
+
+    Examples:
+    >>> patterns = load_gitignore_patterns("path/to/source/folder/.gitignore")
+    """
+    with open(gitignore_path, 'r') as f:
+        patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
+    return patterns
+
+def should_ignore(file_path, patterns):
+    """
+    Check if a file path matches any of the ignore patterns.
+
+    Args:
+        file_path (str): The path to the file.
+        patterns (list): A list of patterns to ignore.
+
+    Returns:
+        bool: True if the file should be ignored, False otherwise.
+
+    Examples:
+    >>> should_ignore("path/to/file.txt", ["*.txt", "node_modules/"])
+    True
+    """
+    from fnmatch import fnmatch
+    rel_path = os.path.relpath(file_path)
+    for pattern in patterns:
+        if fnmatch(rel_path, pattern) or fnmatch(os.path.basename(rel_path), pattern):
+            return True
+    return False
+
+def build_file_tree(source_folder, patterns):
+    """
+    Build a hierarchical file tree structure of a directory, ignoring files and directories in .gitignore.
+
+    Args:
+        source_folder (str): The root directory to build the file tree from.
+        patterns (list): A list of patterns to ignore.
+
+    Returns:
+        list: A list of strings representing the file tree structure.
+
+    Examples:
+    >>> build_file_tree("path/to/source/folder", patterns)
+    ['source_folder/', '    file1.txt', '    subfolder/', '        file2.py']
+    """
+    file_tree = []
+    for root, dirs, files in os.walk(source_folder):
+        # Filter out ignored directories
+        dirs[:] = [d for d in dirs if not should_ignore(os.path.join(root, d), patterns)]
+        # Filter out ignored files
+        files = [f for f in files if not should_ignore(os.path.join(root, f), patterns)]
+
+        level = root.replace(source_folder, '').count(os.sep)
+        indent = ' ' * 4 * (level)
+        file_tree.append(f"{indent}{os.path.basename(root)}/")
+        sub_indent = ' ' * 4 * (level + 1)
+        for f in files:
+            file_tree.append(f"{sub_indent}{f}")
+    return file_tree
+
+def merge_text_files(source_folder, output_file, patterns):
+    """
+    Merge the contents of all readable text files in a directory into one file.
+    Also append the file tree structure at the end of the output file.
+
+    Args:
+        source_folder (str): The directory containing the text files to merge.
+        output_file (str): The path to the output file where contents will be written.
+        patterns (list): A list of patterns to ignore.
+
+    Examples:
+    >>> merge_text_files("path/to/source/folder", "path/to/output/bigfile.txt", patterns)
+    """
+    file_tree = build_file_tree(source_folder, patterns)
+    with open(output_file, 'w', encoding='utf-8') as outfile:
+        for root, dirs, files in os.walk(source_folder):
+            # Filter out ignored directories
+            dirs[:] = [d for d in dirs if not should_ignore(os.path.join(root, d), patterns)]
+            # Filter out ignored files
+            files = [f for f in files if not should_ignore(os.path.join(root, f), patterns)]
+
+            for file_name in files:
+                file_path = os.path.join(root, file_name)
+                if has_text_extension(file_path):
+                    try:
+                        with open(file_path, 'r', encoding='utf-8') as infile:
+                            outfile.write(f"--- Start of {file_path} ---\n")
+                            outfile.write(infile.read())
+                            outfile.write(f"\n--- End of {file_path} ---\n\n")
+                    except (IOError, UnicodeDecodeError):
+                        print(f"Skipping file (cannot read as text): {file_path}")
+        outfile.write("\n--- File Tree ---\n")
+        outfile.write("\n".join(file_tree))
+
+# Example usage
+if __name__ == "__main__":
+    source_folder = 'sunholo'
+    output_file = 'bigfile.txt'
+    gitignore_path = os.path.join(source_folder, '.gitignore')
+
+    if os.path.exists(gitignore_path):
+        patterns = load_gitignore_patterns(gitignore_path)
+    else:
+        patterns = []  # Empty list if no .gitignore
+
+    merge_text_files(source_folder, output_file, patterns)
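The new `big_context` utility flattens a source tree into a single text file plus a file-tree summary, which looks intended for building large-context prompts, honouring simple `.gitignore` patterns via `fnmatch`. A sketch of using it on an arbitrary project folder; the paths are illustrative:

```python
import os
from sunholo.utils.big_context import load_gitignore_patterns, merge_text_files

source_folder = "my_project"  # any local project directory
gitignore = os.path.join(source_folder, ".gitignore")
patterns = load_gitignore_patterns(gitignore) if os.path.exists(gitignore) else []

# Writes every readable text/code file, then a file-tree summary, into one file.
merge_text_files(source_folder, "my_project_context.txt", patterns)
```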
sunholo/utils/config.py
CHANGED

@@ -65,6 +65,9 @@ def load_all_configs():
    configs_by_kind = defaultdict(dict)
    for filename in os.listdir(config_folder):
        log.info(f"config file: {filename}")
+        if filename in ["cloudbuild.yaml", "cloud_run_urls.json"]:
+            # skip these
+            continue
        if filename.endswith(('.yaml', '.yml', '.json')):
            config_file = os.path.join(config_folder, filename)
 
sunholo/vertex/__init__.py
File without changes

sunholo/vertex/init_vertex.py
ADDED

@@ -0,0 +1,43 @@
+from ..logging import log
+
+def init_vertex(gcp_config):
+    """
+    Initializes the Vertex AI environment using the provided Google Cloud Platform configuration.
+
+    This function configures the Vertex AI API session with specified project and location details
+    from the gcp_config dictionary. It is essential to call this function at the beginning of a session
+    before performing any operations related to Vertex AI.
+
+    Parameters:
+        gcp_config (dict): A dictionary containing the Google Cloud Platform configuration with keys:
+            - 'project_id': The Google Cloud project ID to configure for Vertex AI.
+            - 'location': The Google Cloud region to configure for Vertex AI.
+
+    Raises:
+        KeyError: If the necessary keys ('project_id' or 'location') are missing in the gcp_config dictionary.
+        ModuleNotFoundError: If the Vertex AI module is not installed and needs to be installed via pip.
+
+    Example:
+    ```python
+    gcp_config = {
+        'project_id': 'your-project-id',
+        'location': 'us-central1'
+    }
+    init_vertex(gcp_config)
+    # This will initialize the Vertex AI session with the provided project ID and location.
+
+    Note:
+        Ensure that the 'vertexai' module is installed and correctly configured before calling this function.
+        The function assumes that the required 'vertexai' library is available and that the logging setup is already in place.
+    """
+    try:
+        import vertexai
+    except ImportError:
+        log.error("Need to install vertexai module via `pip install sunholo[gcp]`")
+
+        return None
+
+    # Initialize Vertex AI API once per session
+    project_id = gcp_config.get('project_id')
+    location = gcp_config.get('location')
+    vertexai.init(project=project_id, location=location)
{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/METADATA
CHANGED

@@ -1,9 +1,9 @@
 Metadata-Version: 2.1
 Name: sunholo
-Version: 0.
+Version: 0.58.2
 Summary: Large Language Model DevOps - a package to help deploy LLMs to the Cloud.
 Home-page: https://github.com/sunholo-data/sunholo-py
-Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.
+Download-URL: https://github.com/sunholo-data/sunholo-py/archive/refs/tags/v0.58.2.tar.gz
 Author: Holosun ApS
 Author-email: multivac@sunholo.com
 License: Apache License, Version 2.0

@@ -24,13 +24,19 @@ Requires-Dist: langchain-community
 Provides-Extra: all
 Requires-Dist: asyncpg ; extra == 'all'
 Requires-Dist: flask ; extra == 'all'
+Requires-Dist: google-auth ; extra == 'all'
+Requires-Dist: google-auth-httplib2 ; extra == 'all'
+Requires-Dist: google-auth-oauthlib ; extra == 'all'
 Requires-Dist: google-cloud-aiplatform ; extra == 'all'
 Requires-Dist: google-api-python-client ; extra == 'all'
 Requires-Dist: google-cloud-alloydb-connector[pg8000] ; extra == 'all'
+Requires-Dist: google-cloud-bigquery ; extra == 'all'
 Requires-Dist: google-cloud-build ; extra == 'all'
 Requires-Dist: google-cloud-logging ; extra == 'all'
 Requires-Dist: google-cloud-storage ; extra == 'all'
 Requires-Dist: google-cloud-pubsub ; extra == 'all'
+Requires-Dist: google-cloud-discoveryengine ; extra == 'all'
+Requires-Dist: google-generativeai ; extra == 'all'
 Requires-Dist: gunicorn ; extra == 'all'
 Requires-Dist: httpcore ; extra == 'all'
 Requires-Dist: httpx ; extra == 'all'

@@ -44,21 +50,35 @@ Requires-Dist: langchain-google-alloydb-pg ; extra == 'all'
 Requires-Dist: langchain-anthropic ; extra == 'all'
 Requires-Dist: langfuse ; extra == 'all'
 Requires-Dist: pg8000 ; extra == 'all'
+Requires-Dist: pgvector ; extra == 'all'
+Requires-Dist: psycopg2-binary ; extra == 'all'
+Requires-Dist: pypdf ; extra == 'all'
 Requires-Dist: fastapi ; extra == 'all'
+Requires-Dist: supabase ; extra == 'all'
+Requires-Dist: tiktoken ; extra == 'all'
 Requires-Dist: python-socketio ; extra == 'all'
 Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic ; extra == 'anthropic'
 Provides-Extra: database
 Requires-Dist: asyncpg ; extra == 'database'
+Requires-Dist: supabase ; extra == 'database'
 Requires-Dist: sqlalchemy ; extra == 'database'
 Requires-Dist: pg8000 ; extra == 'database'
+Requires-Dist: pgvector ; extra == 'database'
+Requires-Dist: psycopg2-binary ; extra == 'database'
 Requires-Dist: lancedb ; extra == 'database'
 Provides-Extra: gcp
+Requires-Dist: google-auth ; extra == 'gcp'
+Requires-Dist: google-auth-httplib2 ; extra == 'gcp'
+Requires-Dist: google-auth-oauthlib ; extra == 'gcp'
 Requires-Dist: google-cloud-aiplatform ; extra == 'gcp'
+Requires-Dist: google-cloud-bigquery ; extra == 'gcp'
 Requires-Dist: google-cloud-build ; extra == 'gcp'
 Requires-Dist: google-cloud-storage ; extra == 'gcp'
 Requires-Dist: google-cloud-logging ; extra == 'gcp'
 Requires-Dist: google-cloud-pubsub ; extra == 'gcp'
+Requires-Dist: google-cloud-discoveryengine ; extra == 'gcp'
+Requires-Dist: google-generativeai ; extra == 'gcp'
 Requires-Dist: langchain-google-genai ; extra == 'gcp'
 Requires-Dist: langchain-google-alloydb-pg ; extra == 'gcp'
 Requires-Dist: google-api-python-client ; extra == 'gcp'

@@ -71,8 +91,15 @@ Requires-Dist: httpcore ; extra == 'http'
 Requires-Dist: httpx ; extra == 'http'
 Requires-Dist: langfuse ; extra == 'http'
 Requires-Dist: python-socketio ; extra == 'http'
+Requires-Dist: requests ; extra == 'http'
 Provides-Extra: openai
 Requires-Dist: langchain-openai ; extra == 'openai'
+Requires-Dist: tiktoken ; extra == 'openai'
+Provides-Extra: pipeline
+Requires-Dist: GitPython ; extra == 'pipeline'
+Requires-Dist: lark ; extra == 'pipeline'
+Requires-Dist: pypdf ; extra == 'pipeline'
+Requires-Dist: tabulate ; extra == 'pipeline'
 
 ## Introduction
 This is the Sunholo Python project, a comprehensive toolkit for working with language models and vector stores on Google Cloud Platform. It provides a wide range of functionalities and utilities to facilitate the development and deployment of language model applications.
{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/RECORD
CHANGED

@@ -1,5 +1,5 @@
-sunholo/__init__.py,sha256=
-sunholo/logging.py,sha256=
+sunholo/__init__.py,sha256=KfqJV0T-2WCIlabNxeX3CvLS-bBGCl9n5aWy091lr2M,841
+sunholo/logging.py,sha256=JSzS_6HY6BIxy8bmVV2TDHXOjyfB1q5nI76OZXAtQDc,11372
 sunholo/agents/__init__.py,sha256=CnlbVohPt-Doth9PyROSlN3P8xMV9j9yS19YE-wCS90,341
 sunholo/agents/chat_history.py,sha256=PbwYmw1TwzI8H-cwQIGgHZ6UIr2Qb-JWow0RG3ayLM8,5195
 sunholo/agents/dispatch_to_qa.py,sha256=kWrO-CJel5kJAyyCShShpACUuZpqDOP7DN8vo_7ciao,8056

@@ -13,7 +13,7 @@ sunholo/agents/fastapi/base.py,sha256=clk76cHbUAvU0OYJrRfCWX_5f0ACbhDsIzYBhI3wyo
 sunholo/agents/fastapi/qna_routes.py,sha256=DgK4Btu5XriOC1JaRQ4G_nWEjJfnQ0J5pyLanF6eF1g,3857
 sunholo/agents/flask/__init__.py,sha256=uqfHNw2Ru3EJ4dJEcbp86h_lkquBQPMxZbjhV_xe3rs,72
 sunholo/agents/flask/base.py,sha256=RUGWBYWeV60FatYF5sMRrxD-INU97Vodsi6JaB6i93s,763
-sunholo/agents/flask/qna_routes.py,sha256=
+sunholo/agents/flask/qna_routes.py,sha256=JLjYrVN2mGoWrGUM_o5N93um46gXpQa8LWdxjF2yN4Y,8554
 sunholo/archive/__init__.py,sha256=qNHWm5rGPVOlxZBZCpA1wTYPbalizRT7f8X4rs2t290,31
 sunholo/archive/archive.py,sha256=C-UhG5x-XtZ8VheQp92IYJqgD0V3NFQjniqlit94t18,1197
 sunholo/auth/__init__.py,sha256=4owDjSaWYkbTlPK47UHTOC0gCWbZsqn4ZIEw5NWZTlg,28

@@ -22,7 +22,7 @@ sunholo/bots/__init__.py,sha256=EMFd7e2z68l6pzYOnkzHbLd2xJRvxTKFRNCTuhZ8hIw,130
 sunholo/bots/discord.py,sha256=cCFae5K1BCa6JVkWGLh_iZ9qFO1JpXb6K4eJrlDfEro,2442
 sunholo/bots/webapp.py,sha256=EIMxdAJ_xtufwJmvnn7N_Fb_1hZ9DjhJ0Kf_hp02vEU,1926
 sunholo/chunker/__init__.py,sha256=UhQBZTKwDfBXm0TPv4LvsGc5pdUGCbYzi3qPTOkU4gw,55
-sunholo/chunker/data_to_embed_pubsub.py,sha256=
+sunholo/chunker/data_to_embed_pubsub.py,sha256=t-pWNYv2mnwVAkMcIOK2CrIb3yr2aS9iAdtryk7hT8o,2931
 sunholo/chunker/doc_handling.py,sha256=2w5oDkU2RX3ynq7GkR5CUouiEzBXbEkaK_4p6yRcC3M,8597
 sunholo/chunker/images.py,sha256=Xmh1vwHrVhoXm5iH2dhCc52O8YgdzE8KrDSdL-pGnp8,1861
 sunholo/chunker/loaders.py,sha256=xiToUVgPz2ZzcqpUAq7aNP3PTenb_rBUAFzu0JPycIg,10268

@@ -31,11 +31,14 @@ sunholo/chunker/pdfs.py,sha256=daCZ1xjn1YvxlifIyxskWNpLJLe-Q9D_Jq12MWx3tZo,2473
 sunholo/chunker/publish.py,sha256=PoT8q3XJeFCg10WrLkYhuaaXIrGVkvUD3-R9IfoWoH4,2703
 sunholo/chunker/splitter.py,sha256=ug_v-h0wos3b7OkhmedVQs5jtLuDdFDWypvsZVYgxbU,6743
 sunholo/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-sunholo/cli/cli.py,sha256=
+sunholo/cli/cli.py,sha256=rcO1hMthy5nWC_5sOHqRm7ut70c9JfxFTSjFRBNYuYg,1248
+sunholo/cli/cli_init.py,sha256=WReZuMQwDfkRUvssYT7TirUoG6SiT1dTDol8nLI8O70,3418
+sunholo/cli/configs.py,sha256=wLRSL-Z58B7dQ_gxTK_d8HjNP5pABhegLqjB0-IKGjg,992
+sunholo/cli/deploy.py,sha256=zxdwUsRTRMC8U5vyRv0JiKBLFn84Ug_Tc88-_h9hJSs,1609
 sunholo/components/__init__.py,sha256=RJGNEihwvRIiDScKis04RHJv4yZGI1UpXlOmuCptNZI,208
 sunholo/components/llm.py,sha256=T4we3tGmqUj4tPwxQr9M6AXv_BALqZV_dRSvINan-oU,10374
 sunholo/components/prompt.py,sha256=eZSghXkIlRzXiSrzgkG7e5ytUYq6R6LV-qjHU8jStig,6353
-sunholo/components/retriever.py,sha256=
+sunholo/components/retriever.py,sha256=QA_l7HXwd4g6IFuT2A5mICzUcV80K0YvDbCtRxefq8o,3684
 sunholo/components/vectorstore.py,sha256=RB_Dgc9234G_TE3w3abCfBw1pqm2br2RrLP0UqshkvA,5172
 sunholo/database/__init__.py,sha256=Zz0Shcq-CtStf9rJGIYB_Ybzb8rY_Q9mfSj-nviM490,241
 sunholo/database/alloydb.py,sha256=18Q4AG_W-Sz8udIj3gWMkGrMWmiEelqgOwJ7VKHElV0,14877

@@ -50,7 +53,7 @@ sunholo/database/sql/sb/delete_source_row.sql,sha256=r6fEuUKdbiLHCDGKSbKINDCpJjs
 sunholo/database/sql/sb/return_sources.sql,sha256=89KAnxfK8n_qGK9jy1OQT8f9n4uYUtYL5cCxbC2mj_c,255
 sunholo/database/sql/sb/setup.sql,sha256=CvoFvZQev2uWjmFa3aj3m3iuPFzAAJZ0S7Qi3L3-zZI,89
 sunholo/embedder/__init__.py,sha256=sI4N_CqgEVcrMDxXgxKp1FsfsB4FpjoXgPGkl4N_u4I,44
-sunholo/embedder/embed_chunk.py,sha256=
+sunholo/embedder/embed_chunk.py,sha256=XV1kdDUWw2QO-am5_Yl7GrYP9V_4i1XRNNFPhqUSnZQ,5851
 sunholo/gcs/__init__.py,sha256=DtVw_AZwQn-IguR5BJuIi2XJeF_FQXizhJikzRNrXiE,50
 sunholo/gcs/add_file.py,sha256=JmJIuz5Z1h7-eJ6s2eE3wc8Y4IAv3Jridq1xfQbD9_E,4711
 sunholo/gcs/download_url.py,sha256=PAwYShV-sRd9sNvuJrEOvfF1V34ovVP0omWbuwDkRrA,4751

@@ -60,7 +63,7 @@ sunholo/langfuse/callback.py,sha256=G9xcZHpLvyzolU57ycItLaooMCtRuM37QJSWjiwQEd0,
 sunholo/langfuse/prompts.py,sha256=HO4Zy9usn5tKooBPCKksuw4Lff3c03Ny5wqn4ce_xZM,1217
 sunholo/llamaindex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sunholo/llamaindex/generate.py,sha256=l1Picr-hVwkmAUD7XmTCa63qY9ERliFHQXwyX3BqB2Q,686
-sunholo/llamaindex/import_files.py,sha256=
+sunholo/llamaindex/import_files.py,sha256=j71_rGIpZg1Zbsy-PnNIUgpNJznnsDiYxR6sulBlQ_8,7786
 sunholo/lookup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 sunholo/lookup/model_lookup.yaml,sha256=O7o-jP53MLA06C8pI-ILwERShO-xf6z_258wtpZBv6A,739
 sunholo/patches/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -80,12 +83,15 @@ sunholo/streaming/streaming.py,sha256=H8qpQC5wr4dVyInWTAjTmvGePVueWcwNaR8T_5mEp6
 sunholo/summarise/__init__.py,sha256=MZk3dblUMODcPb1crq4v-Z508NrFIpkSWNf9FIO8BcU,38
 sunholo/summarise/summarise.py,sha256=C3HhjepTjUhUC8FLk4jMQIBvq1BcORniwuTFHjPVhVo,3784
 sunholo/utils/__init__.py,sha256=G11nN_6ATjxpuMfG_BvcUr9UU8onPIgkpTK6CjOcbr8,48
-sunholo/utils/
+sunholo/utils/big_context.py,sha256=qHYtds4Ecf9eZRHVqXho4_q8Je7HD44-vS6RJ6s9Z0Q,5387
+sunholo/utils/config.py,sha256=Ve1sb68Av9_SPGqXs33g5FAJSIQ3GODoeuUCW3MNCwU,8802
 sunholo/utils/gcp.py,sha256=B2G1YKjeD7X9dqO86Jrp2vPuFwZ223Xl5Tg09Ndw-oc,5760
 sunholo/utils/parsers.py,sha256=OrHmASqIbI45atVOhiGodgLvnfrzkvVzyHnSvAXD89I,3841
-sunholo
-sunholo
-sunholo-0.
-sunholo-0.
-sunholo-0.
-sunholo-0.
+sunholo/vertex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+sunholo/vertex/init_vertex.py,sha256=JDMUaBRdednzbKF-5p33qqLit2LMsvgvWW-NRz0AqO0,1801
+sunholo-0.58.2.dist-info/LICENSE.txt,sha256=SdE3QjnD3GEmqqg9EX3TM9f7WmtOzqS1KJve8rhbYmU,11345
+sunholo-0.58.2.dist-info/METADATA,sha256=6N_BstncHOJiJJJ4N1QOHiKDxUB_Bc8UDnB2NA_xVb0,7894
+sunholo-0.58.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+sunholo-0.58.2.dist-info/entry_points.txt,sha256=bZuN5AIHingMPt4Ro1b_T-FnQvZ3teBes-3OyO0asl4,49
+sunholo-0.58.2.dist-info/top_level.txt,sha256=wt5tadn5--5JrZsjJz2LceoUvcrIvxjHJe-RxuudxAk,8
+sunholo-0.58.2.dist-info/RECORD,,
{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/LICENSE.txt
File without changes

{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/WHEEL
File without changes

{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/entry_points.txt
File without changes

{sunholo-0.57.2.dist-info → sunholo-0.58.2.dist-info}/top_level.txt
File without changes