vectorwave 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +0 -0
- tests/batch/__init__.py +0 -0
- tests/batch/test_batch.py +98 -0
- tests/core/__init__.py +0 -0
- tests/core/test_decorator.py +345 -0
- tests/database/__init__.py +0 -0
- tests/database/test_db.py +468 -0
- tests/database/test_db_search.py +163 -0
- tests/exception/__init__.py +0 -0
- tests/models/__init__.py +0 -0
- tests/models/test_db_config.py +152 -0
- tests/monitoring/__init__.py +0 -0
- tests/monitoring/test_tracer.py +202 -0
- tests/prediction/__init__.py +0 -0
- tests/vectorizer/__init__.py +0 -0
- vectorwave/__init__.py +13 -0
- vectorwave/batch/__init__.py +0 -0
- vectorwave/batch/batch.py +68 -0
- vectorwave/core/__init__.py +0 -0
- vectorwave/core/core.py +0 -0
- vectorwave/core/decorator.py +131 -0
- vectorwave/database/__init__.py +0 -0
- vectorwave/database/db.py +328 -0
- vectorwave/database/db_search.py +122 -0
- vectorwave/exception/__init__.py +0 -0
- vectorwave/exception/exceptions.py +22 -0
- vectorwave/models/__init__.py +0 -0
- vectorwave/models/db_config.py +92 -0
- vectorwave/monitoring/__init__.py +0 -0
- vectorwave/monitoring/monitoring.py +0 -0
- vectorwave/monitoring/tracer.py +131 -0
- vectorwave/prediction/__init__.py +0 -0
- vectorwave/prediction/predictor.py +0 -0
- vectorwave/vectorizer/__init__.py +0 -0
- vectorwave/vectorizer/base.py +12 -0
- vectorwave/vectorizer/factory.py +49 -0
- vectorwave/vectorizer/huggingface_vectorizer.py +33 -0
- vectorwave/vectorizer/openai_vectorizer.py +35 -0
- vectorwave-0.1.3.dist-info/METADATA +352 -0
- vectorwave-0.1.3.dist-info/RECORD +44 -0
- vectorwave-0.1.3.dist-info/WHEEL +5 -0
- vectorwave-0.1.3.dist-info/licenses/LICENSE +21 -0
- vectorwave-0.1.3.dist-info/licenses/NOTICE +31 -0
- vectorwave-0.1.3.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import inspect
|
|
3
|
+
import time
|
|
4
|
+
import traceback
|
|
5
|
+
from functools import wraps
|
|
6
|
+
from contextvars import ContextVar
|
|
7
|
+
from typing import Optional, List, Dict, Any, Callable
|
|
8
|
+
from uuid import uuid4
|
|
9
|
+
from datetime import datetime, timezone
|
|
10
|
+
|
|
11
|
+
from ..batch.batch import get_batch_manager
|
|
12
|
+
from ..models.db_config import get_weaviate_settings, WeaviateSettings
|
|
13
|
+
|
|
14
|
+
# Create module-level logger
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
class TraceCollector:
    """Per-trace context object shared by every span of one workflow run.

    Stores the trace identifier together with the settings object returned by
    ``get_weaviate_settings()`` and the batch manager returned by
    ``get_batch_manager()``, both looked up once at construction time.
    """

    def __init__(self, trace_id: str):
        # Look up the shared collaborators, then stamp the identifier.
        self.settings: WeaviateSettings = get_weaviate_settings()
        self.batch = get_batch_manager()
        self.trace_id = trace_id
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
current_tracer_var: ContextVar[Optional[TraceCollector]] = ContextVar('current_tracer', default=None)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def trace_root() -> Callable:
    """
    Decorator factory for the workflow's entry point function.

    The produced wrapper creates a TraceCollector, stores it in
    ``current_tracer_var`` for the duration of the call, and restores the
    previous value afterwards (also when the wrapped function raises).

    If a collector is already active in the current context, the wrapped
    function is called unchanged so the existing trace is reused.

    A ``trace_id`` keyword argument supplied by the caller is removed from
    ``kwargs`` and used as the trace identifier. When it is absent or ``None``
    a new UUID4 string is generated instead.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Already inside a trace: do not start a second one.
            if current_tracer_var.get() is not None:
                return func(*args, **kwargs)

            # Generate an id only when the caller gave no usable one, so a
            # span is never recorded with a null trace_id.
            trace_id = kwargs.pop('trace_id', None)
            if trace_id is None:
                trace_id = str(uuid4())

            tracer = TraceCollector(trace_id=trace_id)
            token = current_tracer_var.set(tracer)

            try:
                # NOTE: func is typically the wrapper produced by @trace_span,
                # which reads the collector that was just installed.
                return func(*args, **kwargs)
            finally:
                # Always restore the previous context value.
                current_tracer_var.reset(token)

        return wrapper

    return decorator
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def trace_span(
        _func: Optional[Callable] = None,
        *,
        attributes_to_capture: Optional[List[str]] = None
) -> Callable:
    """
    Decorator to capture function execution as a 'span'.

    Can be used as @trace_span or @trace_span(attributes_to_capture=[...]).

    When no TraceCollector is active in ``current_tracer_var`` the wrapped
    function is called directly and nothing is recorded. Otherwise the call is
    timed and a span record is handed to the collector's batch manager,
    whether the function returns or raises.

    Args:
        _func: The function being decorated when used as a bare ``@trace_span``.
        attributes_to_capture: Names of keyword arguments whose values should
            be copied into the span. Only arguments passed by keyword are
            looked at. Values that are not str/int/float/bool/list/dict/None
            are stored as ``str(value)``.

    Returns:
        The decorated function, or a decorator when called with options only.

    Raises:
        Whatever the wrapped function raises; the exception is re-raised
        unchanged after the span has been recorded.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            tracer = current_tracer_var.get()
            if not tracer:
                return func(*args, **kwargs)

            start_time = time.perf_counter()
            status = "SUCCESS"
            error_msg = None
            result = None

            captured_attributes = {}
            if attributes_to_capture:
                try:
                    # Directly checks the kwargs dictionary.
                    for attr_name in attributes_to_capture:
                        if attr_name in kwargs:
                            value = kwargs[attr_name]
                            if not isinstance(value, (str, int, float, bool, list, dict, type(None))):
                                value = str(value)
                            captured_attributes[attr_name] = value
                except Exception as e:
                    # Attribute capture is best-effort and must not break the call.
                    logger.warning("Failed to capture attributes for '%s': %s", func.__name__, e)

            try:
                result = func(*args, **kwargs)
            except BaseException:
                # BaseException so that a span aborted by KeyboardInterrupt or
                # SystemExit is not recorded as "SUCCESS". The bare ``raise``
                # re-raises the active exception without altering its traceback.
                status = "ERROR"
                error_msg = traceback.format_exc()
                raise
            finally:
                duration_ms = (time.perf_counter() - start_time) * 1000

                span_properties = {
                    "trace_id": tracer.trace_id,
                    "span_id": str(uuid4()),
                    "function_name": func.__name__,
                    "timestamp_utc": datetime.now(timezone.utc).isoformat(),
                    "duration_ms": duration_ms,
                    "status": status,
                    "error_message": error_msg,
                }

                # 1. Apply global tags first.
                if tracer.settings.global_custom_values:
                    span_properties.update(tracer.settings.global_custom_values)

                # 2. Apply captured attributes second, so a captured value
                #    (e.g. 'run_id') overrides the global value of the same key.
                span_properties.update(captured_attributes)

                try:
                    tracer.batch.add_object(
                        collection=tracer.settings.EXECUTION_COLLECTION_NAME,
                        properties=span_properties
                    )
                except Exception as e:
                    # A logging failure must never mask the function's outcome.
                    logger.error("Failed to log span for '%s' (trace_id: %s): %s", func.__name__, tracer.trace_id, e)

            return result

        return wrapper

    if _func is None:
        return decorator
    else:
        return decorator(_func)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
class BaseVectorizer(ABC):
    """Abstract interface that every Python-side vectorizer implements."""

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Return the embedding vector for a single piece of text."""

    @abstractmethod
    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per entry of ``texts``."""
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# [NEW] File: src/vectorwave/vectorizer/factory.py
|
|
2
|
+
from functools import lru_cache
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ..models.db_config import get_weaviate_settings, WeaviateSettings
|
|
6
|
+
from .base import BaseVectorizer
|
|
7
|
+
from .huggingface_vectorizer import HuggingFaceVectorizer
|
|
8
|
+
from .openai_vectorizer import OpenAIVectorizer
|
|
9
|
+
|
|
10
|
+
@lru_cache()
def get_vectorizer() -> Optional[BaseVectorizer]:
    """
    Build the Python-side vectorizer selected by the ``VECTORIZER`` setting.

    - "huggingface": returns a HuggingFaceVectorizer for ``HF_MODEL_NAME``.
    - "openai_client": returns an OpenAIVectorizer; requires ``OPENAI_API_KEY``.
    - "weaviate_module" or "none": returns None (no Python-side vectorizer).
    - anything else: prints a warning and returns None.

    Construction failures are printed and turned into None. Because of
    ``lru_cache`` the first result, including None, is reused by later calls.
    """
    config: WeaviateSettings = get_weaviate_settings()
    choice = config.VECTORIZER.lower()

    print(f"[VectorWave] Initializing vectorizer based on setting: '{choice}'")

    if choice == "huggingface":
        try:
            return HuggingFaceVectorizer(model_name=config.HF_MODEL_NAME)
        except Exception as exc:
            print(f"Error: Failed to initialize HuggingFaceVectorizer: {exc}")
            return None

    if choice == "openai_client":
        if not config.OPENAI_API_KEY:
            print("Warning: VECTORIZER='openai_client' but OPENAI_API_KEY is not set. Vectorizer disabled.")
            return None
        try:
            return OpenAIVectorizer(api_key=config.OPENAI_API_KEY)
        except Exception as exc:
            print(f"Error: Failed to initialize OpenAIVectorizer: {exc}")
            return None

    # Remaining modes never build a Python vectorizer; only the message differs.
    passive_messages = {
        "weaviate_module": "[VectorWave] Using Weaviate's internal module for vectorization.",
        "none": "[VectorWave] Vectorization is disabled ('none').",
    }
    if choice in passive_messages:
        print(passive_messages[choice])
    else:
        print(f"Warning: Unknown VECTORIZER setting: '{choice}'. Disabling vectorizer.")
    return None
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
from .base import BaseVectorizer
|
|
2
|
+
from typing import List
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
try:
|
|
6
|
+
from sentence_transformers import SentenceTransformer
|
|
7
|
+
except ImportError:
|
|
8
|
+
# Warning: The 'sentence-transformers' library is not installed.
|
|
9
|
+
print("Warning: The 'sentence-transformers' library is not installed.")
|
|
10
|
+
# To use HuggingFaceVectorizer, run 'pip install sentence-transformers'.
|
|
11
|
+
print("To use HuggingFaceVectorizer, run 'pip install sentence-transformers'.")
|
|
12
|
+
SentenceTransformer = None
|
|
13
|
+
|
|
14
|
+
class HuggingFaceVectorizer(BaseVectorizer):
    """Vectorizer backed by a HuggingFace SentenceTransformer (Python client)."""

    def __init__(self, model_name: str):
        """Load ``model_name`` on the CPU.

        Raises:
            ImportError: if 'sentence-transformers' could not be imported.
        """
        if SentenceTransformer is None:
            raise ImportError("Could not find the 'sentence-transformers' library.")

        # The device is pinned to CPU here (change to 'cuda', etc., if needed).
        self.model = SentenceTransformer(model_name, device='cpu')
        print(f"[VectorWave] HuggingFaceVectorizer loaded model '{model_name}' on CPU.")

    def embed(self, text: str) -> List[float]:
        """Encode one text and return its vector as a plain list."""
        # Encode as a one-element batch and take its only row.
        encoded = self.model.encode([text], convert_to_numpy=True)
        return encoded[0].tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Encode several texts and return their vectors as nested lists."""
        return self.model.encode(texts, convert_to_numpy=True).tolist()
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from .base import BaseVectorizer
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
try:
|
|
5
|
+
from openai import OpenAI
|
|
6
|
+
except ImportError:
|
|
7
|
+
# Warning: The 'openai' library is not installed.
|
|
8
|
+
print("Warning: The 'openai' library is not installed.")
|
|
9
|
+
# To use OpenAIVectorizer, run 'pip install openai'.
|
|
10
|
+
print("To use OpenAIVectorizer, run 'pip install openai'.")
|
|
11
|
+
OpenAI = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class OpenAIVectorizer(BaseVectorizer):
    """Vectorizer that calls the OpenAI embeddings endpoint via the Python client."""

    def __init__(self, api_key: str, model: str = "text-embedding-3-small"):
        """Create the OpenAI client.

        Args:
            api_key: OpenAI API key; must be non-empty.
            model: Name of the embedding model to request.

        Raises:
            ImportError: if the 'openai' library could not be imported.
            ValueError: if ``api_key`` is empty.
        """
        if OpenAI is None:
            raise ImportError("Could not find the 'openai' library.")
        if not api_key:
            raise ValueError("OpenAI API key is required for OpenAIVectorizer.")

        self.client = OpenAI(api_key=api_key)
        self.model = model
        print(f"[VectorWave] OpenAIVectorizer initialized with model '{self.model}'.")

    def embed(self, text: str) -> List[float]:
        """Return the embedding of a single text (newlines replaced by spaces)."""
        text = text.replace("\n", " ")
        response = self.client.embeddings.create(input=[text], model=self.model)
        return response.data[0].embedding

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per entry of ``texts``.

        An empty ``texts`` returns an empty list without a request, because
        there is nothing to embed and an empty input list would otherwise be
        sent to the API.
        """
        if not texts:
            return []

        texts = [t.replace("\n", " ") for t in texts]
        response = self.client.embeddings.create(input=texts, model=self.model)
        return [d.embedding for d in response.data]
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vectorwave
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: VectorWave: Seamless Auto-Vectorization Framework
|
|
5
|
+
Author-email: junyeonggim <junyeonggim5@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/republicofgamja/vtm
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
13
|
+
Classifier: Operating System :: OS Independent
|
|
14
|
+
Classifier: Development Status :: 3 - Alpha
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
License-File: NOTICE
|
|
20
|
+
Requires-Dist: weaviate-client>=4.0.0
|
|
21
|
+
Requires-Dist: pydantic-settings>=2.0.0
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# VectorWave: Seamless Auto-Vectorization Framework
|
|
26
|
+
|
|
27
|
+
[License: MIT](LICENSE)
|
|
28
|
+
|
|
29
|
+
## 🌟 Overview
|
|
30
|
+
|
|
31
|
+
**VectorWave** is an innovative framework that uses a **decorator** to automatically save and manage the output of Python functions/methods in a **Vector Database (Vector DB)**. Developers can convert function outputs into intelligent vector data with a single line of code (`@vectorize`), without worrying about the complex processes of data collection, embedding generation, or storage in a Vector DB.
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## ✨ Features
|
|
36
|
+
|
|
37
|
+
* **`@vectorize` Decorator:**
|
|
38
|
+
1. **Static Data Collection:** Saves the function's source code, docstring, and metadata to the `VectorWaveFunctions` collection once when the script is loaded.
|
|
39
|
+
2. **Dynamic Data Logging:** Records the execution time, success/failure status, error logs, and 'dynamic tags' to the `VectorWaveExecutions` collection every time the function is called.
|
|
40
|
+
* **Distributed Tracing:** By combining the `@vectorize` and `@trace_span` decorators, you can analyze the execution of complex multi-step workflows, grouped under a single **`trace_id`**.
|
|
41
|
+
* **Search Interface:** Provides `search_functions` (for vector search) and `search_executions` (for log filtering) to facilitate the construction of RAG and monitoring systems.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## 🚀 Usage
|
|
46
|
+
|
|
47
|
+
VectorWave consists of 'storing' via decorators and 'searching' via functions, and now includes **execution flow tracing**.
|
|
48
|
+
|
|
49
|
+
### 1. (Required) Initialize the Database and Configuration
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
import time
|
|
53
|
+
from vectorwave import (
|
|
54
|
+
vectorize,
|
|
55
|
+
initialize_database,
|
|
56
|
+
search_functions,
|
|
57
|
+
search_executions
|
|
58
|
+
)
|
|
59
|
+
# [ADDITION] Import trace_span separately for distributed tracing.
|
|
60
|
+
from vectorwave.monitoring.tracer import trace_span
|
|
61
|
+
|
|
62
|
+
# This only needs to be called once when the script starts.
|
|
63
|
+
try:
|
|
64
|
+
client = initialize_database()
|
|
65
|
+
print("VectorWave DB initialized successfully.")
|
|
66
|
+
except Exception as e:
|
|
67
|
+
print(f"DB initialization failed: {e}")
|
|
68
|
+
exit()
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### 2. [Store] Use `@vectorize` with Distributed Tracing
|
|
72
|
+
|
|
73
|
+
The `@vectorize` decorator acts as the **root** of the trace, and `@trace_span` is applied to internal functions so that their executions are grouped under a single `trace_id`.
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
# --- Child Span Function: Captures arguments ---
|
|
77
|
+
@trace_span(attributes_to_capture=['user_id', 'amount'])
|
|
78
|
+
def step_1_validate_payment(user_id: str, amount: int):
|
|
79
|
+
"""(Span) Payment validation. Records user_id and amount in the log."""
|
|
80
|
+
print(f" [SPAN 1] Validating payment for {user_id}...")
|
|
81
|
+
time.sleep(0.1)
|
|
82
|
+
return True
|
|
83
|
+
|
|
84
|
+
@trace_span(attributes_to_capture=['user_id', 'receipt_id'])
|
|
85
|
+
def step_2_send_receipt(user_id: str, receipt_id: str):
|
|
86
|
+
"""(Span) Sends the receipt."""
|
|
87
|
+
print(f" [SPAN 2] Sending receipt {receipt_id}...")
|
|
88
|
+
time.sleep(0.2)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# --- Root Function (@trace_root role) ---
|
|
92
|
+
@vectorize(
|
|
93
|
+
search_description="Charges a user in the payment system.",
|
|
94
|
+
sequence_narrative="Returns a receipt ID upon successful payment.",
|
|
95
|
+
team="billing", # <-- Custom Tag (recorded in all execution logs)
|
|
96
|
+
priority=1 # <-- Custom Tag (execution priority)
|
|
97
|
+
)
|
|
98
|
+
def process_payment(user_id: str, amount: int):
|
|
99
|
+
"""(Root Span) Executes the user payment workflow."""
|
|
100
|
+
print(f" [ROOT EXEC] process_payment: Starting workflow for {user_id}...")
|
|
101
|
+
|
|
102
|
+
# When calling child functions, the same trace_id is automatically inherited via ContextVar.
|
|
103
|
+
step_1_validate_payment(user_id=user_id, amount=amount)
|
|
104
|
+
|
|
105
|
+
receipt_id = f"receipt_{user_id}_{amount}"
|
|
106
|
+
step_2_send_receipt(user_id=user_id, receipt_id=receipt_id)
|
|
107
|
+
|
|
108
|
+
print(f" [ROOT DONE] process_payment")
|
|
109
|
+
return {"status": "success", "receipt_id": receipt_id}
|
|
110
|
+
|
|
111
|
+
# --- Execute the Function ---
|
|
112
|
+
print("Now calling 'process_payment'...")
|
|
113
|
+
# This single call records 3 execution logs (spans) in the DB,
|
|
114
|
+
# all grouped under one 'trace_id'.
|
|
115
|
+
process_payment("user_789", 5000)
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### 3. [Search ①] Function Definition Search (for RAG)
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
# Search for functions related to 'payment' using natural language (vector search).
|
|
122
|
+
print("\n--- Searching for 'payment' functions ---")
|
|
123
|
+
payment_funcs = search_functions(
|
|
124
|
+
query="user payment processing",
|
|
125
|
+
limit=3
|
|
126
|
+
)
|
|
127
|
+
for func in payment_funcs:
|
|
128
|
+
print(f" - Function: {func['properties']['function_name']}")
|
|
129
|
+
print(f" - Description: {func['properties']['search_description']}")
|
|
130
|
+
print(f" - Similarity (Distance): {func['metadata'].distance:.4f}")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 4. [Search ②] Execution Log Search (Monitoring and Tracing)
|
|
134
|
+
|
|
135
|
+
The `search_executions` function can now search for all related execution logs (spans) based on the `trace_id`.
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
# 1. Find the Trace ID of a specific workflow (process_payment).
|
|
139
|
+
latest_payment_span = search_executions(
|
|
140
|
+
limit=1,
|
|
141
|
+
filters={"function_name": "process_payment"},
|
|
142
|
+
sort_by="timestamp_utc",
|
|
143
|
+
sort_ascending=False
|
|
144
|
+
)
|
|
145
|
+
trace_id = latest_payment_span[0]["trace_id"]
|
|
146
|
+
|
|
147
|
+
# 2. Search all spans belonging to that Trace ID, sorted chronologically.
|
|
148
|
+
print(f"\n--- Full Trace for ID ({trace_id[:8]}...) ---")
|
|
149
|
+
trace_spans = search_executions(
|
|
150
|
+
limit=10,
|
|
151
|
+
filters={"trace_id": trace_id},
|
|
152
|
+
sort_by="timestamp_utc",
|
|
153
|
+
sort_ascending=True # Ascending sort for workflow flow analysis
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
for i, span in enumerate(trace_spans):
|
|
157
|
+
print(f" - [Span {i+1}] {span['function_name']} ({span['duration_ms']:.2f}ms)")
|
|
158
|
+
# Captured arguments (user_id, amount, etc.) are displayed for the child spans.
|
|
159
|
+
|
|
160
|
+
# Example Output:
|
|
161
|
+
# - [Span 1] step_1_validate_payment (100.81ms)
|
|
162
|
+
# - [Span 2] step_2_send_receipt (202.06ms)
|
|
163
|
+
# - [Span 3] process_payment (333.18ms)
|
|
164
|
+
```
|
|
165
|
+
-----
|
|
166
|
+
|
|
167
|
+
## ⚙️ Configuration
|
|
168
|
+
|
|
169
|
+
VectorWave automatically reads Weaviate database connection info and **vectorization strategy** from **environment variables** or a `.env` file.
|
|
170
|
+
|
|
171
|
+
Create a `.env` file in your project's root directory (e.g., where `test_ex/example.py` is located) and set the required values.
|
|
172
|
+
|
|
173
|
+
### Vectorizer Strategy (VECTORIZER)
|
|
174
|
+
|
|
175
|
+
You can select the text vectorization method via the `VECTORIZER` environment variable in your `test_ex/.env` file.
|
|
176
|
+
|
|
177
|
+
| `VECTORIZER` Setting | Description | Required Additional Settings |
|
|
178
|
+
| :--- | :--- | :--- |
|
|
179
|
+
| **`huggingface`** | (Default Recommended) Uses the `sentence-transformers` library to vectorize on your local CPU. No API key is needed, making it great for immediate testing. | `HF_MODEL_NAME` (e.g., "sentence-transformers/all-MiniLM-L6-v2") |
|
|
180
|
+
| **`openai_client`** | (High-Performance) Uses the OpenAI Python client to vectorize with modern models like `text-embedding-3-small`. | `OPENAI_API_KEY` (A valid OpenAI API key) |
|
|
181
|
+
| **`weaviate_module`** | (Docker Delegate) Delegates the vectorization task to the Weaviate container's built-in module (e.g., `text2vec-openai`). | `WEAVIATE_VECTORIZER_MODULE`, `OPENAI_API_KEY` |
|
|
182
|
+
| **`none`** | Disables vectorization. Data will be stored without vectors. | None |
|
|
183
|
+
|
|
184
|
+
-----
|
|
185
|
+
|
|
186
|
+
### .env File Examples
|
|
187
|
+
|
|
188
|
+
Configure your `.env` file according to the strategy you want to use.
|
|
189
|
+
|
|
190
|
+
#### Example 1: Using `huggingface` (Local, No API Key)
|
|
191
|
+
|
|
192
|
+
Uses a `sentence-transformers` model on your local machine. Ideal for testing without API keys.
|
|
193
|
+
|
|
194
|
+
```ini
|
|
195
|
+
# .env (Using HuggingFace)
|
|
196
|
+
# --- Basic Weaviate Connection ---
|
|
197
|
+
WEAVIATE_HOST=localhost
|
|
198
|
+
WEAVIATE_PORT=8080
|
|
199
|
+
WEAVIATE_GRPC_PORT=50051
|
|
200
|
+
|
|
201
|
+
# --- [Strategy 1] HuggingFace Config ---
|
|
202
|
+
VECTORIZER="huggingface"
|
|
203
|
+
HF_MODEL_NAME="sentence-transformers/all-MiniLM-L6-v2"
|
|
204
|
+
|
|
205
|
+
# (OPENAI_API_KEY is not required for this mode)
|
|
206
|
+
OPENAI_API_KEY=sk-...
|
|
207
|
+
|
|
208
|
+
# --- [Advanced] Custom Properties ---
|
|
209
|
+
CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
|
|
210
|
+
RUN_ID=test-run-001
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
#### Example 2: Using `openai_client` (Python Client, High-Performance)
|
|
214
|
+
|
|
215
|
+
Directly calls the OpenAI API via the `openai` Python library.
|
|
216
|
+
|
|
217
|
+
```ini
|
|
218
|
+
# .env (Using OpenAI Python Client)
|
|
219
|
+
# --- Basic Weaviate Connection ---
|
|
220
|
+
WEAVIATE_HOST=localhost
|
|
221
|
+
WEAVIATE_PORT=8080
|
|
222
|
+
WEAVIATE_GRPC_PORT=50051
|
|
223
|
+
|
|
224
|
+
# --- [Strategy 2] OpenAI Client Config ---
|
|
225
|
+
VECTORIZER="openai_client"
|
|
226
|
+
|
|
227
|
+
# [Required] You must enter a valid OpenAI API key.
|
|
228
|
+
OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
|
229
|
+
|
|
230
|
+
# (HF_MODEL_NAME is not used in this mode)
|
|
231
|
+
HF_MODEL_NAME=...
|
|
232
|
+
|
|
233
|
+
# --- [Advanced] Custom Properties ---
|
|
234
|
+
CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
|
|
235
|
+
RUN_ID=test-run-001
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
#### Example 3: Using `weaviate_module` (Docker Delegate)
|
|
239
|
+
|
|
240
|
+
Delegates vectorization to the Weaviate Docker container instead of Python. (See `vw_docker.yml` config).
|
|
241
|
+
|
|
242
|
+
```ini
|
|
243
|
+
# .env (Delegating to Weaviate Module)
|
|
244
|
+
# --- Basic Weaviate Connection ---
|
|
245
|
+
WEAVIATE_HOST=localhost
|
|
246
|
+
WEAVIATE_PORT=8080
|
|
247
|
+
WEAVIATE_GRPC_PORT=50051
|
|
248
|
+
|
|
249
|
+
# --- [Strategy 3] Weaviate Module Config ---
|
|
250
|
+
VECTORIZER="weaviate_module"
|
|
251
|
+
WEAVIATE_VECTORIZER_MODULE=text2vec-openai
|
|
252
|
+
WEAVIATE_GENERATIVE_MODULE=generative-openai
|
|
253
|
+
|
|
254
|
+
# [Required] The Weaviate container will read this API key.
|
|
255
|
+
OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
|
256
|
+
|
|
257
|
+
# --- [Advanced] Custom Properties ---
|
|
258
|
+
CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
|
|
259
|
+
RUN_ID=test-run-001
|
|
260
|
+
```
|
|
261
|
+
|
|
262
|
+
-----
|
|
263
|
+
|
|
264
|
+
### Custom Properties and Dynamic Execution Tagging
|
|
265
|
+
|
|
266
|
+
VectorWave can store user-defined metadata in addition to static data (function definitions) and dynamic data (execution logs). This works in two steps.
|
|
267
|
+
|
|
268
|
+
#### Step 1: Define Custom Schema (Tag "Allow-list")
|
|
269
|
+
|
|
270
|
+
Create a JSON file at the path specified by `CUSTOM_PROPERTIES_FILE_PATH` in your `.env` file (default: `.weaviate_properties`).
|
|
271
|
+
|
|
272
|
+
This file instructs VectorWave to add **new properties (columns)** to the Weaviate collections. This file acts as an **"allow-list"** for all custom tags.
|
|
273
|
+
|
|
274
|
+
**`.weaviate_properties` Example:**
|
|
275
|
+
|
|
276
|
+
```json
|
|
277
|
+
{
|
|
278
|
+
"run_id": {
|
|
279
|
+
"data_type": "TEXT",
|
|
280
|
+
"description": "The ID of the specific test run"
|
|
281
|
+
},
|
|
282
|
+
"experiment_id": {
|
|
283
|
+
"data_type": "TEXT",
|
|
284
|
+
"description": "Identifier for the experiment"
|
|
285
|
+
},
|
|
286
|
+
"team": {
|
|
287
|
+
"data_type": "TEXT",
|
|
288
|
+
"description": "The team responsible for this function"
|
|
289
|
+
},
|
|
290
|
+
"priority": {
|
|
291
|
+
"data_type": "INT",
|
|
292
|
+
"description": "Execution priority level"
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
* This definition will add `run_id`, `experiment_id`, `team`, and `priority` properties to both the `VectorWaveFunctions` and `VectorWaveExecutions` collections.
|
|
298
|
+
|
|
299
|
+
#### Step 2: Dynamic Execution Tagging (Adding Values)
|
|
300
|
+
|
|
301
|
+
When a function is executed, VectorWave adds tags to the `VectorWaveExecutions` log. These tags are collected and merged from two sources.
|
|
302
|
+
|
|
303
|
+
**1. Global Tags (Environment Variables)**
|
|
304
|
+
VectorWave looks for environment variables matching the **UPPERCASE name** of the keys defined in Step 1 (e.g., `RUN_ID`, `EXPERIMENT_ID`). Found values are loaded as `global_custom_values` and added to *all* execution logs. Ideal for run-wide metadata.
|
|
305
|
+
|
|
306
|
+
**2. Function-Specific Tags (Decorator)**
|
|
307
|
+
You can pass tags as keyword arguments (`**execution_tags`) directly to the `@vectorize` decorator. Ideal for function-specific metadata.
|
|
308
|
+
|
|
309
|
+
```python
|
|
310
|
+
# --- .env file ---
|
|
311
|
+
# RUN_ID=global-run-abc
|
|
312
|
+
# TEAM=default-team
|
|
313
|
+
|
|
314
|
+
@vectorize(
|
|
315
|
+
search_description="Process payment",
|
|
316
|
+
sequence_narrative="...",
|
|
317
|
+
team="billing", # <-- Function-specific tag
|
|
318
|
+
priority=1 # <-- Function-specific tag
|
|
319
|
+
)
|
|
320
|
+
def process_payment():
|
|
321
|
+
pass
|
|
322
|
+
|
|
323
|
+
@vectorize(
|
|
324
|
+
search_description="Another function",
|
|
325
|
+
sequence_narrative="...",
|
|
326
|
+
run_id="override-run-xyz" # <-- Overrides the global tag
|
|
327
|
+
)
|
|
328
|
+
def other_function():
|
|
329
|
+
pass
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
**Tag Merging and Validation Rules**
|
|
333
|
+
|
|
334
|
+
1. **Validation (Important):** Tags (global or function-specific) will **only** be saved to Weaviate if their key (e.g., `run_id`, `team`, `priority`) was first defined in the `.weaviate_properties` file (Step 1). Tags not defined in the schema are **ignored**, and a warning is printed at script startup.
|
|
335
|
+
|
|
336
|
+
2. **Priority (Override):** If a tag key is defined in both places (e.g., global `RUN_ID` in `.env` and `run_id="override-xyz"` in the decorator), the **function-specific tag from the decorator always wins**.
|
|
337
|
+
|
|
338
|
+
**Resulting Logs:**
|
|
339
|
+
|
|
340
|
+
* `process_payment()` execution log: `{"run_id": "global-run-abc", "team": "billing", "priority": 1}`
|
|
341
|
+
* `other_function()` execution log: `{"run_id": "override-run-xyz", "team": "default-team"}`
|
|
342
|
+
|
|
343
|
+
-----
|
|
344
|
+
|
|
345
|
+
## 🤝 Contributing
|
|
346
|
+
|
|
347
|
+
All forms of contribution are welcome, including bug reports, feature requests, and code contributions. For details, please refer to [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
348
|
+
|
|
349
|
+
## 📜 License
|
|
350
|
+
|
|
351
|
+
This project is distributed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
|
352
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
tests/batch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
tests/batch/test_batch.py,sha256=Urv7loLV5OZJoe7sU-NcWtR-ALLG_gw_5jLcoXWV5Sk,3336
|
|
4
|
+
tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
tests/core/test_decorator.py,sha256=uLlqBoGJ83Emu5qCdBmhvR12jd5tfDhWUzfCchgL6l8,12852
|
|
6
|
+
tests/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
tests/database/test_db.py,sha256=5SAiYqXa4OFAQmtStmni-wWsnczYHwGlrY5hweXspM0,16708
|
|
8
|
+
tests/database/test_db_search.py,sha256=fM75zvHhJ06meMbRN_T9cMoXVdcFaC5dwzjHu0nc3-g,5456
|
|
9
|
+
tests/exception/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
tests/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
11
|
+
tests/models/test_db_config.py,sha256=IfP8yLMdf6Im8c2YAEBiwOnOYF4yCMC55gsVOIwfz24,5149
|
|
12
|
+
tests/monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
tests/monitoring/test_tracer.py,sha256=alhHfMBTYJ-ECOYj_eB_OHO_lE0WjwkTM92g_VYtUyk,6384
|
|
14
|
+
tests/prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
tests/vectorizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
vectorwave/__init__.py,sha256=6EC-i4Ty_H_mZb4ZdsyryQpj_uobRf84mp8fPMNui3w,318
|
|
17
|
+
vectorwave/batch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
vectorwave/batch/batch.py,sha256=TGXrvRTAH-1dEU-sRaXqLi7w_4qyoLlr7VsAzf32g3M,2353
|
|
19
|
+
vectorwave/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
vectorwave/core/core.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
vectorwave/core/decorator.py,sha256=K3BKHuKeP8oCQEz86_W_idMGl0TzuZ68o2ZG1FBl6TA,4648
|
|
22
|
+
vectorwave/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
+
vectorwave/database/db.py,sha256=ll5YfE3Ts3rzbiwW-f8FbU9OvPE1mKoN8dKj3F53Sfk,12021
|
|
24
|
+
vectorwave/database/db_search.py,sha256=R7jONkMipHoTQwIh0uBH6O1WXxak3ow4ZIEwEgD54Io,4124
|
|
25
|
+
vectorwave/exception/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
+
vectorwave/exception/exceptions.py,sha256=A7NuY3NyctYlQRtDhU4uvDjdSWormddf-Pk-kR0vMsQ,592
|
|
27
|
+
vectorwave/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
vectorwave/models/db_config.py,sha256=brcwPkrs7LY3J9BfXWZsm_rZyeUNlGgBCbOM5m1DIs8,3402
|
|
29
|
+
vectorwave/monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
+
vectorwave/monitoring/monitoring.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
vectorwave/monitoring/tracer.py,sha256=cRKy8pNXKkaBgB2Hwr7UGNfgBC2kEqQ2Xil_5i4Hi1E,4524
|
|
32
|
+
vectorwave/prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
|
+
vectorwave/prediction/predictor.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
|
+
vectorwave/vectorizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
vectorwave/vectorizer/base.py,sha256=rKGnNgSBo53KUY2KstTMZBHBaUFfphdTKV9FNqTk0mc,268
|
|
36
|
+
vectorwave/vectorizer/factory.py,sha256=CQLiowI1jtAgCsf7WI5DfUtZ9LwNoJJQ4mwe0--7fok,1962
|
|
37
|
+
vectorwave/vectorizer/huggingface_vectorizer.py,sha256=3rIN19gHloNNv0Ex69mZivg5oxftUYCNAYqI3cnXdUA,1435
|
|
38
|
+
vectorwave/vectorizer/openai_vectorizer.py,sha256=unX6r-8XEFy1aLJqJyJYIVJgD9X7Ho8lgbNnGgKVujk,1363
|
|
39
|
+
vectorwave-0.1.3.dist-info/licenses/LICENSE,sha256=FOpD5rUxH-F-Dcf4NsQeGs_VDKnpKQFM-NHS-E1UTw8,1069
|
|
40
|
+
vectorwave-0.1.3.dist-info/licenses/NOTICE,sha256=I24P0Y4XwtWaKgt-nFANdZp6PwYkEeNZyAtfcJ9JAHg,1324
|
|
41
|
+
vectorwave-0.1.3.dist-info/METADATA,sha256=eTAJU_0a4tDdShW48o_ZCO34n5gQLsQaakHPBHrGuxQ,13269
|
|
42
|
+
vectorwave-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
43
|
+
vectorwave-0.1.3.dist-info/top_level.txt,sha256=Nce880E2pZRKbWF9XpB-FT5iZL9ETMDqSUVKF-_0m78,17
|
|
44
|
+
vectorwave-0.1.3.dist-info/RECORD,,
|