vectorwave 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. tests/__init__.py +0 -0
  2. tests/batch/__init__.py +0 -0
  3. tests/batch/test_batch.py +98 -0
  4. tests/core/__init__.py +0 -0
  5. tests/core/test_decorator.py +345 -0
  6. tests/database/__init__.py +0 -0
  7. tests/database/test_db.py +468 -0
  8. tests/database/test_db_search.py +163 -0
  9. tests/exception/__init__.py +0 -0
  10. tests/models/__init__.py +0 -0
  11. tests/models/test_db_config.py +152 -0
  12. tests/monitoring/__init__.py +0 -0
  13. tests/monitoring/test_tracer.py +202 -0
  14. tests/prediction/__init__.py +0 -0
  15. tests/vectorizer/__init__.py +0 -0
  16. vectorwave/__init__.py +13 -0
  17. vectorwave/batch/__init__.py +0 -0
  18. vectorwave/batch/batch.py +68 -0
  19. vectorwave/core/__init__.py +0 -0
  20. vectorwave/core/core.py +0 -0
  21. vectorwave/core/decorator.py +131 -0
  22. vectorwave/database/__init__.py +0 -0
  23. vectorwave/database/db.py +328 -0
  24. vectorwave/database/db_search.py +122 -0
  25. vectorwave/exception/__init__.py +0 -0
  26. vectorwave/exception/exceptions.py +22 -0
  27. vectorwave/models/__init__.py +0 -0
  28. vectorwave/models/db_config.py +92 -0
  29. vectorwave/monitoring/__init__.py +0 -0
  30. vectorwave/monitoring/monitoring.py +0 -0
  31. vectorwave/monitoring/tracer.py +131 -0
  32. vectorwave/prediction/__init__.py +0 -0
  33. vectorwave/prediction/predictor.py +0 -0
  34. vectorwave/vectorizer/__init__.py +0 -0
  35. vectorwave/vectorizer/base.py +12 -0
  36. vectorwave/vectorizer/factory.py +49 -0
  37. vectorwave/vectorizer/huggingface_vectorizer.py +33 -0
  38. vectorwave/vectorizer/openai_vectorizer.py +35 -0
  39. vectorwave-0.1.3.dist-info/METADATA +352 -0
  40. vectorwave-0.1.3.dist-info/RECORD +44 -0
  41. vectorwave-0.1.3.dist-info/WHEEL +5 -0
  42. vectorwave-0.1.3.dist-info/licenses/LICENSE +21 -0
  43. vectorwave-0.1.3.dist-info/licenses/NOTICE +31 -0
  44. vectorwave-0.1.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,131 @@
1
+ import logging
2
+ import inspect
3
+ import time
4
+ import traceback
5
+ from functools import wraps
6
+ from contextvars import ContextVar
7
+ from typing import Optional, List, Dict, Any, Callable
8
+ from uuid import uuid4
9
+ from datetime import datetime, timezone
10
+
11
+ from ..batch.batch import get_batch_manager
12
+ from ..models.db_config import get_weaviate_settings, WeaviateSettings
13
+
14
+ # Create module-level logger
15
+ logger = logging.getLogger(__name__)
16
+
17
class TraceCollector:
    """Holds the state shared by every span recorded under one trace.

    An instance is created by ``trace_root`` and read back by ``trace_span``
    through ``current_tracer_var``.
    """

    def __init__(self, trace_id: str):
        """Bind the trace identifier and resolve the helpers used for logging.

        Args:
            trace_id: Identifier written into every span of this trace.
        """
        self.trace_id = trace_id

        # Settings supply the target collection name and the global tags.
        weaviate_settings: WeaviateSettings = get_weaviate_settings()
        self.settings = weaviate_settings

        # The batch manager is what spans are handed to via ``add_object``.
        batch_manager = get_batch_manager()
        self.batch = batch_manager


# The tracer active in the current execution context; ``None`` means
# that no trace is in progress.
current_tracer_var: ContextVar[Optional[TraceCollector]] = ContextVar(
    'current_tracer',
    default=None,
)
25
+
26
+
27
def trace_root() -> Callable:
    """
    Decorator factory for the workflow's entry point function.
    Creates and sets the TraceCollector in ContextVar.

    The keyword argument ``trace_id`` is reserved by the decorator: it is
    always removed from ``kwargs`` before the wrapped function is called.
    If a trace is already active in the current context, the call joins
    that trace and any supplied ``trace_id`` is ignored. Otherwise a new
    TraceCollector is created, using the supplied ``trace_id`` or a fresh
    UUID4 string when none (or ``None``) is given.

    Returns:
        A decorator that wraps a callable with the behavior above.
    """

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            # Strip the reserved kwarg on every path. Previously it was only
            # removed when starting a new trace, so a nested call leaked
            # 'trace_id' into func and could fail with TypeError.
            requested_trace_id = kwargs.pop('trace_id', None)

            if current_tracer_var.get() is not None:
                # Already inside a trace: reuse it, do not start a new one.
                return func(*args, **kwargs)

            if requested_trace_id is None:
                requested_trace_id = str(uuid4())

            tracer = TraceCollector(trace_id=requested_trace_id)
            token = current_tracer_var.set(tracer)

            try:
                # func is typically the wrapper produced by @trace_span.
                return func(*args, **kwargs)
            finally:
                # Restore the previous context value, even if func raised.
                current_tracer_var.reset(token)

        return wrapper

    return decorator
52
+
53
+
54
def trace_span(
        _func: Optional[Callable] = None,
        *,
        attributes_to_capture: Optional[List[str]] = None
) -> Callable:
    """
    Decorator to capture function execution as a 'span'.
    Can be used as @trace_span or @trace_span(attributes_to_capture=[...]).

    When no tracer is active in the current context, the wrapped function
    is called directly and nothing is recorded. Otherwise one span is
    handed to ``tracer.batch.add_object`` after the call finishes, whether
    it returned or raised. A failure to record the span is logged and never
    masks the wrapped function's own result or exception.

    Args:
        _func: The function being decorated when used as bare @trace_span.
        attributes_to_capture: Names of keyword arguments whose values are
            copied into the span. Only arguments passed by keyword are
            captured; positional arguments are not inspected.

    Returns:
        The wrapped function, or a decorator when called with arguments.
    """
    # Values of these types are stored as-is; anything else is stringified.
    passthrough_types = (str, int, float, bool, list, dict, type(None))

    def decorator(func: Callable) -> Callable:
        @wraps(func)
        def wrapper(*args, **kwargs):
            tracer = current_tracer_var.get()
            if not tracer:
                return func(*args, **kwargs)

            start_time = time.perf_counter()
            status = "SUCCESS"
            error_msg = None
            result = None

            captured_attributes = {}
            if attributes_to_capture:
                try:
                    # Directly checks the kwargs dictionary.
                    for attr_name in attributes_to_capture:
                        if attr_name in kwargs:
                            value = kwargs[attr_name]
                            if not isinstance(value, passthrough_types):
                                value = str(value)
                            captured_attributes[attr_name] = value
                except Exception as e:
                    logger.warning("Failed to capture attributes for '%s': %s", func.__name__, e)

            try:
                result = func(*args, **kwargs)
            except BaseException:
                # BaseException (not just Exception) so that an aborted call
                # (KeyboardInterrupt, SystemExit, task cancellation) is not
                # logged as "SUCCESS" by the finally block below.
                status = "ERROR"
                error_msg = traceback.format_exc()
                raise
            finally:
                duration_ms = (time.perf_counter() - start_time) * 1000

                span_properties = {
                    "trace_id": tracer.trace_id,
                    "span_id": str(uuid4()),
                    "function_name": func.__name__,
                    "timestamp_utc": datetime.now(timezone.utc).isoformat(),
                    "duration_ms": duration_ms,
                    "status": status,
                    "error_message": error_msg,
                }

                # 1. Apply global tags first.
                if tracer.settings.global_custom_values:
                    span_properties.update(tracer.settings.global_custom_values)

                # 2. Apply captured attributes second, so a captured value
                #    (e.g. 'run_id') overrides the global value of the same key.
                span_properties.update(captured_attributes)

                try:
                    tracer.batch.add_object(
                        collection=tracer.settings.EXECUTION_COLLECTION_NAME,
                        properties=span_properties
                    )
                except Exception as e:
                    logger.error("Failed to log span for '%s' (trace_id: %s): %s", func.__name__, tracer.trace_id, e)

            return result

        return wrapper

    if _func is None:
        return decorator
    else:
        return decorator(_func)
File without changes
File without changes
File without changes
@@ -0,0 +1,12 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List
3
+
4
class BaseVectorizer(ABC):
    """Abstract interface for Python-side text vectorizers.

    Concrete subclasses must implement both ``embed`` and ``embed_batch``.
    """

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Return the embedding vector for a single text."""

    @abstractmethod
    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector for each entry of ``texts``."""
@@ -0,0 +1,49 @@
1
+ # [NEW] File: src/vectorwave/vectorizer/factory.py
2
+ from functools import lru_cache
3
+ from typing import Optional
4
+
5
+ from ..models.db_config import get_weaviate_settings, WeaviateSettings
6
+ from .base import BaseVectorizer
7
+ from .huggingface_vectorizer import HuggingFaceVectorizer
8
+ from .openai_vectorizer import OpenAIVectorizer
9
+
10
@lru_cache()
def get_vectorizer() -> Optional[BaseVectorizer]:
    """
    Build the Python-side vectorizer selected by the ``VECTORIZER`` setting.

    The result (including ``None``) is memoized by ``lru_cache``, so the
    settings are read and the messages printed only on the first call.

    Returns:
        - "huggingface": a HuggingFaceVectorizer for ``HF_MODEL_NAME``.
        - "openai_client": an OpenAIVectorizer for ``OPENAI_API_KEY``.
        - "weaviate_module", "none", or any unknown value: ``None``.
        ``None`` is also returned when the API key is missing or when the
        chosen vectorizer fails to initialize.
    """
    config: WeaviateSettings = get_weaviate_settings()
    vectorizer_name = config.VECTORIZER.lower()

    print(f"[VectorWave] Initializing vectorizer based on setting: '{vectorizer_name}'")

    if vectorizer_name == "huggingface":
        try:
            return HuggingFaceVectorizer(model_name=config.HF_MODEL_NAME)
        except Exception as e:
            print(f"Error: Failed to initialize HuggingFaceVectorizer: {e}")
            return None

    if vectorizer_name == "openai_client":
        api_key = config.OPENAI_API_KEY
        if not api_key:
            print("Warning: VECTORIZER='openai_client' but OPENAI_API_KEY is not set. Vectorizer disabled.")
            return None
        try:
            return OpenAIVectorizer(api_key=api_key)
        except Exception as e:
            print(f"Error: Failed to initialize OpenAIVectorizer: {e}")
            return None

    # The remaining strategies never need a Python-side vectorizer; they
    # differ only in the message they print.
    if vectorizer_name == "weaviate_module":
        print("[VectorWave] Using Weaviate's internal module for vectorization.")
    elif vectorizer_name == "none":
        print("[VectorWave] Vectorization is disabled ('none').")
    else:
        print(f"Warning: Unknown VECTORIZER setting: '{vectorizer_name}'. Disabling vectorizer.")
    return None
@@ -0,0 +1,33 @@
1
+ from .base import BaseVectorizer
2
+ from typing import List
3
+ import os
4
+
5
+ try:
6
+ from sentence_transformers import SentenceTransformer
7
+ except ImportError:
8
+ # Warning: The 'sentence-transformers' library is not installed.
9
+ print("Warning: The 'sentence-transformers' library is not installed.")
10
+ # To use HuggingFaceVectorizer, run 'pip install sentence-transformers'.
11
+ print("To use HuggingFaceVectorizer, run 'pip install sentence-transformers'.")
12
+ SentenceTransformer = None
13
+
14
class HuggingFaceVectorizer(BaseVectorizer):
    """HuggingFace SentenceTransformer (Python client) implementation."""

    def __init__(self, model_name: str, device: str = 'cpu'):
        """Load a SentenceTransformer model.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
            device: Device string passed to ``SentenceTransformer``.
                Defaults to ``'cpu'``, the previously hard-coded value;
                pass e.g. ``'cuda'`` to use another device.

        Raises:
            ImportError: If 'sentence-transformers' is not installed.
        """
        if SentenceTransformer is None:
            # Could not find the 'sentence-transformers' library.
            raise ImportError("Could not find the 'sentence-transformers' library.")

        self.device = device
        self.model = SentenceTransformer(model_name, device=device)
        # For the default device this prints the same "... on CPU." message
        # as before.
        print(f"[VectorWave] HuggingFaceVectorizer loaded model '{model_name}' on {device.upper()}.")

    def embed(self, text: str) -> List[float]:
        """Return the embedding of ``text`` as a plain list of floats."""
        # encode() receives a one-element list; take its single row.
        vector = self.model.encode([text], convert_to_numpy=True)[0]
        return vector.tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding (list of floats) for each entry of ``texts``."""
        if not texts:
            # Nothing to encode; skip the model call entirely.
            return []
        vectors = self.model.encode(texts, convert_to_numpy=True)
        return vectors.tolist()
@@ -0,0 +1,35 @@
1
+ from .base import BaseVectorizer
2
+ from typing import List
3
+
4
+ try:
5
+ from openai import OpenAI
6
+ except ImportError:
7
+ # Warning: The 'openai' library is not installed.
8
+ print("Warning: The 'openai' library is not installed.")
9
+ # To use OpenAIVectorizer, run 'pip install openai'.
10
+ print("To use OpenAIVectorizer, run 'pip install openai'.")
11
+ OpenAI = None
12
+
13
+
14
class OpenAIVectorizer(BaseVectorizer):
    """Vectorizer that calls the OpenAI embeddings endpoint via the `openai` client."""

    def __init__(self, api_key: str, model: str = "text-embedding-3-small"):
        """Create the OpenAI client.

        Args:
            api_key: OpenAI API key; must be non-empty.
            model: Embedding model name sent with every request.

        Raises:
            ImportError: If the 'openai' library is not installed.
            ValueError: If ``api_key`` is empty.
        """
        if OpenAI is None:
            # Could not find the 'openai' library.
            raise ImportError("Could not find the 'openai' library.")
        if not api_key:
            raise ValueError("OpenAI API key is required for OpenAIVectorizer.")

        self.client = OpenAI(api_key=api_key)
        self.model = model
        print(f"[VectorWave] OpenAIVectorizer initialized with model '{self.model}'.")

    def embed(self, text: str) -> List[float]:
        """Return the embedding of ``text``; newlines are replaced by spaces first."""
        text = text.replace("\n", " ")
        response = self.client.embeddings.create(input=[text], model=self.model)
        return response.data[0].embedding

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding for each entry of ``texts`` using a single request.

        An empty ``texts`` returns ``[]`` without calling the API, because
        there is nothing to embed.
        """
        if not texts:
            return []
        texts = [t.replace("\n", " ") for t in texts]
        response = self.client.embeddings.create(input=texts, model=self.model)
        return [d.embedding for d in response.data]
@@ -0,0 +1,352 @@
1
+ Metadata-Version: 2.4
2
+ Name: vectorwave
3
+ Version: 0.1.3
4
+ Summary: VectorWave: Seamless Auto-Vectorization Framework
5
+ Author-email: junyeonggim <junyeonggim5@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Repository, https://github.com/republicofgamja/vtm
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Classifier: Programming Language :: Python :: 3.13
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Intended Audience :: Developers
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ License-File: NOTICE
20
+ Requires-Dist: weaviate-client>=4.0.0
21
+ Requires-Dist: pydantic-settings>=2.0.0
22
+ Dynamic: license-file
23
+
24
+
25
+ # VectorWave: Seamless Auto-Vectorization Framework
26
+
27
+ [License: MIT](LICENSE)
28
+
29
+ ## 🌟 Overview
30
+
31
+ **VectorWave** is an innovative framework that uses a **decorator** to automatically save and manage the output of Python functions/methods in a **Vector Database (Vector DB)**. Developers can convert function outputs into intelligent vector data with a single line of code (`@vectorize`), without worrying about the complex processes of data collection, embedding generation, or storage in a Vector DB.
32
+
33
+ ---
34
+
35
+ ## ✨ Features
36
+
37
+ * **`@vectorize` Decorator:**
38
+ 1. **Static Data Collection:** Saves the function's source code, docstring, and metadata to the `VectorWaveFunctions` collection once when the script is loaded.
39
+ 2. **Dynamic Data Logging:** Records the execution time, success/failure status, error logs, and 'dynamic tags' to the `VectorWaveExecutions` collection every time the function is called.
40
+ * **Distributed Tracing:** By combining the `@vectorize` and `@trace_span` decorators, you can analyze the execution of complex multi-step workflows, grouped under a single **`trace_id`**.
41
+ * **Search Interface:** Provides `search_functions` (for vector search) and `search_executions` (for log filtering) to facilitate the construction of RAG and monitoring systems.
42
+
43
+ ---
44
+
45
+ ## 🚀 Usage
46
+
47
+ VectorWave consists of 'storing' via decorators and 'searching' via functions, and now includes **execution flow tracing**.
48
+
49
+ ### 1. (Required) Initialize the Database and Configuration
50
+
51
+ ```python
52
+ import time
53
+ from vectorwave import (
54
+ vectorize,
55
+ initialize_database,
56
+ search_functions,
57
+ search_executions
58
+ )
59
+ # [ADDITION] Import trace_span separately for distributed tracing.
60
+ from vectorwave.monitoring.tracer import trace_span
61
+
62
+ # This only needs to be called once when the script starts.
63
+ try:
64
+ client = initialize_database()
65
+ print("VectorWave DB initialized successfully.")
66
+ except Exception as e:
67
+ print(f"DB initialization failed: {e}")
68
+ exit()
69
+ ````
70
+
71
+ ### 2\. [Store] Use `@vectorize` with Distributed Tracing
72
+
73
+ The `@vectorize` decorator acts as the **Root** of the trace, and `@trace_span` is applied to internal functions to group the execution flow under a single `trace_id`.
74
+
75
+ ```python
76
+ # --- Child Span Function: Captures arguments ---
77
+ @trace_span(attributes_to_capture=['user_id', 'amount'])
78
+ def step_1_validate_payment(user_id: str, amount: int):
79
+ """(Span) Payment validation. Records user_id and amount in the log."""
80
+ print(f" [SPAN 1] Validating payment for {user_id}...")
81
+ time.sleep(0.1)
82
+ return True
83
+
84
+ @trace_span(attributes_to_capture=['user_id', 'receipt_id'])
85
+ def step_2_send_receipt(user_id: str, receipt_id: str):
86
+ """(Span) Sends the receipt."""
87
+ print(f" [SPAN 2] Sending receipt {receipt_id}...")
88
+ time.sleep(0.2)
89
+
90
+
91
+ # --- Root Function (@trace_root role) ---
92
+ @vectorize(
93
+ search_description="Charges a user in the payment system.",
94
+ sequence_narrative="Returns a receipt ID upon successful payment.",
95
+ team="billing", # <-- Custom Tag (recorded in all execution logs)
96
+ priority=1 # <-- Custom Tag (execution priority)
97
+ )
98
+ def process_payment(user_id: str, amount: int):
99
+ """(Root Span) Executes the user payment workflow."""
100
+ print(f" [ROOT EXEC] process_payment: Starting workflow for {user_id}...")
101
+
102
+ # When calling child functions, the same trace_id is automatically inherited via ContextVar.
103
+ step_1_validate_payment(user_id=user_id, amount=amount)
104
+
105
+ receipt_id = f"receipt_{user_id}_{amount}"
106
+ step_2_send_receipt(user_id=user_id, receipt_id=receipt_id)
107
+
108
+ print(f" [ROOT DONE] process_payment")
109
+ return {"status": "success", "receipt_id": receipt_id}
110
+
111
+ # --- Execute the Function ---
112
+ print("Now calling 'process_payment'...")
113
+ # This single call records 3 execution logs (spans) in the DB,
114
+ # all grouped under one 'trace_id'.
115
+ process_payment("user_789", 5000)
116
+ ```
117
+
118
+ ### 3\. [Search ①] Function Definition Search (for RAG)
119
+
120
+ ```python
121
+ # Search for functions related to 'payment' using natural language (vector search).
122
+ print("\n--- Searching for 'payment' functions ---")
123
+ payment_funcs = search_functions(
124
+ query="user payment processing",
125
+ limit=3
126
+ )
127
+ for func in payment_funcs:
128
+ print(f" - Function: {func['properties']['function_name']}")
129
+ print(f" - Description: {func['properties']['search_description']}")
130
+ print(f" - Similarity (Distance): {func['metadata'].distance:.4f}")
131
+ ```
132
+
133
+ ### 4\. [Search ②] Execution Log Search (Monitoring and Tracing)
134
+
135
+ The `search_executions` function can now search for all related execution logs (spans) based on the `trace_id`.
136
+
137
+ ```python
138
+ # 1. Find the Trace ID of a specific workflow (process_payment).
139
+ latest_payment_span = search_executions(
140
+ limit=1,
141
+ filters={"function_name": "process_payment"},
142
+ sort_by="timestamp_utc",
143
+ sort_ascending=False
144
+ )
145
+ trace_id = latest_payment_span[0]["trace_id"]
146
+
147
+ # 2. Search all spans belonging to that Trace ID, sorted chronologically.
148
+ print(f"\n--- Full Trace for ID ({trace_id[:8]}...) ---")
149
+ trace_spans = search_executions(
150
+ limit=10,
151
+ filters={"trace_id": trace_id},
152
+ sort_by="timestamp_utc",
153
+ sort_ascending=True # Ascending sort for workflow flow analysis
154
+ )
155
+
156
+ for i, span in enumerate(trace_spans):
157
+ print(f" - [Span {i+1}] {span['function_name']} ({span['duration_ms']:.2f}ms)")
158
+ # Captured arguments (user_id, amount, etc.) are displayed for the child spans.
159
+
160
+ # Example Output:
161
+ # - [Span 1] step_1_validate_payment (100.81ms)
162
+ # - [Span 2] step_2_send_receipt (202.06ms)
163
+ # - [Span 3] process_payment (333.18ms)
164
+ ```
165
+ -----
166
+
167
+ ## ⚙️ Configuration
168
+
169
+ VectorWave automatically reads Weaviate database connection info and **vectorization strategy** from **environment variables** or a `.env` file.
170
+
171
+ Create a `.env` file in your project's root directory (e.g., where `test_ex/example.py` is located) and set the required values.
172
+
173
+ ### Vectorizer Strategy (VECTORIZER)
174
+
175
+ You can select the text vectorization method via the `VECTORIZER` environment variable in your `test_ex/.env` file.
176
+
177
+ | `VECTORIZER` Setting | Description | Required Additional Settings |
178
+ | :--- | :--- | :--- |
179
+ | **`huggingface`** | (Default Recommended) Uses the `sentence-transformers` library to vectorize on your local CPU. No API key is needed, making it great for immediate testing. | `HF_MODEL_NAME` (e.g., "sentence-transformers/all-MiniLM-L6-v2") |
180
+ | **`openai_client`** | (High-Performance) Uses the OpenAI Python client to vectorize with modern models like `text-embedding-3-small`. | `OPENAI_API_KEY` (A valid OpenAI API key) |
181
+ | **`weaviate_module`** | (Docker Delegate) Delegates the vectorization task to the Weaviate container's built-in module (e.g., `text2vec-openai`). | `WEAVIATE_VECTORIZER_MODULE`, `OPENAI_API_KEY` |
182
+ | **`none`** | Disables vectorization. Data will be stored without vectors. | None |
183
+
184
+ -----
185
+
186
+ ### .env File Examples
187
+
188
+ Configure your `.env` file according to the strategy you want to use.
189
+
190
+ #### Example 1: Using `huggingface` (Local, No API Key)
191
+
192
+ Uses a `sentence-transformers` model on your local machine. Ideal for testing without API keys.
193
+
194
+ ```ini
195
+ # .env (Using HuggingFace)
196
+ # --- Basic Weaviate Connection ---
197
+ WEAVIATE_HOST=localhost
198
+ WEAVIATE_PORT=8080
199
+ WEAVIATE_GRPC_PORT=50051
200
+
201
+ # --- [Strategy 1] HuggingFace Config ---
202
+ VECTORIZER="huggingface"
203
+ HF_MODEL_NAME="sentence-transformers/all-MiniLM-L6-v2"
204
+
205
+ # (OPENAI_API_KEY is not required for this mode)
206
+ OPENAI_API_KEY=sk-...
207
+
208
+ # --- [Advanced] Custom Properties ---
209
+ CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
210
+ RUN_ID=test-run-001
211
+ ```
212
+
213
+ #### Example 2: Using `openai_client` (Python Client, High-Performance)
214
+
215
+ Directly calls the OpenAI API via the `openai` Python library.
216
+
217
+ ```ini
218
+ # .env (Using OpenAI Python Client)
219
+ # --- Basic Weaviate Connection ---
220
+ WEAVIATE_HOST=localhost
221
+ WEAVIATE_PORT=8080
222
+ WEAVIATE_GRPC_PORT=50051
223
+
224
+ # --- [Strategy 2] OpenAI Client Config ---
225
+ VECTORIZER="openai_client"
226
+
227
+ # [Required] You must enter a valid OpenAI API key.
228
+ OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
229
+
230
+ # (HF_MODEL_NAME is not used in this mode)
231
+ HF_MODEL_NAME=...
232
+
233
+ # --- [Advanced] Custom Properties ---
234
+ CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
235
+ RUN_ID=test-run-001
236
+ ```
237
+
238
+ #### Example 3: Using `weaviate_module` (Docker Delegate)
239
+
240
+ Delegates vectorization to the Weaviate Docker container instead of Python. (See `vw_docker.yml` config).
241
+
242
+ ```ini
243
+ # .env (Delegating to Weaviate Module)
244
+ # --- Basic Weaviate Connection ---
245
+ WEAVIATE_HOST=localhost
246
+ WEAVIATE_PORT=8080
247
+ WEAVIATE_GRPC_PORT=50051
248
+
249
+ # --- [Strategy 3] Weaviate Module Config ---
250
+ VECTORIZER="weaviate_module"
251
+ WEAVIATE_VECTORIZER_MODULE=text2vec-openai
252
+ WEAVIATE_GENERATIVE_MODULE=generative-openai
253
+
254
+ # [Required] The Weaviate container will read this API key.
255
+ OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
256
+
257
+ # --- [Advanced] Custom Properties ---
258
+ CUSTOM_PROPERTIES_FILE_PATH=.weaviate_properties
259
+ RUN_ID=test-run-001
260
+ ```
261
+
262
+ -----
263
+
264
+ ### Custom Properties and Dynamic Execution Tagging
265
+
266
+ VectorWave can store user-defined metadata in addition to static data (function definitions) and dynamic data (execution logs). This works in two steps.
267
+
268
+ #### Step 1: Define Custom Schema (Tag "Allow-list")
269
+
270
+ Create a JSON file at the path specified by `CUSTOM_PROPERTIES_FILE_PATH` in your `.env` file (default: `.weaviate_properties`).
271
+
272
+ This file instructs VectorWave to add **new properties (columns)** to the Weaviate collections. This file acts as an **"allow-list"** for all custom tags.
273
+
274
+ **`.weaviate_properties` Example:**
275
+
276
+ ```json
277
+ {
278
+ "run_id": {
279
+ "data_type": "TEXT",
280
+ "description": "The ID of the specific test run"
281
+ },
282
+ "experiment_id": {
283
+ "data_type": "TEXT",
284
+ "description": "Identifier for the experiment"
285
+ },
286
+ "team": {
287
+ "data_type": "TEXT",
288
+ "description": "The team responsible for this function"
289
+ },
290
+ "priority": {
291
+ "data_type": "INT",
292
+ "description": "Execution priority level"
293
+ }
294
+ }
295
+ ```
296
+
297
+ * This definition will add `run_id`, `experiment_id`, `team`, and `priority` properties to both the `VectorWaveFunctions` and `VectorWaveExecutions` collections.
298
+
299
+ #### Step 2: Dynamic Execution Tagging (Adding Values)
300
+
301
+ When a function is executed, VectorWave adds tags to the `VectorWaveExecutions` log. These tags are collected and merged from two sources.
302
+
303
+ **1. Global Tags (Environment Variables)**
304
+ VectorWave looks for environment variables matching the **UPPERCASE name** of the keys defined in Step 1 (e.g., `RUN_ID`, `EXPERIMENT_ID`). Found values are loaded as `global_custom_values` and added to *all* execution logs. Ideal for run-wide metadata.
305
+
306
+ **2. Function-Specific Tags (Decorator)**
307
+ You can pass tags as keyword arguments (`**execution_tags`) directly to the `@vectorize` decorator. Ideal for function-specific metadata.
308
+
309
+ ```python
310
+ # --- .env file ---
311
+ # RUN_ID=global-run-abc
312
+ # TEAM=default-team
313
+
314
+ @vectorize(
315
+ search_description="Process payment",
316
+ sequence_narrative="...",
317
+ team="billing", # <-- Function-specific tag
318
+ priority=1 # <-- Function-specific tag
319
+ )
320
+ def process_payment():
321
+ pass
322
+
323
+ @vectorize(
324
+ search_description="Another function",
325
+ sequence_narrative="...",
326
+ run_id="override-run-xyz" # <-- Overrides the global tag
327
+ )
328
+ def other_function():
329
+ pass
330
+ ```
331
+
332
+ **Tag Merging and Validation Rules**
333
+
334
+ 1. **Validation (Important):** Tags (global or function-specific) will **only** be saved to Weaviate if their key (e.g., `run_id`, `team`, `priority`) was first defined in the `.weaviate_properties` file (Step 1). Tags not defined in the schema are **ignored**, and a warning is printed at script startup.
335
+
336
+ 2. **Priority (Override):** If a tag key is defined in both places (e.g., global `RUN_ID` in `.env` and `run_id="override-xyz"` in the decorator), the **function-specific tag from the decorator always wins**.
337
+
338
+ **Resulting Logs:**
339
+
340
+ * `process_payment()` execution log: `{"run_id": "global-run-abc", "team": "billing", "priority": 1}`
341
+ * `other_function()` execution log: `{"run_id": "override-run-xyz", "team": "default-team"}`
342
+
343
+ -----
344
+
345
+ ## 🤝 Contributing
346
+
347
+ All forms of contribution are welcome, including bug reports, feature requests, and code contributions. For details, please refer to [CONTRIBUTING.md](CONTRIBUTING.md).
348
+
349
+ ## 📜 License
350
+
351
+ This project is distributed under the MIT License. See the [LICENSE](LICENSE) file for details.
352
+
@@ -0,0 +1,44 @@
1
+ tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ tests/batch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ tests/batch/test_batch.py,sha256=Urv7loLV5OZJoe7sU-NcWtR-ALLG_gw_5jLcoXWV5Sk,3336
4
+ tests/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ tests/core/test_decorator.py,sha256=uLlqBoGJ83Emu5qCdBmhvR12jd5tfDhWUzfCchgL6l8,12852
6
+ tests/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ tests/database/test_db.py,sha256=5SAiYqXa4OFAQmtStmni-wWsnczYHwGlrY5hweXspM0,16708
8
+ tests/database/test_db_search.py,sha256=fM75zvHhJ06meMbRN_T9cMoXVdcFaC5dwzjHu0nc3-g,5456
9
+ tests/exception/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
+ tests/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ tests/models/test_db_config.py,sha256=IfP8yLMdf6Im8c2YAEBiwOnOYF4yCMC55gsVOIwfz24,5149
12
+ tests/monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ tests/monitoring/test_tracer.py,sha256=alhHfMBTYJ-ECOYj_eB_OHO_lE0WjwkTM92g_VYtUyk,6384
14
+ tests/prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ tests/vectorizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ vectorwave/__init__.py,sha256=6EC-i4Ty_H_mZb4ZdsyryQpj_uobRf84mp8fPMNui3w,318
17
+ vectorwave/batch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ vectorwave/batch/batch.py,sha256=TGXrvRTAH-1dEU-sRaXqLi7w_4qyoLlr7VsAzf32g3M,2353
19
+ vectorwave/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ vectorwave/core/core.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
+ vectorwave/core/decorator.py,sha256=K3BKHuKeP8oCQEz86_W_idMGl0TzuZ68o2ZG1FBl6TA,4648
22
+ vectorwave/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
+ vectorwave/database/db.py,sha256=ll5YfE3Ts3rzbiwW-f8FbU9OvPE1mKoN8dKj3F53Sfk,12021
24
+ vectorwave/database/db_search.py,sha256=R7jONkMipHoTQwIh0uBH6O1WXxak3ow4ZIEwEgD54Io,4124
25
+ vectorwave/exception/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ vectorwave/exception/exceptions.py,sha256=A7NuY3NyctYlQRtDhU4uvDjdSWormddf-Pk-kR0vMsQ,592
27
+ vectorwave/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ vectorwave/models/db_config.py,sha256=brcwPkrs7LY3J9BfXWZsm_rZyeUNlGgBCbOM5m1DIs8,3402
29
+ vectorwave/monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
+ vectorwave/monitoring/monitoring.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
+ vectorwave/monitoring/tracer.py,sha256=cRKy8pNXKkaBgB2Hwr7UGNfgBC2kEqQ2Xil_5i4Hi1E,4524
32
+ vectorwave/prediction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ vectorwave/prediction/predictor.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ vectorwave/vectorizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ vectorwave/vectorizer/base.py,sha256=rKGnNgSBo53KUY2KstTMZBHBaUFfphdTKV9FNqTk0mc,268
36
+ vectorwave/vectorizer/factory.py,sha256=CQLiowI1jtAgCsf7WI5DfUtZ9LwNoJJQ4mwe0--7fok,1962
37
+ vectorwave/vectorizer/huggingface_vectorizer.py,sha256=3rIN19gHloNNv0Ex69mZivg5oxftUYCNAYqI3cnXdUA,1435
38
+ vectorwave/vectorizer/openai_vectorizer.py,sha256=unX6r-8XEFy1aLJqJyJYIVJgD9X7Ho8lgbNnGgKVujk,1363
39
+ vectorwave-0.1.3.dist-info/licenses/LICENSE,sha256=FOpD5rUxH-F-Dcf4NsQeGs_VDKnpKQFM-NHS-E1UTw8,1069
40
+ vectorwave-0.1.3.dist-info/licenses/NOTICE,sha256=I24P0Y4XwtWaKgt-nFANdZp6PwYkEeNZyAtfcJ9JAHg,1324
41
+ vectorwave-0.1.3.dist-info/METADATA,sha256=eTAJU_0a4tDdShW48o_ZCO34n5gQLsQaakHPBHrGuxQ,13269
42
+ vectorwave-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
43
+ vectorwave-0.1.3.dist-info/top_level.txt,sha256=Nce880E2pZRKbWF9XpB-FT5iZL9ETMDqSUVKF-_0m78,17
44
+ vectorwave-0.1.3.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+