vectorwave 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. tests/__init__.py +0 -0
  2. tests/batch/__init__.py +0 -0
  3. tests/batch/test_batch.py +98 -0
  4. tests/core/__init__.py +0 -0
  5. tests/core/test_decorator.py +345 -0
  6. tests/database/__init__.py +0 -0
  7. tests/database/test_db.py +468 -0
  8. tests/database/test_db_search.py +163 -0
  9. tests/exception/__init__.py +0 -0
  10. tests/models/__init__.py +0 -0
  11. tests/models/test_db_config.py +152 -0
  12. tests/monitoring/__init__.py +0 -0
  13. tests/monitoring/test_tracer.py +202 -0
  14. tests/prediction/__init__.py +0 -0
  15. tests/vectorizer/__init__.py +0 -0
  16. vectorwave/__init__.py +13 -0
  17. vectorwave/batch/__init__.py +0 -0
  18. vectorwave/batch/batch.py +68 -0
  19. vectorwave/core/__init__.py +0 -0
  20. vectorwave/core/core.py +0 -0
  21. vectorwave/core/decorator.py +131 -0
  22. vectorwave/database/__init__.py +0 -0
  23. vectorwave/database/db.py +328 -0
  24. vectorwave/database/db_search.py +122 -0
  25. vectorwave/exception/__init__.py +0 -0
  26. vectorwave/exception/exceptions.py +22 -0
  27. vectorwave/models/__init__.py +0 -0
  28. vectorwave/models/db_config.py +92 -0
  29. vectorwave/monitoring/__init__.py +0 -0
  30. vectorwave/monitoring/monitoring.py +0 -0
  31. vectorwave/monitoring/tracer.py +131 -0
  32. vectorwave/prediction/__init__.py +0 -0
  33. vectorwave/prediction/predictor.py +0 -0
  34. vectorwave/vectorizer/__init__.py +0 -0
  35. vectorwave/vectorizer/base.py +12 -0
  36. vectorwave/vectorizer/factory.py +49 -0
  37. vectorwave/vectorizer/huggingface_vectorizer.py +33 -0
  38. vectorwave/vectorizer/openai_vectorizer.py +35 -0
  39. vectorwave-0.1.3.dist-info/METADATA +352 -0
  40. vectorwave-0.1.3.dist-info/RECORD +44 -0
  41. vectorwave-0.1.3.dist-info/WHEEL +5 -0
  42. vectorwave-0.1.3.dist-info/licenses/LICENSE +21 -0
  43. vectorwave-0.1.3.dist-info/licenses/NOTICE +31 -0
  44. vectorwave-0.1.3.dist-info/top_level.txt +2 -0
@@ -0,0 +1,328 @@
1
+ import logging
2
+ import weaviate
3
+ import weaviate.classes.config as wvc # (wvc = Weaviate Classes Config)
4
+ import weaviate.config as wvc_config
5
+ from weaviate.config import AdditionalConfig
6
+ from vectorwave.models.db_config import WeaviateSettings
7
+ from vectorwave.exception.exceptions import (
8
+ WeaviateConnectionError,
9
+ WeaviateNotReadyError,
10
+ SchemaCreationError
11
+ )
12
+ from functools import lru_cache
13
+ from weaviate.exceptions import WeaviateConnectionError as WeaviateClientConnectionError
14
+ from vectorwave.models.db_config import get_weaviate_settings
15
+ from vectorwave.vectorizer.factory import get_vectorizer
16
+
17
+ # Create module-level logger
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Code based on Weaviate v4 (latest) client.
21
+
22
def get_weaviate_client(settings: WeaviateSettings) -> weaviate.WeaviateClient:
    """
    Creates and returns a connected Weaviate client that reports itself ready.

    [Args]
    - settings: Provides WEAVIATE_HOST, WEAVIATE_PORT and WEAVIATE_GRPC_PORT.

    [Returns]
    - The connected weaviate.WeaviateClient. The caller owns it and is
      responsible for closing it.

    [Raises]
    - WeaviateConnectionError: If connection fails.
    - WeaviateNotReadyError: If connected, but the server is not ready.
    """
    try:
        client = weaviate.connect_to_local(
            host=settings.WEAVIATE_HOST,
            port=settings.WEAVIATE_PORT,
            grpc_port=settings.WEAVIATE_GRPC_PORT,
            # NOTE(review): confirm that the installed weaviate client's
            # AdditionalConfig actually honours these three keyword arguments.
            additional_config=AdditionalConfig(
                dynamic=True,
                batch_size=20,
                timeout_retries=3,
            ),
        )
    except WeaviateClientConnectionError as e:
        raise WeaviateConnectionError(f"Failed to connect to Weaviate: {e}") from e
    except Exception as e:
        raise WeaviateConnectionError(
            f"An unknown error occurred while connecting to Weaviate: {e}"
        ) from e

    if not client.is_ready():
        # The caller never receives this client, so nobody else can close it;
        # release its connections before reporting the failure.
        client.close()
        raise WeaviateNotReadyError("Connected to Weaviate, but the server is not ready.")

    logger.info("Weaviate client connected successfully")
    return client
54
+
55
+
56
@lru_cache()
def get_cached_client() -> weaviate.WeaviateClient:
    """
    Returns the shared Weaviate client, creating it on the first call.

    The result is memoized by lru_cache, so later calls return the same
    client object instead of opening a new connection. Errors raised by
    get_weaviate_client() propagate to the caller.
    """
    logger.debug("Creating and caching new Weaviate client instance")
    return get_weaviate_client(get_weaviate_settings())
66
+
67
+
68
def _parse_custom_properties(custom_properties, collection_name):
    """
    Converts the custom property definitions held by the settings object into
    a list of wvc.Property objects.

    [Args]
    - custom_properties: Mapping of property name -> details dict (a required
      'data_type' key and an optional 'description' key), or None.
    - collection_name: Name of the target collection (used for logging only).

    [Returns]
    - A list of wvc.Property objects (empty when nothing is configured).

    [Raises]
    - SchemaCreationError: If a definition is malformed or names an unknown data type.
    """
    if not custom_properties:
        return []

    logger.info(
        "Adding %d custom properties to '%s': %s",
        len(custom_properties),
        collection_name,
        list(custom_properties.keys()),
    )

    parsed = []
    for name, prop_details in custom_properties.items():
        if not isinstance(prop_details, dict):
            raise SchemaCreationError(f"Custom property '{name}' in config file must be a dictionary.")

        # data_type is required
        dtype_str = prop_details.get("data_type")
        if not dtype_str:
            raise SchemaCreationError(f"Custom property '{name}' in config file is missing 'data_type'.")

        # description is optional
        description = prop_details.get("description")

        try:
            # Convert string (e.g., "TEXT") to Weaviate Enum (wvc.DataType.TEXT)
            data_type = getattr(wvc.DataType, dtype_str.upper())
            parsed.append(
                wvc.Property(
                    name=name,
                    data_type=data_type,
                    description=description,
                )
            )
        except AttributeError as e:
            raise SchemaCreationError(
                f"Invalid data_type '{dtype_str}' for custom property '{name}'. "
                f"Use a valid wvc.DataType string (e.g., 'TEXT', 'INT', 'NUMBER')."
            ) from e
        except Exception as e:
            raise SchemaCreationError(f"Error processing custom property '{name}': {e}") from e

    return parsed


def _resolve_vectorizer_config(settings):
    """
    Maps the VECTORIZER setting to the vectorizer configuration for the schema.

    [Raises]
    - SchemaCreationError: If VECTORIZER or WEAVIATE_VECTORIZER_MODULE is unsupported.
    """
    vectorizer_name_setting = settings.VECTORIZER.lower()
    logger.info("Configuring vectorizer: %s", vectorizer_name_setting)

    if vectorizer_name_setting in ("huggingface", "openai_client"):
        # Vectors are computed on the Python side, so Weaviate itself must not vectorize.
        logger.info("Python-based vectorizer ('%s') is active.", vectorizer_name_setting)
        logger.info("Setting Weaviate schema vectorizer to 'none'.")
        return wvc.Configure.Vectorizer.none()

    if vectorizer_name_setting == "weaviate_module":
        module_name = settings.WEAVIATE_VECTORIZER_MODULE.lower()
        logger.info("Using Weaviate internal module: '%s'", module_name)

        if module_name == "text2vec-openai":
            return wvc.Configure.Vectorizer.text2vec_openai(
                vectorize_collection_name=settings.IS_VECTORIZE_COLLECTION_NAME
            )
        # (Add other Weaviate modules here if needed)
        raise SchemaCreationError(
            f"Unsupported WEAVIATE_VECTORIZER_MODULE: '{module_name}'.")

    if vectorizer_name_setting == "none":
        # Vectorization disabled
        logger.info("Vectorizer is set to 'none'.")
        return wvc.Configure.Vectorizer.none()

    raise SchemaCreationError(
        f"Invalid VECTORIZER setting: '{vectorizer_name_setting}'.")


def create_vectorwave_schema(client: weaviate.WeaviateClient, settings: WeaviateSettings):
    """
    Defines and creates the VectorWaveFunctions collection schema.
    Includes custom properties carried by the settings object
    (loaded from the .weaviate_properties file).

    [Args]
    - client: Connected Weaviate client.
    - settings: Provides the collection name, custom properties and the
      vectorizer / generative module selection.

    [Returns]
    - The existing collection if it is already present, otherwise the newly
      created one.

    [Raises]
    - SchemaCreationError: If an error occurs during schema creation.
    """
    collection_name = settings.COLLECTION_NAME

    # 1. Nothing to do if the collection already exists
    if client.collections.exists(collection_name):
        logger.info("Collection '%s' already exists, skipping creation", collection_name)
        return client.collections.get(collection_name)

    logger.info("Creating collection '%s'", collection_name)

    # 2. Base properties
    base_properties = [
        wvc.Property(
            name="function_name",
            data_type=wvc.DataType.TEXT,
            description="The name of the vectorized function",
        ),
        wvc.Property(
            name="module_name",
            data_type=wvc.DataType.TEXT,
            description="The Python module path where the function is defined",
        ),
        wvc.Property(
            name="docstring",
            data_type=wvc.DataType.TEXT,
            description="The function's Docstring (description)",
        ),
        wvc.Property(
            name="source_code",
            data_type=wvc.DataType.TEXT,
            description="The actual source code of the function",
        ),
        wvc.Property(
            name="search_description",
            data_type=wvc.DataType.TEXT,
            description="User-provided description for similarity search (from @vectorize)",
        ),
        wvc.Property(
            name="sequence_narrative",
            data_type=wvc.DataType.TEXT,
            description="User-provided context about what happens next (from @vectorize)",
        ),
    ]

    # 3. Custom properties (strict: any malformed entry aborts schema creation)
    all_properties = base_properties + _parse_custom_properties(
        settings.custom_properties, collection_name
    )

    # 4. Vectorizer configuration
    vector_config = _resolve_vectorizer_config(settings)

    # 5. Generative configuration (for RAG, etc.); None when no supported module is set
    generative_config = None
    if settings.WEAVIATE_GENERATIVE_MODULE.lower() == "generative-openai":
        generative_config = wvc.Configure.Generative.openai()

    # 6. Create the collection
    try:
        vectorwave_collection = client.collections.create(
            name=collection_name,
            properties=all_properties,
            vector_config=vector_config,
            generative_config=generative_config,
        )
    except Exception as e:
        # Raise a specific exception instead of returning None
        raise SchemaCreationError(f"Error during schema creation: {e}") from e

    logger.info("Collection '%s' created successfully", collection_name)
    return vectorwave_collection
216
+
217
+
218
def create_execution_schema(client: weaviate.WeaviateClient, settings: WeaviateSettings):
    """
    Defines and creates the VectorWaveExecutions (dynamic) collection schema.

    Unlike the functions collection, a malformed custom property does not
    abort creation here: it is skipped with a warning.

    [Args]
    - client: Connected Weaviate client.
    - settings: Provides the execution collection name and custom properties.

    [Returns]
    - The existing collection if it is already present, otherwise the newly
      created one.

    [Raises]
    - SchemaCreationError: If the collection cannot be created.
    """
    collection_name = settings.EXECUTION_COLLECTION_NAME

    if client.collections.exists(collection_name):
        logger.info("Collection '%s' already exists, skipping creation", collection_name)
        return client.collections.get(collection_name)

    logger.info("Creating collection '%s'", collection_name)

    properties = [
        wvc.Property(
            name="trace_id",
            data_type=wvc.DataType.TEXT,
            description="The unique ID for the entire trace/workflow",
        ),
        wvc.Property(
            name="span_id",
            data_type=wvc.DataType.TEXT,
            description="The unique ID for this specific span/function execution",
        ),
        wvc.Property(
            name="function_name",
            data_type=wvc.DataType.TEXT,
            description="Name of the executed function (span name)",
        ),
        wvc.Property(
            name="function_uuid",
            data_type=wvc.DataType.UUID,
            description="The UUID of the executed function definition",
        ),
        wvc.Property(
            name="timestamp_utc",
            data_type=wvc.DataType.DATE,
            description="The UTC timestamp when the execution started",
        ),
        wvc.Property(
            name="duration_ms",
            data_type=wvc.DataType.NUMBER,
            description="Total execution time in milliseconds",
        ),
        wvc.Property(
            name="status",
            data_type=wvc.DataType.TEXT,  # "SUCCESS" or "ERROR"
            description="Execution status",
        ),
        wvc.Property(
            name="error_message",
            data_type=wvc.DataType.TEXT,
            description="Error message and traceback if status is 'ERROR'",
        ),
    ]

    if settings.custom_properties:
        logger.info(
            "Adding %d custom properties: %s",
            len(settings.custom_properties),
            list(settings.custom_properties.keys()),
        )
        for name, prop_details in settings.custom_properties.items():
            # Best effort: one bad definition must not block the whole schema.
            try:
                if not isinstance(prop_details, dict):
                    raise ValueError("Property details must be a dictionary.")

                dtype_str = prop_details.get("data_type")
                if not dtype_str:
                    raise ValueError("data_type is missing.")

                # Convert string (e.g., "TEXT") to Weaviate Enum (wvc.DataType.TEXT)
                data_type = getattr(wvc.DataType, dtype_str.upper())
                description = prop_details.get("description")

                properties.append(
                    wvc.Property(
                        name=name,
                        data_type=data_type,
                        description=description,
                    )
                )
            except Exception as e:
                logger.warning("Skipping custom property '%s' for '%s': %s", name, collection_name, e)

    try:
        execution_collection = client.collections.create(
            name=collection_name,
            properties=properties,
            vector_config=wvc.Configure.Vectorizer.none(),
        )
    except Exception as e:
        raise SchemaCreationError(f"Error during execution schema creation: {e}") from e

    logger.info("Collection '%s' created successfully", collection_name)
    return execution_collection
312
+
313
+
314
def initialize_database():
    """
    Convenience entry point: obtains the cached client and makes sure both
    collections (functions and executions) exist.

    [Returns]
    - The Weaviate client on success.
    - None if anything fails; the error is logged rather than raised.
    """
    try:
        settings = get_weaviate_settings()
        client = get_cached_client()
        if not client:
            return None

        create_vectorwave_schema(client, settings)
        create_execution_schema(client, settings)
        return client
    except Exception as e:
        logger.error("Failed to initialize VectorWave database: %s", e)
        return None
328
+
@@ -0,0 +1,122 @@
1
+ import logging
2
+ import weaviate
3
+ import weaviate.classes as wvc
4
+ from typing import Dict, Any, Optional, List
5
+
6
+ from weaviate.collections.classes.filters import _Filters
7
+
8
+ from ..models.db_config import get_weaviate_settings, WeaviateSettings
9
+ from .db import get_cached_client
10
+ from ..exception.exceptions import WeaviateConnectionError
11
+ from ..vectorizer.factory import get_vectorizer
12
+
13
+ import uuid
14
+ from datetime import datetime
15
+
16
+ # Create module-level logger
17
+ logger = logging.getLogger(__name__)
18
+
19
+ def _build_weaviate_filters(filters: Optional[Dict[str, Any]]) -> _Filters | None:
20
+ if not filters:
21
+ return None
22
+ filter_list = [
23
+ wvc.query.Filter.by_property(key).equal(value)
24
+ for key, value in filters.items()
25
+ ]
26
+ if not filter_list:
27
+ return None
28
+ return wvc.query.Filter.all_of(filter_list)
29
+
30
+
31
def search_functions(query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
    """
    Searches function definitions in the [VectorWaveFunctions] collection by
    natural-language similarity.

    If get_vectorizer() returns a vectorizer, the query is embedded on the
    Python side and sent as a near_vector search; otherwise the query text is
    sent as a near_text search.

    [Args]
    - query: Natural-language search text.
    - limit: Maximum number of results.
    - filters: Optional property -> value equality filters (all must match).

    [Returns]
    - One dict per hit with the keys "properties", "metadata" and "uuid".

    [Raises]
    - WeaviateConnectionError: If any step of the search fails.
    """
    try:
        settings: WeaviateSettings = get_weaviate_settings()
        client: weaviate.WeaviateClient = get_cached_client()

        collection = client.collections.get(settings.COLLECTION_NAME)
        weaviate_filter = _build_weaviate_filters(filters)
        metadata_query = wvc.query.MetadataQuery(distance=True)

        vectorizer = get_vectorizer()

        if vectorizer:
            logger.debug("Searching with Python client (near_vector)")
            try:
                query_vector = vectorizer.embed(query)
            except Exception as e:
                # Caught again by the outer handler, which logs it and wraps it.
                raise WeaviateConnectionError(f"Query vectorization failed: {e}") from e

            response = collection.query.near_vector(
                near_vector=query_vector,
                limit=limit,
                filters=weaviate_filter,
                return_metadata=metadata_query,
            )
        else:
            logger.debug("Searching with Weaviate module (near_text)")
            response = collection.query.near_text(
                query=query,
                limit=limit,
                filters=weaviate_filter,
                return_metadata=metadata_query,
            )

        return [
            {
                "properties": obj.properties,
                "metadata": obj.metadata,
                "uuid": obj.uuid,
            }
            for obj in response.objects
        ]

    except Exception as e:
        logger.error("Error during Weaviate search: %s", e)
        raise WeaviateConnectionError(f"Failed to execute 'search_functions': {e}") from e
81
+
82
+
83
def search_executions(
        limit: int = 10,
        filters: Optional[Dict[str, Any]] = None,
        sort_by: Optional[str] = "timestamp_utc",
        sort_ascending: bool = False
) -> List[Dict[str, Any]]:
    """
    Searches execution logs from the [VectorWaveExecutions] collection using filtering and sorting.

    [Args]
    - limit: Maximum number of records.
    - filters: Optional property -> value equality filters (all must match).
    - sort_by: Property to sort on; None or an empty string disables sorting.
    - sort_ascending: Sort direction (descending by default).

    [Returns]
    - One properties dict per record; uuid.UUID and datetime values are
      converted to strings.

    [Raises]
    - WeaviateConnectionError: If any step of the search fails.
    """
    try:
        settings: WeaviateSettings = get_weaviate_settings()
        client: weaviate.WeaviateClient = get_cached_client()

        collection = client.collections.get(settings.EXECUTION_COLLECTION_NAME)
        weaviate_filter = _build_weaviate_filters(filters)

        weaviate_sort = None
        if sort_by:
            weaviate_sort = wvc.query.Sort.by_property(
                name=sort_by,
                ascending=sort_ascending
            )

        response = collection.query.fetch_objects(
            limit=limit,
            filters=weaviate_filter,
            sort=weaviate_sort
        )

        results = []
        for obj in response.objects:
            # Build a fresh dict so the objects returned by the client stay untouched.
            results.append({
                key: str(value) if isinstance(value, (uuid.UUID, datetime)) else value
                for key, value in obj.properties.items()
            })
        return results

    except Exception as e:
        logger.error("Error during Weaviate execution search: %s", e)
        raise WeaviateConnectionError(f"Failed to execute 'search_executions': {e}") from e
File without changes
@@ -0,0 +1,22 @@
1
+ """
2
+ Defines custom exceptions for the VectorWave project.
3
+ """
4
+
5
class VectorWaveError(Exception):
    """Root of the VectorWave exception hierarchy; catch this to handle any library error."""
8
+
9
+
10
class WeaviateConnectionError(VectorWaveError):
    """Signals a failure while attempting to connect to the Weaviate server."""
13
+
14
+
15
class WeaviateNotReadyError(VectorWaveError):
    """Signals that the connection succeeded but the Weaviate server is not in a ready state."""
18
+
19
+
20
class SchemaCreationError(VectorWaveError):
    """Signals a failure while creating a Weaviate collection schema."""
File without changes
@@ -0,0 +1,92 @@
1
+ import logging
2
+ from pydantic_settings import BaseSettings, SettingsConfigDict
3
+ from functools import lru_cache
4
+ from typing import Dict, Optional, Any
5
+ import json
6
+ import os
7
+
8
+ # Create module-level logger
9
+ logger = logging.getLogger(__name__)
10
+
11
class WeaviateSettings(BaseSettings):
    """
    Connection and schema settings for the Weaviate database.

    Values are read from environment variables or a `.env` file
    (e.g., WEAVIATE_HOST=10.0.0.1); the values below are the defaults
    used when nothing is set.
    """
    # --- Connection ---
    WEAVIATE_HOST: str = "localhost"
    WEAVIATE_PORT: int = 8080
    WEAVIATE_GRPC_PORT: int = 50051

    # --- Collections ---
    COLLECTION_NAME: str = "VectorWaveFunctions"
    EXECUTION_COLLECTION_NAME: str = "VectorWaveExecutions"
    IS_VECTORIZE_COLLECTION_NAME: bool = True

    # --- Vectorization ---
    # One of: "weaviate_module", "huggingface", "openai_client", "none"
    VECTORIZER: str = "weaviate_module"
    WEAVIATE_VECTORIZER_MODULE: str = "text2vec-openai"
    WEAVIATE_GENERATIVE_MODULE: str = "generative-openai"

    # [New] Settings for the Python-side clients
    OPENAI_API_KEY: Optional[str] = None
    HF_MODEL_NAME: str = "sentence-transformers/all-MiniLM-L6-v2"

    # --- Custom properties ---
    # Path of the JSON file that describes extra collection properties.
    CUSTOM_PROPERTIES_FILE_PATH: str = ".weaviate_properties"
    # Filled in by get_weaviate_settings() from the file above.
    custom_properties: Optional[Dict[str, Dict[str, Any]]] = None
    # Filled in by get_weaviate_settings() from environment variables.
    global_custom_values: Optional[Dict[str, Any]] = None

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )
42
+
43
+
44
+ # @lru_cache ensures this function creates the Settings object only once (Singleton pattern)
45
+ # and reuses the cached object on subsequent calls.
46
@lru_cache()
def get_weaviate_settings() -> WeaviateSettings:
    """
    Builds the settings object once and returns the cached instance afterwards.

    On the first call it also:
    1. Loads the custom property definitions from the JSON file at
       CUSTOM_PROPERTIES_FILE_PATH (if it exists and its root is an object).
    2. For every custom property, reads the environment variable whose name
       is the upper-cased property name and stores non-empty values in
       `global_custom_values`.

    Problems with the file are logged as warnings and never raised.
    """
    settings = WeaviateSettings()
    properties_path = settings.CUSTOM_PROPERTIES_FILE_PATH

    if properties_path:
        if os.path.exists(properties_path):
            logger.info("Loading custom properties schema from '%s'", properties_path)
            try:
                with open(properties_path, 'r', encoding='utf-8') as handle:
                    parsed = json.load(handle)

                if not isinstance(parsed, dict):
                    logger.warning(
                        "Content in '%s' is not a valid dictionary (JSON root), custom properties will not be loaded",
                        properties_path
                    )
                    parsed = None
                settings.custom_properties = parsed

            except json.JSONDecodeError as e:
                logger.warning("Could not parse JSON from '%s': %s", properties_path, e)
                settings.custom_properties = None
            except Exception as e:
                logger.warning("Could not read file '%s': %s", properties_path, e)
                settings.custom_properties = None
        else:
            logger.debug("Custom properties file not found at '%s', skipping", properties_path)

    if settings.custom_properties:
        settings.global_custom_values = {}
        logger.debug("Loading global custom values from environment variables")

        for prop_name in settings.custom_properties:
            # NOTE: the lookup key is just the upper-cased property name, so a
            # property called e.g. "path" reads the PATH environment variable.
            env_var_name = prop_name.upper()
            env_value = os.environ.get(env_var_name)
            if not env_value:
                continue

            settings.global_custom_values[prop_name] = env_value
            logger.debug("Loaded global value for '%s' from env var '%s'", prop_name, env_var_name)

    return settings
File without changes
File without changes