PyPI - vectorwave - Versions diffs - 0.1.3__py3-none-any.whl - Mend

vectorwave 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

tests/__init__.py +0 -0
tests/batch/__init__.py +0 -0
tests/batch/test_batch.py +98 -0
tests/core/__init__.py +0 -0
tests/core/test_decorator.py +345 -0
tests/database/__init__.py +0 -0
tests/database/test_db.py +468 -0
tests/database/test_db_search.py +163 -0
tests/exception/__init__.py +0 -0
tests/models/__init__.py +0 -0
tests/models/test_db_config.py +152 -0
tests/monitoring/__init__.py +0 -0
tests/monitoring/test_tracer.py +202 -0
tests/prediction/__init__.py +0 -0
tests/vectorizer/__init__.py +0 -0
vectorwave/__init__.py +13 -0
vectorwave/batch/__init__.py +0 -0
vectorwave/batch/batch.py +68 -0
vectorwave/core/__init__.py +0 -0
vectorwave/core/core.py +0 -0
vectorwave/core/decorator.py +131 -0
vectorwave/database/__init__.py +0 -0
vectorwave/database/db.py +328 -0
vectorwave/database/db_search.py +122 -0
vectorwave/exception/__init__.py +0 -0
vectorwave/exception/exceptions.py +22 -0
vectorwave/models/__init__.py +0 -0
vectorwave/models/db_config.py +92 -0
vectorwave/monitoring/__init__.py +0 -0
vectorwave/monitoring/monitoring.py +0 -0
vectorwave/monitoring/tracer.py +131 -0
vectorwave/prediction/__init__.py +0 -0
vectorwave/prediction/predictor.py +0 -0
vectorwave/vectorizer/__init__.py +0 -0
vectorwave/vectorizer/base.py +12 -0
vectorwave/vectorizer/factory.py +49 -0
vectorwave/vectorizer/huggingface_vectorizer.py +33 -0
vectorwave/vectorizer/openai_vectorizer.py +35 -0
vectorwave-0.1.3.dist-info/METADATA +352 -0
vectorwave-0.1.3.dist-info/RECORD +44 -0
vectorwave-0.1.3.dist-info/WHEEL +5 -0
vectorwave-0.1.3.dist-info/licenses/LICENSE +21 -0
vectorwave-0.1.3.dist-info/licenses/NOTICE +31 -0
vectorwave-0.1.3.dist-info/top_level.txt +2 -0

tests/models/test_db_config.py ADDED Viewed

@@ -0,0 +1,152 @@
+import pytest
+import json
+from unittest.mock import patch, mock_open
+from json import JSONDecodeError
+# Function to test
+from vectorwave.models.db_config import get_weaviate_settings
+# --- Mock Data ---
+# Contents of a well-formed .weaviate_properties file: valid JSON that declares
+# two custom properties, "run_id" (TEXT) and "experiment_id" (INT).
+# The tests below feed it to the code under test through a mocked open().
+MOCK_JSON_DATA = """
+{
+  "run_id": {
+    "data_type": "TEXT",
+    "description": "Test run ID"
+  },
+  "experiment_id": {
+    "data_type": "INT",
+    "description": "Identifier for the experiment"
+  }
+}
+"""
+# Contents of a malformed .weaviate_properties file: the outermost object is
+# never closed, so the text is not valid JSON.
+MOCK_INVALID_JSON = """
+{
+  "run_id": {
+    "data_type": "TEXT"
+  }
+""" # Missing closing '}'
+# --- Test Cases ---
+@patch('os.path.exists', return_value=True)
+@patch('builtins.open', new_callable=mock_open, read_data=MOCK_JSON_DATA)
+def test_get_settings_loads_custom_props_success(mock_open_file, mock_exists):
+    """
+    Case 1: .weaviate_properties file exists and JSON is valid
+    - settings.custom_properties should be loaded correctly as a dictionary
+    """
+    # Arrange
+    # Clear the @lru_cache to bypass caching for this test
+    get_weaviate_settings.cache_clear()
+    # Act
+    settings = get_weaviate_settings()
+    # Assert
+    # Verify that the default path (.weaviate_properties) was checked
+    mock_exists.assert_called_with(".weaviate_properties")
+    # Verify the file was opened in 'r' mode
+    mock_open_file.assert_called_with(".weaviate_properties", 'r', encoding='utf-8')
+    assert settings.custom_properties is not None
+    assert "run_id" in settings.custom_properties
+    assert settings.custom_properties["run_id"]["data_type"] == "TEXT"
+    assert settings.custom_properties["run_id"]["description"] == "Test run ID"
+    assert "experiment_id" in settings.custom_properties
+@patch('os.path.exists', return_value=False)
+def test_get_settings_file_not_found(mock_exists, caplog):
+    """
+    Case 2: .weaviate_properties file does not exist
+    - settings.custom_properties should be None
+    - A 'file not found' message should be logged at DEBUG level
+    """
+    import logging
+    # Arrange
+    caplog.set_level(logging.DEBUG)  # DEBUG 레벨로 설정 (중요!)
+    get_weaviate_settings.cache_clear()
+    # Act
+    settings = get_weaviate_settings()
+    # Assert
+    mock_exists.assert_called_with(".weaviate_properties")
+    assert settings.custom_properties is None
+    # Check if 'file not found' message was logged
+    assert "file not found" in caplog.text.lower() or "not found" in caplog.text
+@patch('os.path.exists', return_value=True)
+@patch('builtins.open', new_callable=mock_open, read_data=MOCK_INVALID_JSON)
+@patch('json.load', side_effect=JSONDecodeError("Mock JSON Decode Error", "", 0))
+def test_get_settings_invalid_json(mock_json_load, mock_open_file, mock_exists, caplog):
+    """
+    Case 3: File exists but JSON format is invalid
+    - settings.custom_properties should be None
+    - A 'Could not parse JSON' warning should be logged
+    """
+    import logging
+    # Arrange
+    caplog.set_level(logging.WARNING)
+    get_weaviate_settings.cache_clear()
+    # Act
+    settings = get_weaviate_settings()
+    # Assert
+    mock_exists.assert_called_once()
+    mock_open_file.assert_called_once()
+    mock_json_load.assert_called_once()  # json.load was called but failed (due to side_effect)
+    assert settings.custom_properties is None  # Should be None due to parsing failure
+    # Check if 'Could not parse JSON' warning was logged
+    assert "Could not parse JSON" in caplog.text
+    # Also check the log level
+    warning_logs = [r for r in caplog.records if "parse JSON" in r.message]
+    assert len(warning_logs) > 0
+    assert warning_logs[0].levelname == "WARNING"
+@patch('os.path.exists', return_value=True)
+@patch('builtins.open', new_callable=mock_open, read_data=MOCK_JSON_DATA)
+@patch('os.environ.get') # os.environ.get을 모킹합니다
+def test_get_settings_loads_global_custom_values(mock_env_get, mock_open_file, mock_exists):
+    """
+    Case 4: Test if the value of the "RUN_ID" environment variable is loaded
+    into global_custom_values for "run_id" defined in .weaviate_properties
+    """
+    # 1. Arrange
+    # MOCK_JSON_DATA defines "run_id" and "experiment_id".
+    # Set os.environ.get("RUN_ID") to return "test-run-123".
+    # Set os.environ.get("EXPERIMENT_ID") to return None.
+    def mock_env_side_effect(key):
+        if key == "RUN_ID":
+            return "test-run-123"
+        return None
+    mock_env_get.side_effect = mock_env_side_effect
+    get_weaviate_settings.cache_clear()
+    # 2. Act
+    settings = get_weaviate_settings()
+    # 3. Assert
+    # .weaviate_properties should be loaded correctly.
+    assert settings.custom_properties is not None
+    assert "run_id" in settings.custom_properties
+    # Check if global_custom_values was loaded correctly.
+    assert settings.global_custom_values is not None
+    assert "run_id" in settings.global_custom_values
+    assert settings.global_custom_values["run_id"] == "test-run-123"
+    # "EXPERIMENT_ID" should not be included as os.environ.get returned None.
+    assert "experiment_id" not in settings.global_custom_values

tests/monitoring/__init__.py ADDED Viewed

File without changes

tests/monitoring/test_tracer.py ADDED Viewed

@@ -0,0 +1,202 @@
+import pytest
+from unittest.mock import MagicMock
+import time
+from vectorwave.monitoring.tracer import trace_root, trace_span
+from vectorwave.models.db_config import WeaviateSettings
+# --- Import real functions for cache clearing ---
+from vectorwave.batch.batch import get_batch_manager as real_get_batch_manager
+from vectorwave.database.db import get_cached_client as real_get_cached_client
+from vectorwave.models.db_config import get_weaviate_settings as real_get_settings
+# Dotted import paths of the modules whose attributes the fixture below
+# replaces with mocks (adjust to your project structure if needed).
+TRACER_MODULE_PATH = "vectorwave.monitoring.tracer"
+BATCH_MODULE_PATH = "vectorwave.batch.batch"
+@pytest.fixture
+def mock_tracer_deps(monkeypatch):
+    """
+    Mocks dependencies for tracer.py (batch, settings).
+    """
+    # 1. Mock BatchManager
+    mock_batch_instance = MagicMock()
+    mock_batch_instance.add_object = MagicMock()
+    mock_get_batch_manager = MagicMock(return_value=mock_batch_instance)
+    # 2. Mock Settings (including global tags)
+    mock_settings = WeaviateSettings(
+        COLLECTION_NAME="TestFunctions",
+        EXECUTION_COLLECTION_NAME="TestExecutions",
+        custom_properties=None,  # Not important for this test
+        global_custom_values={"run_id": "global-run-abc", "env": "test"}
+    )
+    mock_get_settings = MagicMock(return_value=mock_settings)
+    mock_client = MagicMock()
+    mock_get_client = MagicMock(return_value=mock_client)
+    # --- Patch dependencies for tracer.py ---
+    monkeypatch.setattr(f"{TRACER_MODULE_PATH}.get_batch_manager", mock_get_batch_manager)
+    monkeypatch.setattr(f"{TRACER_MODULE_PATH}.get_weaviate_settings", mock_get_settings)
+    # Patch dependencies inside batch.py to prevent BatchManager init failure
+    monkeypatch.setattr(f"{BATCH_MODULE_PATH}.get_weaviate_client", mock_get_client)
+    monkeypatch.setattr(f"{BATCH_MODULE_PATH}.get_weaviate_settings", mock_get_settings)
+    # 5. Clear caches
+    real_get_batch_manager.cache_clear()
+    real_get_cached_client.cache_clear()
+    real_get_settings.cache_clear()
+    return {
+        "batch": mock_batch_instance,
+        "settings": mock_settings
+    }
+def test_trace_root_and_span_success(mock_tracer_deps):
+    """
+    Case 1: Success (Root + Span) - The span should be recorded successfully.
+    """
+    mock_batch = mock_tracer_deps["batch"]
+    @trace_span
+    def my_inner_span(x):
+        return f"result: {x}"
+    @trace_root()
+    def my_workflow_root():
+        return my_inner_span(x=10)
+    # --- Act ---
+    result = my_workflow_root()
+    # --- Assert ---
+    assert result == "result: 10"
+    mock_batch.add_object.assert_called_once()
+    args, kwargs = mock_batch.add_object.call_args
+    props = kwargs["properties"]
+    assert kwargs["collection"] == "TestExecutions"
+    assert props["status"] == "SUCCESS"
+    assert props["function_name"] == "my_inner_span"
+    assert props["error_message"] is None
+    assert "trace_id" in props
+    assert props["run_id"] == "global-run-abc"
+    assert props["env"] == "test"
+def test_trace_span_failure(mock_tracer_deps):
+    """
+    Case 2: Failure (Root + Failing Span) - The span should be recorded with an ERROR status.
+    """
+    mock_batch = mock_tracer_deps["batch"]
+    @trace_span
+    def my_failing_span():
+        raise ValueError("This is a test error")
+    @trace_root()
+    def my_workflow_root_fail():
+        my_failing_span()
+    # --- Act & Assert (Exception) ---
+    with pytest.raises(ValueError, match="This is a test error"):
+        my_workflow_root_fail()
+    # --- Assert (Log) ---
+    mock_batch.add_object.assert_called_once()
+    args, kwargs = mock_batch.add_object.call_args
+    props = kwargs["properties"]
+    assert props["status"] == "ERROR"
+    assert "ValueError: This is a test error" in props["error_message"]
+    assert props["function_name"] == "my_failing_span"
+    assert props["run_id"] == "global-run-abc"
+def test_span_without_root_does_nothing(mock_tracer_deps):
+    """
+    Case 3: Tracing disabled (Span only) - If there's no Root, nothing should be recorded.
+    """
+    mock_batch = mock_tracer_deps["batch"]
+    @trace_span
+    def my_lonely_span():
+        return "lonely_result"
+    # --- Act ---
+    result = my_lonely_span()
+    # --- Assert ---
+    assert result == "lonely_result"
+    mock_batch.add_object.assert_not_called()
+def test_span_captures_attributes_and_overrides_globals(mock_tracer_deps):
+    """
+    Case 4/5: Attribute Capturing and Overriding
+    """
+    mock_batch = mock_tracer_deps["batch"]
+    class MyObject:
+        def __str__(self): return "MyObjectInstance"
+    @trace_span(attributes_to_capture=["team", "priority", "run_id", "user_obj"])
+    def my_span_with_attrs(team, priority, run_id, user_obj, other_arg="default"):
+        return "captured"
+    @trace_root()
+    def my_workflow_root_attrs():
+        return my_span_with_attrs(
+            team="backend",
+            priority=1,
+            run_id="override-run-xyz",  # <-- This should override "global-run-abc"
+            user_obj=MyObject(),
+            other_arg="should-be-ignored"
+        )
+    # --- Act ---
+    my_workflow_root_attrs()
+    # --- Assert ---
+    mock_batch.add_object.assert_called_once()
+    props = mock_batch.add_object.call_args.kwargs["properties"]
+    assert props["team"] == "backend"
+    assert props["priority"] == 1
+    assert props["user_obj"] == "MyObjectInstance"
+    assert props["run_id"] == "override-run-xyz"  # Overridden
+    assert props["env"] == "test"  # Non-overridden global remains
+    assert "other_arg" not in props
+def test_root_accepts_custom_trace_id(mock_tracer_deps):
+    """
+    Bonus: Test case for manually providing a 'trace_id'.
+    (This is the test that was fixed)
+    """
+    mock_batch = mock_tracer_deps["batch"]
+    @trace_span
+    def my_inner_span():
+        pass
+    @trace_root()
+    def my_workflow_root_custom_id():  # <-- ✅ FIXED: Removed 'trace_id' arg
+        my_inner_span()
+    # --- Act ---
+    # The decorator wrapper still receives 'trace_id' from this call
+    my_workflow_root_custom_id(trace_id="my-custom-trace-id-123")
+    # --- Assert ---
+    mock_batch.add_object.assert_called_once()
+    props = mock_batch.add_object.call_args.kwargs["properties"]
+    # Check if the trace_id was popped and injected correctly
+    assert props["trace_id"] == "my-custom-trace-id-123"

tests/prediction/__init__.py ADDED Viewed

File without changes

tests/vectorizer/__init__.py ADDED Viewed

File without changes

vectorwave/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+from .core.decorator import vectorize
+from .database.db import initialize_database
+from .database.db_search import search_functions, search_executions
+from .monitoring.tracer import trace_span
+# Public API of the package: the names exported by `from vectorwave import *`.
+# NOTE(review): `trace_root` is not re-exported here although the tracer module
+# defines it alongside `trace_span` - confirm that this is intentional.
+__all__ = [
+    'vectorize',
+    'initialize_database',
+    'search_functions',
+    'search_executions',
+    'trace_span'
+]

vectorwave/batch/__init__.py ADDED Viewed

File without changes

vectorwave/batch/batch.py ADDED Viewed

@@ -0,0 +1,68 @@
+import weaviate
+import atexit
+import logging
+from functools import lru_cache
+from typing import Optional, List
+from ..models.db_config import get_weaviate_settings, WeaviateSettings
+from ..database.db import get_weaviate_client
+from ..exception.exceptions import WeaviateConnectionError
+# Create module-level logger
+logger = logging.getLogger(__name__)
+class WeaviateBatchManager:
+    """
+    A singleton class that manages Weaviate batch imports.
+    """
+    def __init__(self):
+        self._initialized = False
+        logger.debug("Initializing WeaviateBatchManager")
+        self.client: weaviate.WeaviateClient = None
+        try:
+            # (get_weaviate_settings is reused as it is handled by lru_cache)
+            self.settings: WeaviateSettings = get_weaviate_settings()
+            self.client: weaviate.WeaviateClient = get_weaviate_client(self.settings)
+            if not self.client:
+                raise WeaviateConnectionError("Client is None, cannot configure batch.")
+            # self.client.batch.configure(
+            #     batch_size=20,
+            #     dynamic=True,
+            #     timeout_retries=3,
+            # )
+            # Register atexit: Automatically calls self.flush() on script exit
+            # atexit.register(self.flush)
+            self._initialized = True
+            logger.info("WeaviateBatchManager initialized successfully")
+        except Exception as e:
+            # Prevents VectorWave from stopping the main app upon DB connection failure
+            logger.error("Failed to initialize WeaviateBatchManager: %s", e)
+    def add_object(self, collection: str, properties: dict, uuid: str = None, vector: Optional[List[float]] = None):
+        """
+        Adds an object to the Weaviate batch queue.
+        """
+        if not self._initialized or not self.client:
+            logger.warning("Batch manager not initialized, skipping add_object")
+            return
+        try:
+            self.client.collections.get(collection).data.insert(
+                properties=properties,
+                uuid=uuid,
+                vector=vector
+            )
+        except Exception as e:
+            logger.error("Failed to add object to batch (collection '%s'): %s", collection, e)
+@lru_cache(None)
+def get_batch_manager() -> WeaviateBatchManager:
+    return WeaviateBatchManager()

vectorwave/core/__init__.py ADDED Viewed

File without changes

vectorwave/core/core.py ADDED Viewed

File without changes

vectorwave/core/decorator.py ADDED Viewed

@@ -0,0 +1,131 @@
+# vtm/src/vectorwave/core/decorator.py
+import logging
+import inspect
+from functools import wraps
+from weaviate.util import generate_uuid5
+from ..batch.batch import get_batch_manager
+from ..models.db_config import get_weaviate_settings
+from ..monitoring.tracer import trace_root, trace_span
+from ..vectorizer.factory import get_vectorizer
+# Create module-level logger
+logger = logging.getLogger(__name__)
+def vectorize(search_description: str,
+              sequence_narrative: str,
+              **execution_tags):
+    """
+    VectorWave Decorator
+    (1) Collects function definitions (static data) once on script load.
+    (2) Records function execution (dynamic data) every time the function is called.
+    """
+    def decorator(func):
+        func_uuid = None
+        valid_execution_tags = {}
+        try:
+            module_name = func.__module__
+            function_name = func.__name__
+            func_identifier = f"{module_name}.{function_name}"
+            func_uuid = generate_uuid5(func_identifier)
+            static_properties = {
+                "function_name": function_name,
+                "module_name": module_name,
+                "docstring": inspect.getdoc(func) or "",
+                "source_code": inspect.getsource(func),
+                "search_description": search_description,
+                "sequence_narrative": sequence_narrative
+            }
+            batch = get_batch_manager()
+            settings = get_weaviate_settings()
+            vectorizer = get_vectorizer()
+            vector_to_add = None
+            if vectorizer:
+                try:
+                    print(f"[VectorWave] Vectorizing '{function_name}' using Python vectorizer...")
+                    vector_to_add = vectorizer.embed(search_description)
+                except Exception as e:
+                    print(f"Warning: Failed to vectorize '{function_name}' with Python client: {e}")
+            if execution_tags:
+                if not settings.custom_properties:
+                    logger.warning(
+                        f"Function '{function_name}' provided execution_tags {list(execution_tags.keys())} "
+                        f"but no .weaviate_properties file was loaded. These tags will be IGNORED."
+                    )
+                else:
+                    allowed_keys = set(settings.custom_properties.keys())
+                    for key, value in execution_tags.items():
+                        if key in allowed_keys:
+                            valid_execution_tags[key] = value
+                        else:
+                            logger.warning(
+                                "Function '%s' has undefined execution_tag: '%s'. "
+                                "This tag will be IGNORED. Please add it to your .weaviate_properties file.",
+                                function_name,
+                                key
+                            )
+            batch.add_object(
+                collection=settings.COLLECTION_NAME,
+                properties=static_properties,
+                uuid=func_uuid,
+                vector=vector_to_add
+            )
+        except Exception as e:
+            logger.error("Error in @vectorize setup for '%s': %s", func.__name__, e)
+            @wraps(func)
+            def original_func_wrapper(*args, **kwargs):
+                return func(*args, **kwargs)
+            return original_func_wrapper
+        # 2a. The *inner* wrapper to be wrapped by @trace_span
+        # This function receives all tags including full_kwargs from @trace_span.
+        @trace_root()
+        @trace_span(attributes_to_capture=['function_uuid', 'team', 'priority', 'run_id'])
+        @wraps(func)
+        def inner_wrapper(*args, **kwargs):
+            original_kwargs = kwargs.copy()
+            keys_to_remove = list(valid_execution_tags.keys())
+            keys_to_remove.append('function_uuid')
+            for key in execution_tags.keys():
+                if key not in keys_to_remove:
+                    keys_to_remove.append(key)
+            for key in keys_to_remove:
+                original_kwargs.pop(key, None)
+            return func(*args, **original_kwargs)
+        @wraps(func)
+        def outer_wrapper(*args, **kwargs):
+            full_kwargs = kwargs.copy()
+            full_kwargs.update(valid_execution_tags)
+            full_kwargs['function_uuid'] = func_uuid
+            # 2. Call the *inner* wrapper with the full_kwargs
+            #    This call passes through the @trace_root -> @trace_span decorators.
+            return inner_wrapper(*args, **full_kwargs)
+        return outer_wrapper
+    return decorator

vectorwave/database/__init__.py ADDED Viewed

File without changes