velesdb-common 1.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- velesdb_common-1.9.3/PKG-INFO +7 -0
- velesdb_common-1.9.3/pyproject.toml +18 -0
- velesdb_common-1.9.3/setup.cfg +4 -0
- velesdb_common-1.9.3/src/velesdb_common/__init__.py +80 -0
- velesdb_common-1.9.3/src/velesdb_common/graph.py +97 -0
- velesdb_common-1.9.3/src/velesdb_common/ids.py +40 -0
- velesdb_common-1.9.3/src/velesdb_common/memory.py +33 -0
- velesdb_common-1.9.3/src/velesdb_common/security.py +398 -0
- velesdb_common-1.9.3/src/velesdb_common.egg-info/PKG-INFO +7 -0
- velesdb_common-1.9.3/src/velesdb_common.egg-info/SOURCES.txt +14 -0
- velesdb_common-1.9.3/src/velesdb_common.egg-info/dependency_links.txt +1 -0
- velesdb_common-1.9.3/src/velesdb_common.egg-info/top_level.txt +1 -0
- velesdb_common-1.9.3/tests/test_graph.py +34 -0
- velesdb_common-1.9.3/tests/test_ids.py +27 -0
- velesdb_common-1.9.3/tests/test_memory.py +17 -0
- velesdb_common-1.9.3/tests/test_security.py +47 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "velesdb-common"
|
|
7
|
+
version = "1.9.3"
|
|
8
|
+
description = "Shared utilities for VelesDB Python integrations (internal use)"
|
|
9
|
+
license = {text = "MIT"}
|
|
10
|
+
authors = [
|
|
11
|
+
{name = "VelesDB Team", email = "contact@wiscale.fr"}
|
|
12
|
+
]
|
|
13
|
+
requires-python = ">=3.9"
|
|
14
|
+
# No runtime dependencies beyond the stdlib — kept deliberately lean.
|
|
15
|
+
dependencies = []
|
|
16
|
+
|
|
17
|
+
[tool.setuptools.packages.find]
|
|
18
|
+
where = ["src"]
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""velesdb_common — shared utilities for VelesDB Python integrations.
|
|
2
|
+
|
|
3
|
+
This package contains code that is identical across the LangChain and
|
|
4
|
+
LlamaIndex integration packages. It is *not* a public API; downstream
|
|
5
|
+
users should import from ``langchain_velesdb`` or ``llamaindex_velesdb``
|
|
6
|
+
directly.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from velesdb_common.ids import make_initial_id_counter, stable_hash_id
|
|
10
|
+
from velesdb_common.memory import format_procedural_results
|
|
11
|
+
from velesdb_common.security import (
|
|
12
|
+
SecurityError,
|
|
13
|
+
ALLOWED_METRICS,
|
|
14
|
+
ALLOWED_STORAGE_MODES,
|
|
15
|
+
DEFAULT_TIMEOUT_MS,
|
|
16
|
+
MAX_BATCH_SIZE,
|
|
17
|
+
MAX_DIMENSION,
|
|
18
|
+
MAX_K_VALUE,
|
|
19
|
+
MAX_PATH_LENGTH,
|
|
20
|
+
MAX_QUERY_LENGTH,
|
|
21
|
+
MAX_SPARSE_VECTOR_SIZE,
|
|
22
|
+
MAX_TEXT_LENGTH,
|
|
23
|
+
MIN_DIMENSION,
|
|
24
|
+
validate_batch_size,
|
|
25
|
+
validate_collection_name,
|
|
26
|
+
validate_dimension,
|
|
27
|
+
validate_k,
|
|
28
|
+
validate_metric,
|
|
29
|
+
validate_path,
|
|
30
|
+
validate_query,
|
|
31
|
+
validate_sparse_vector,
|
|
32
|
+
validate_storage_mode,
|
|
33
|
+
validate_text,
|
|
34
|
+
validate_timeout,
|
|
35
|
+
validate_url,
|
|
36
|
+
validate_weight,
|
|
37
|
+
)
|
|
38
|
+
from velesdb_common.graph import (
|
|
39
|
+
build_graph_rest_payload,
|
|
40
|
+
is_timeout_exception,
|
|
41
|
+
open_native_graph,
|
|
42
|
+
)
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
# ids
|
|
46
|
+
"make_initial_id_counter",
|
|
47
|
+
"stable_hash_id",
|
|
48
|
+
# memory
|
|
49
|
+
"format_procedural_results",
|
|
50
|
+
# security
|
|
51
|
+
"SecurityError",
|
|
52
|
+
"ALLOWED_METRICS",
|
|
53
|
+
"ALLOWED_STORAGE_MODES",
|
|
54
|
+
"DEFAULT_TIMEOUT_MS",
|
|
55
|
+
"MAX_BATCH_SIZE",
|
|
56
|
+
"MAX_DIMENSION",
|
|
57
|
+
"MAX_K_VALUE",
|
|
58
|
+
"MAX_PATH_LENGTH",
|
|
59
|
+
"MAX_QUERY_LENGTH",
|
|
60
|
+
"MAX_SPARSE_VECTOR_SIZE",
|
|
61
|
+
"MAX_TEXT_LENGTH",
|
|
62
|
+
"MIN_DIMENSION",
|
|
63
|
+
"validate_batch_size",
|
|
64
|
+
"validate_collection_name",
|
|
65
|
+
"validate_dimension",
|
|
66
|
+
"validate_k",
|
|
67
|
+
"validate_metric",
|
|
68
|
+
"validate_path",
|
|
69
|
+
"validate_query",
|
|
70
|
+
"validate_sparse_vector",
|
|
71
|
+
"validate_storage_mode",
|
|
72
|
+
"validate_text",
|
|
73
|
+
"validate_timeout",
|
|
74
|
+
"validate_url",
|
|
75
|
+
"validate_weight",
|
|
76
|
+
# graph
|
|
77
|
+
"build_graph_rest_payload",
|
|
78
|
+
"is_timeout_exception",
|
|
79
|
+
"open_native_graph",
|
|
80
|
+
]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Graph traversal helpers shared across VelesDB Python integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, List
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def build_graph_rest_payload(
    source_id: int,
    max_depth: int,
    expand_k: int,
    rel_types: List[str],
) -> dict:
    """Assemble the JSON body for a VelesDB REST graph-traversal request.

    The traversal strategy is always ``"bfs"`` and the result limit is
    twice ``expand_k``.

    Args:
        source_id: ID of the node the traversal starts from.
        max_depth: Maximum traversal depth, passed through unchanged.
        expand_k: Neighbour budget; the payload's ``limit`` is
            ``expand_k * 2``.
        rel_types: Relationship type filters, passed through unchanged
            (the same list object is placed in the payload).

    Returns:
        Dict with the keys ``source``, ``strategy``, ``max_depth``,
        ``limit`` and ``rel_types``, in that order.
    """
    payload = dict(
        source=source_id,
        strategy="bfs",
        max_depth=max_depth,
        limit=expand_k * 2,
        rel_types=rel_types,
    )
    return payload
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def is_timeout_exception(exc: Exception) -> bool:
    """Tell whether *exc* is a timeout error.

    An exception counts as a timeout when it is an instance of the
    built-in ``TimeoutError`` or, if the optional *requests* package can
    be imported, of ``requests.exceptions.Timeout``.

    Args:
        exc: The exception to classify.

    Returns:
        True for a timeout exception, False for anything else.
    """
    try:
        import requests
    except ImportError:
        # requests is optional; without it only the builtin type applies.
        timeout_types = (TimeoutError,)
    else:
        timeout_types = (requests.exceptions.Timeout, TimeoutError)
    return isinstance(exc, timeout_types)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def open_native_graph(db_path: str, collection_name: str) -> Any:
    """Open a graph collection from a native (embedded) VelesDB database.

    A ``velesdb.Database`` is created for ``db_path`` and the named graph
    collection is looked up on it.

    Note:
        Per the package's own documentation, the returned collection
        keeps an internal reference to the ``Database`` created here, so
        callers do not need to hold a separate ``Database`` handle.

    Args:
        db_path: Filesystem path of the VelesDB database directory.
        collection_name: Name of the graph collection to open.

    Returns:
        The object returned by ``Database.get_graph_collection``.

    Raises:
        ImportError: If the *velesdb* package cannot be imported.
        ValueError: If the lookup returns ``None`` (collection not found).
    """
    try:
        import velesdb  # type: ignore[import]
    except ImportError as exc:
        message = (
            "The 'velesdb' package is required for native mode. "
            "Install it with: pip install velesdb"
        )
        raise ImportError(message) from exc

    database = velesdb.Database(db_path)
    collection = database.get_graph_collection(collection_name)
    if collection is not None:
        return collection

    raise ValueError(
        f"Graph collection '{collection_name}' not found in database "
        f"at '{db_path}'"
    )
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""ID generation utilities shared across VelesDB Python integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import random
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def make_initial_id_counter() -> int:
    """Produce a seed value for an ID counter.

    The seed is the current Unix time in milliseconds plus a random
    offset drawn from ``[1_000_000, 9_999_999]``, which makes it unlikely
    that two instances started close together share a seed.

    Returns:
        A positive integer to start an ID counter from.
    """
    now_ms = int(time.time() * 1000)
    jitter = random.randint(1_000_000, 9_999_999)
    return now_ms + jitter
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def stable_hash_id(value: str) -> int:
    """Derive a deterministic numeric ID from a string via SHA-256.

    Unlike the built-in ``hash()``, whose output for strings varies
    between processes, the result depends only on ``value``. The first
    eight bytes of the digest are read as a big-endian integer and the
    top bit is cleared, so the ID fits the non-negative range of a signed
    64-bit integer.

    Args:
        value: String to hash; it is encoded as UTF-8 first.

    Returns:
        Integer in the range ``0 .. 2**63 - 1``.
    """
    digest = hashlib.sha256(value.encode("utf-8")).digest()
    top_64_bits = int.from_bytes(digest[:8], byteorder="big")
    # Mask off the sign bit so the value stays within positive i64 range.
    return top_64_bits & 0x7FFFFFFFFFFFFFFF
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Memory result helpers shared across VelesDB Python integrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, List
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def format_procedural_results(results: List[Any]) -> List[Dict[str, Any]]:
    """Project raw procedural-recall results onto a fixed set of four keys.

    Args:
        results: Sequence of result objects. Each one must support
            ``item[key]`` lookup for ``"name"``, ``"steps"``,
            ``"confidence"`` and ``"score"``.

    Returns:
        One new dict per input element, holding exactly the keys
        ``name``, ``steps``, ``confidence`` and ``score`` (in that
        order). An empty input gives an empty list.
    """
    wanted_keys = ("name", "steps", "confidence", "score")
    return [{key: row[key] for key in wanted_keys} for row in results]
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
"""Security utilities shared across all VelesDB Python integrations.
|
|
2
|
+
|
|
3
|
+
Provides input validation, sanitisation, and security constants used by both
|
|
4
|
+
the ``langchain_velesdb`` and ``llamaindex_velesdb`` packages.
|
|
5
|
+
|
|
6
|
+
All public names are re-exported from ``velesdb_common`` for convenience.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import math
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
# Security constants
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
MAX_QUERY_LENGTH = 10_000 # Max characters for VelesQL queries
|
|
21
|
+
MAX_TEXT_LENGTH = 1_000_000 # Max characters (not bytes) per document; about 1 MB only for single-byte text
|
|
22
|
+
MAX_BATCH_SIZE = 10_000 # Max documents per batch operation
|
|
23
|
+
MAX_K_VALUE = 10_000 # Max top_k for search
|
|
24
|
+
MAX_DIMENSION = 65_536 # Max vector dimension (reasonable for any model)
|
|
25
|
+
MIN_DIMENSION = 1
|
|
26
|
+
MAX_PATH_LENGTH = 4096 # Max path length
|
|
27
|
+
ALLOWED_METRICS = frozenset({"cosine", "euclidean", "dot", "hamming", "jaccard"})
|
|
28
|
+
ALLOWED_STORAGE_MODES = frozenset({"full", "sq8", "binary"})
|
|
29
|
+
MAX_SPARSE_VECTOR_SIZE = 100_000 # Max entries in a sparse vector
|
|
30
|
+
DEFAULT_TIMEOUT_MS = 30_000 # 30 seconds; despite the name, validate_timeout uses this as the maximum allowed timeout
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SecurityError(ValueError):
    """Error raised by the validators in this module when input is rejected.

    It derives from ``ValueError``, so handlers written as
    ``except ValueError`` catch it as well.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------------------------------------------------------
|
|
38
|
+
# Path validation
|
|
39
|
+
# ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
def validate_path(path: str) -> str:
    """Validate a filesystem path and return it in absolute, normalised form.

    The checks run on the raw string, before normalisation: it must be a
    non-empty ``str`` of at most ``MAX_PATH_LENGTH`` characters, contain
    no null byte, contain no parent-directory reference (``..`` followed
    by a separator, or ``..`` as the final component), and must not start
    with two separators (UNC / network-share style).

    This function does not resolve symlinks and does not confine the
    result to any base directory; callers needing that must check it
    themselves.

    Args:
        path: The path to validate.

    Returns:
        ``os.path.abspath(os.path.normpath(path))``.

    Raises:
        SecurityError: If the path is empty, not a string, too long, or
            matches one of the rejected patterns.
    """
    if not path:
        raise SecurityError("Path cannot be empty")

    # Fail with SecurityError rather than letting len()/``in`` raise
    # TypeError on bytes, Path objects and other non-string input.
    if not isinstance(path, str):
        raise SecurityError(f"Path must be a string, got {type(path).__name__}")

    if len(path) > MAX_PATH_LENGTH:
        raise SecurityError(f"Path exceeds maximum length of {MAX_PATH_LENGTH}")

    # Check for null bytes (path injection) before normalising
    if "\x00" in path:
        raise SecurityError("Path contains null bytes")

    suspicious_patterns = (
        r"\.\.[/\\]",  # Parent directory traversal ("../", "..\\")
        # A bare ".." or a trailing "/.." also climbs one level but has no
        # separator after it, so the pattern above does not catch it.
        r"(?:^|[/\\])\.\.$",
        r"^[/\\]{2}",  # UNC paths (network shares)
    )
    if any(re.search(pattern, path) for pattern in suspicious_patterns):
        raise SecurityError("Suspicious path pattern detected")

    try:
        abs_path = os.path.abspath(os.path.normpath(path))
    except (ValueError, OSError) as exc:
        raise SecurityError(f"Invalid path: {exc}") from exc

    return abs_path
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# ---------------------------------------------------------------------------
|
|
85
|
+
# Numeric / range validators
|
|
86
|
+
# ---------------------------------------------------------------------------
|
|
87
|
+
|
|
88
|
+
def validate_dimension(dimension: int) -> int:
    """Validate a vector dimension.

    Args:
        dimension: Vector dimension to validate.

    Returns:
        ``dimension`` unchanged.

    Raises:
        SecurityError: If ``dimension`` is not an ``int`` (``bool`` is
            rejected as well) or lies outside
            ``[MIN_DIMENSION, MAX_DIMENSION]``.
    """
    # bool is a subclass of int, so True/False would otherwise pass as 1/0.
    # Reject it explicitly, as validate_weight and the sparse-vector checks do.
    if isinstance(dimension, bool) or not isinstance(dimension, int):
        raise SecurityError(
            f"Dimension must be an integer, got {type(dimension).__name__}"
        )
    if dimension < MIN_DIMENSION:
        raise SecurityError(f"Dimension must be at least {MIN_DIMENSION}")
    if dimension > MAX_DIMENSION:
        raise SecurityError(f"Dimension exceeds maximum of {MAX_DIMENSION}")
    return dimension
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def validate_k(k: int, param_name: str = "k") -> int:
    """Validate a top-k style parameter.

    Args:
        k: Number of results to return.
        param_name: Name used for the parameter in error messages.

    Returns:
        ``k`` unchanged.

    Raises:
        SecurityError: If ``k`` is not an ``int`` (``bool`` is rejected as
            well), is below 1, or exceeds ``MAX_K_VALUE``.
    """
    # bool is a subclass of int, so True would otherwise be accepted as k=1.
    if isinstance(k, bool) or not isinstance(k, int):
        raise SecurityError(
            f"{param_name} must be an integer, got {type(k).__name__}"
        )
    if k < 1:
        raise SecurityError(f"{param_name} must be at least 1")
    if k > MAX_K_VALUE:
        raise SecurityError(f"{param_name} exceeds maximum of {MAX_K_VALUE}")
    return k
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def validate_batch_size(size: int) -> int:
    """Validate the number of items in a batch operation.

    Zero is accepted (an empty batch).

    Args:
        size: Number of items in the batch.

    Returns:
        ``size`` unchanged.

    Raises:
        SecurityError: If ``size`` is not an ``int`` (``bool`` is rejected
            as well), is negative, or exceeds ``MAX_BATCH_SIZE``.
    """
    # Without this check a non-numeric size would escape as a TypeError from
    # the comparisons below instead of the SecurityError callers expect.
    if isinstance(size, bool) or not isinstance(size, int):
        raise SecurityError(
            f"Batch size must be an integer, got {type(size).__name__}"
        )
    if size < 0:
        raise SecurityError(f"Batch size must be non-negative, got {size}")
    if size > MAX_BATCH_SIZE:
        raise SecurityError(
            f"Batch size {size} exceeds maximum of {MAX_BATCH_SIZE}. "
            f"Process in smaller batches."
        )
    return size
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def validate_weight(weight: float, name: str = "weight") -> float:
    """Validate a weight in the closed interval ``[0.0, 1.0]``.

    Args:
        weight: Weight value (``int`` or ``float``; ``bool`` is rejected).
        name: Name used for the parameter in error messages.

    Returns:
        The weight converted to ``float``.

    Raises:
        SecurityError: If ``weight`` is a ``bool``, is not a number, is
            NaN, or lies outside ``[0.0, 1.0]``.
    """
    if isinstance(weight, bool):
        raise SecurityError("Weight must be a number, not bool")
    if not isinstance(weight, (int, float)):
        raise SecurityError(f"{name} must be a number, got {type(weight).__name__}")
    # Written as a positive range test on purpose: every comparison with NaN
    # is false, so "weight < 0.0 or weight > 1.0" would let NaN through.
    if not (0.0 <= weight <= 1.0):
        raise SecurityError(f"{name} must be between 0.0 and 1.0, got {weight}")
    return float(weight)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def validate_timeout(timeout_ms: int) -> int:
    """Validate a timeout expressed in milliseconds.

    Args:
        timeout_ms: Timeout value in milliseconds.

    Returns:
        ``timeout_ms`` unchanged.

    Raises:
        SecurityError: If ``timeout_ms`` is not an ``int`` (``bool`` is
            rejected as well), is below 1, or exceeds
            ``DEFAULT_TIMEOUT_MS``, which serves as the upper bound here.
    """
    # bool is a subclass of int, so True would otherwise be accepted as 1 ms.
    if isinstance(timeout_ms, bool) or not isinstance(timeout_ms, int):
        raise SecurityError(
            f"Timeout must be an integer, got {type(timeout_ms).__name__}"
        )
    if timeout_ms < 1:
        raise SecurityError("Timeout must be at least 1ms")
    if timeout_ms > DEFAULT_TIMEOUT_MS:
        raise SecurityError(
            f"Timeout exceeds maximum of {DEFAULT_TIMEOUT_MS}ms (30s)"
        )
    return timeout_ms
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
# ---------------------------------------------------------------------------
|
|
205
|
+
# String / enum validators
|
|
206
|
+
# ---------------------------------------------------------------------------
|
|
207
|
+
|
|
208
|
+
def validate_text(text: str, max_length: int = MAX_TEXT_LENGTH) -> str:
    """Check that a document text is a string within the length limit.

    Args:
        text: Text to validate.
        max_length: Largest accepted ``len(text)``; defaults to
            ``MAX_TEXT_LENGTH``.

    Returns:
        ``text`` unchanged.

    Raises:
        SecurityError: If ``text`` is not a ``str`` or is longer than
            ``max_length`` characters.
    """
    if not isinstance(text, str):
        type_name = type(text).__name__
        raise SecurityError(f"Text must be a string, got {type_name}")

    too_long = len(text) > max_length
    if too_long:
        raise SecurityError(f"Text exceeds maximum length of {max_length}")

    return text
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def validate_query(query: str) -> str:
    """Check a VelesQL query string for type, length and null bytes.

    The query text itself is not parsed or otherwise inspected.

    Args:
        query: VelesQL query to validate.

    Returns:
        ``query`` unchanged.

    Raises:
        SecurityError: If ``query`` is not a ``str``, is longer than
            ``MAX_QUERY_LENGTH`` characters, or contains a null byte.
    """
    if not isinstance(query, str):
        type_name = type(query).__name__
        raise SecurityError(f"Query must be a string, got {type_name}")

    if len(query) > MAX_QUERY_LENGTH:
        raise SecurityError(f"Query exceeds maximum length of {MAX_QUERY_LENGTH}")

    # An embedded NUL could truncate the query further down the stack.
    has_null_byte = "\x00" in query
    if has_null_byte:
        raise SecurityError("Query contains null bytes")

    return query
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def validate_metric(metric: str) -> str:
    """Check a distance metric name against ``ALLOWED_METRICS``.

    Matching is case-insensitive: the name is lower-cased before lookup.

    Args:
        metric: Distance metric name.

    Returns:
        The lower-cased metric name.

    Raises:
        SecurityError: If ``metric`` is not a ``str`` or its lower-cased
            form is not in ``ALLOWED_METRICS``.
    """
    if not isinstance(metric, str):
        raise SecurityError(f"Metric must be a string, got {type(metric).__name__}")

    normalised = metric.lower()
    if normalised in ALLOWED_METRICS:
        return normalised

    allowed = ", ".join(sorted(ALLOWED_METRICS))
    raise SecurityError(f"Invalid metric '{metric}'. Allowed: {allowed}")
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def validate_storage_mode(mode: str) -> str:
    """Check a vector storage mode name against ``ALLOWED_STORAGE_MODES``.

    Matching is case-insensitive: the name is lower-cased before lookup.

    Args:
        mode: Storage mode name.

    Returns:
        The lower-cased storage mode name.

    Raises:
        SecurityError: If ``mode`` is not a ``str`` or its lower-cased
            form is not in ``ALLOWED_STORAGE_MODES``.
    """
    if not isinstance(mode, str):
        type_name = type(mode).__name__
        raise SecurityError(f"Storage mode must be a string, got {type_name}")

    normalised = mode.lower()
    if normalised in ALLOWED_STORAGE_MODES:
        return normalised

    allowed = ", ".join(sorted(ALLOWED_STORAGE_MODES))
    raise SecurityError(f"Invalid storage mode '{mode}'. Allowed: {allowed}")
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def validate_collection_name(name: str) -> str:
    """Validate a collection name.

    A valid name is a non-empty string of at most 256 characters made up
    only of ASCII letters, digits, underscores and hyphens.

    Args:
        name: Collection name.

    Returns:
        ``name`` unchanged.

    Raises:
        SecurityError: If ``name`` is not a ``str``, is empty, is too
            long, or contains any other character.
    """
    if not isinstance(name, str):
        raise SecurityError(
            f"Collection name must be a string, got {type(name).__name__}"
        )
    if not name:
        raise SecurityError("Collection name cannot be empty")
    if len(name) > 256:
        raise SecurityError("Collection name exceeds maximum length of 256")
    # fullmatch, not match with "$": "$" also matches just before a trailing
    # newline, which would let a name such as "docs\n" through.
    if not re.fullmatch(r"[a-zA-Z0-9_-]+", name):
        raise SecurityError(
            "Collection name can only contain alphanumeric characters, "
            "underscores, and hyphens"
        )
    return name
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def validate_url(url: str) -> str:
    """Validate a server URL.

    The URL must be a non-empty string that starts with ``http://`` or
    ``https://``, contains no CR, LF or null character, and names a host.

    Args:
        url: Server URL.

    Returns:
        ``url`` unchanged.

    Raises:
        SecurityError: If the URL fails any of the checks above or cannot
            be parsed.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present.
    from urllib.parse import urlsplit

    if not isinstance(url, str):
        raise SecurityError(f"URL must be a string, got {type(url).__name__}")
    if not url:
        raise SecurityError("URL cannot be empty")
    # Only allow http/https
    if not url.startswith(("http://", "https://")):
        raise SecurityError("URL must start with http:// or https://")
    # CR/LF enable header injection; NUL can truncate the URL downstream.
    if any(c in url for c in ("\n", "\r", "\x00")):
        raise SecurityError("URL contains invalid characters")
    # A scheme on its own ("http://", "http://:8080") is not a usable server
    # address, so require a host component.
    try:
        host = urlsplit(url).hostname
    except ValueError as exc:
        raise SecurityError(f"Invalid URL: {exc}") from exc
    if not host:
        raise SecurityError("URL must include a host")
    return url
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
# ---------------------------------------------------------------------------
|
|
352
|
+
# Sparse vector validators
|
|
353
|
+
# ---------------------------------------------------------------------------
|
|
354
|
+
|
|
355
|
+
def _validate_sparse_entry(key: Any, value: Any) -> None:
    """Check one ``(term ID, weight)`` pair of a sparse vector.

    The key must be an ``int`` and the value an ``int`` or ``float``;
    ``bool`` is rejected for both, and a float value must be finite.

    Raises:
        SecurityError: If the key or the value fails those checks.
    """
    key_ok = isinstance(key, int) and not isinstance(key, bool)
    if not key_ok:
        raise SecurityError(
            f"Sparse vector keys must be int (term IDs), "
            f"got {type(key).__name__} for key {key!r}"
        )

    value_ok = isinstance(value, (int, float)) and not isinstance(value, bool)
    if not value_ok:
        raise SecurityError(
            f"Sparse vector values must be int or float (weights), "
            f"got {type(value).__name__} for key {key}"
        )

    # Only floats can be NaN or infinite; ints need no finiteness check.
    if isinstance(value, float):
        if not math.isfinite(value):
            raise SecurityError(
                f"Sparse vector weights must be finite, got {value} for key {key}"
            )
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def validate_sparse_vector(sparse_vector: Any) -> dict:
    """Validate a sparse vector given as a dict of term IDs to weights.

    Args:
        sparse_vector: Dict mapping ``int`` keys to ``int``/``float``
            values.

    Returns:
        The same dict object, unchanged.

    Raises:
        SecurityError: If the argument is not a ``dict``, has more than
            ``MAX_SPARSE_VECTOR_SIZE`` entries, or any entry is rejected
            by ``_validate_sparse_entry``.
    """
    if not isinstance(sparse_vector, dict):
        type_name = type(sparse_vector).__name__
        raise SecurityError(f"Sparse vector must be a dict, got {type_name}")

    entry_count = len(sparse_vector)
    if entry_count > MAX_SPARSE_VECTOR_SIZE:
        raise SecurityError(
            f"Sparse vector has {entry_count} entries, "
            f"exceeds maximum of {MAX_SPARSE_VECTOR_SIZE}"
        )

    for term_id, weight in sparse_vector.items():
        _validate_sparse_entry(term_id, weight)

    return sparse_vector
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
src/velesdb_common/__init__.py
|
|
3
|
+
src/velesdb_common/graph.py
|
|
4
|
+
src/velesdb_common/ids.py
|
|
5
|
+
src/velesdb_common/memory.py
|
|
6
|
+
src/velesdb_common/security.py
|
|
7
|
+
src/velesdb_common.egg-info/PKG-INFO
|
|
8
|
+
src/velesdb_common.egg-info/SOURCES.txt
|
|
9
|
+
src/velesdb_common.egg-info/dependency_links.txt
|
|
10
|
+
src/velesdb_common.egg-info/top_level.txt
|
|
11
|
+
tests/test_graph.py
|
|
12
|
+
tests/test_ids.py
|
|
13
|
+
tests/test_memory.py
|
|
14
|
+
tests/test_security.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
velesdb_common
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from velesdb_common.graph import (
|
|
3
|
+
build_graph_rest_payload,
|
|
4
|
+
is_timeout_exception,
|
|
5
|
+
)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_build_graph_rest_payload_basic():
    """Source and depth are echoed back; the limit is expand_k doubled."""
    result = build_graph_rest_payload(
        "node-1", max_depth=3, expand_k=10, rel_types=[]
    )
    observed = (result["source"], result["max_depth"], result["limit"])
    assert observed == ("node-1", 3, 20)  # limit == expand_k * 2
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_build_graph_rest_payload_with_rel_types():
    """Relationship type filters are passed through to the payload."""
    wanted = ["KNOWS", "LIKES"]
    result = build_graph_rest_payload(
        "node-1", max_depth=2, expand_k=5, rel_types=["KNOWS", "LIKES"]
    )
    assert result["rel_types"] == wanted
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_build_graph_rest_payload_empty_rel_types():
    """An empty filter list stays an empty list in the payload."""
    result = build_graph_rest_payload(
        "node-1", max_depth=1, expand_k=10, rel_types=[]
    )
    assert result["rel_types"] == []
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_is_timeout_exception_with_timeout():
    """The builtin TimeoutError is recognised as a timeout."""
    verdict = is_timeout_exception(TimeoutError("Connection timed out"))
    assert verdict is True
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_is_timeout_exception_with_other():
    """An unrelated exception type is not reported as a timeout."""
    verdict = is_timeout_exception(ValueError("Some error"))
    assert verdict is False
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from velesdb_common.ids import make_initial_id_counter, stable_hash_id
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_make_initial_id_counter_returns_positive_int():
    """The seed is an int strictly greater than zero."""
    seed = make_initial_id_counter()
    assert isinstance(seed, int)
    assert seed > 0
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_make_initial_id_counter_unique():
    """A burst of 100 seeds contains at least 90 distinct values."""
    seen = set()
    for _ in range(100):
        seen.add(make_initial_id_counter())
    # Allow for rare collisions: at least 90% must be unique.
    assert len(seen) >= 90
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_stable_hash_id_deterministic():
    """Hashing the same string twice gives the same ID."""
    first = stable_hash_id("hello")
    second = stable_hash_id("hello")
    assert first == second
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_stable_hash_id_different_inputs():
    """Two different strings map to different IDs."""
    hello_id = stable_hash_id("hello")
    world_id = stable_hash_id("world")
    assert hello_id != world_id
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def test_stable_hash_id_returns_positive_int():
    """The ID for a sample string is an int greater than zero."""
    hashed = stable_hash_id("test")
    assert isinstance(hashed, int)
    assert hashed > 0
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from velesdb_common.memory import format_procedural_results
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def test_format_procedural_results_basic():
    """A single raw result is projected onto the four expected keys."""
    raw = {"name": "proc1", "steps": ["a", "b"], "confidence": 0.9, "score": 0.85}
    formatted = format_procedural_results([raw])
    assert len(formatted) == 1
    entry = formatted[0]
    assert entry["name"] == "proc1"
    assert entry["steps"] == ["a", "b"]
    assert entry["confidence"] == 0.9
    assert entry["score"] == 0.85
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_format_procedural_results_empty():
    """No input results produce an empty list."""
    formatted = format_procedural_results([])
    assert formatted == []
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from velesdb_common.security import (
|
|
3
|
+
validate_batch_size,
|
|
4
|
+
validate_weight,
|
|
5
|
+
SecurityError,
|
|
6
|
+
validate_k,
|
|
7
|
+
validate_text,
|
|
8
|
+
ALLOWED_METRICS,
|
|
9
|
+
ALLOWED_STORAGE_MODES,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_validate_batch_size_rejects_negative():
    """A negative batch size raises SecurityError mentioning 'non-negative'."""
    expected_failure = pytest.raises(SecurityError, match="non-negative")
    with expected_failure:
        validate_batch_size(-1)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_validate_batch_size_accepts_zero():
    """An empty batch (size 0) is valid and returned as-is."""
    validated = validate_batch_size(0)
    assert validated == 0
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_validate_batch_size_accepts_valid():
    """A size within the limit is returned as-is."""
    validated = validate_batch_size(100)
    assert validated == 100
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_validate_weight_rejects_bool_true():
    """True is rejected even though bool is a subclass of int."""
    expected_failure = pytest.raises(SecurityError, match="not bool")
    with expected_failure:
        validate_weight(True)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_validate_weight_rejects_bool_false():
    """False is rejected even though bool is a subclass of int."""
    expected_failure = pytest.raises(SecurityError, match="not bool")
    with expected_failure:
        validate_weight(False)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_validate_weight_accepts_valid_float():
    """A mid-range value and both interval bounds come back unchanged."""
    for candidate in (0.5, 0.0, 1.0):
        assert validate_weight(candidate) == candidate
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def test_allowed_metrics_is_frozenset():
    """The metric allow-list is an immutable frozenset."""
    is_frozen = isinstance(ALLOWED_METRICS, frozenset)
    assert is_frozen
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_allowed_storage_modes_is_frozenset():
    """The storage-mode allow-list is an immutable frozenset."""
    is_frozen = isinstance(ALLOWED_STORAGE_MODES, frozenset)
    assert is_frozen
|