usearch-2.23.0-cp314-cp314t-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- usearch/__init__.py +152 -0
- usearch/client.py +120 -0
- usearch/compiled.cpython-314t-darwin.so +0 -0
- usearch/eval.py +512 -0
- usearch/index.py +1721 -0
- usearch/io.py +138 -0
- usearch/numba.py +110 -0
- usearch/py.typed +0 -0
- usearch/server.py +131 -0
- usearch-2.23.0.dist-info/METADATA +602 -0
- usearch-2.23.0.dist-info/RECORD +14 -0
- usearch-2.23.0.dist-info/WHEEL +6 -0
- usearch-2.23.0.dist-info/licenses/LICENSE +201 -0
- usearch-2.23.0.dist-info/top_level.txt +1 -0
usearch/__init__.py
ADDED
@@ -0,0 +1,152 @@

import os
import sys
import ctypes
import platform
import warnings
import urllib.request
from typing import Optional, Tuple
from urllib.error import HTTPError

#! Load SimSIMD before the USearch compiled module
#! We can't just use the `import simsimd` as on Linux and Windows (unlike MacOS),
#! the symbols are not automatically loaded into the global namespace.
try:
    import simsimd

    # Cross-platform check for Windows
    if sys.platform == "win32":
        # Add the directory where the `.dll` is located
        dll_directory = os.path.dirname(simsimd.__file__)
        os.add_dll_directory(dll_directory)

        # Load SimSIMD library using `ctypes` without `RTLD_GLOBAL`
        simsimd_lib = ctypes.CDLL(simsimd.__file__)

    else:
        # Non-Windows: Use `RTLD_GLOBAL` for Unix-based systems (Linux/macOS)
        simsimd_lib = ctypes.CDLL(simsimd.__file__, mode=ctypes.RTLD_GLOBAL)

except ImportError:
    pass  # If the user doesn't want SimSIMD, we assume they know what they're doing


from usearch.compiled import (
    VERSION_MAJOR,
    VERSION_MINOR,
    VERSION_PATCH,
    # Default values:
    DEFAULT_CONNECTIVITY,
    DEFAULT_EXPANSION_ADD,
    DEFAULT_EXPANSION_SEARCH,
    # Dependencies:
    USES_OPENMP,
    USES_FP16LIB,
    USES_SIMSIMD,
    USES_SIMSIMD_DYNAMIC_DISPATCH,
)

__version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}"


class BinaryManager:
    def __init__(self, version: Optional[str] = None):
        if version is None:
            version = __version__
        self.version = version or __version__

    @staticmethod
    def determine_download_dir():
        # Check if running within a virtual environment
        virtual_env = os.getenv("VIRTUAL_ENV")
        if virtual_env:
            # Use a subdirectory within the virtual environment for binaries
            return os.path.join(virtual_env, "bin", "usearch_binaries")
        else:
            # Fallback to a directory in the user's home folder
            home_dir = os.path.expanduser("~")
            return os.path.join(home_dir, ".usearch", "binaries")

    @staticmethod
    def determine_download_url(version: str, filename: str) -> str:
        base_url = "https://github.com/unum-cloud/usearch/releases/download"
        url = f"{base_url}/v{version}/{filename}"
        return url

    def get_binary_name(self) -> Tuple[str, str]:
        version = self.version
        os_map = {"Linux": "linux", "Windows": "windows", "Darwin": "macos"}
        arch_map = {
            "x86_64": "amd64" if platform.system() != "Darwin" else "x86_64",
            "AMD64": "amd64",
            "arm64": "arm64",
            "aarch64": "arm64",
            "x86": "x86",
        }
        os_part = os_map.get(platform.system(), "")
        arch = platform.machine()
        arch_part = arch_map.get(arch, "")
        extension = {"Linux": "so", "Windows": "dll", "Darwin": "dylib"}.get(platform.system(), "")
        source_filename = f"usearch_sqlite_{os_part}_{arch_part}_{version}.{extension}"
        target_filename = f"usearch_sqlite.{extension}"
        return source_filename, target_filename

    def sqlite_found_or_downloaded(self) -> Optional[str]:
        """
        Attempts to locate the pre-installed `usearch_sqlite` binary.
        If not found, downloads it from GitHub.

        Returns:
            The path to the binary if found or downloaded, otherwise None.
        """
        # Search local directories
        local_dirs = ["build", "build_artifacts", "build_release", "build_debug"]
        source_filename, target_filename = self.get_binary_name()

        # Check local development directories first
        for local_dir in local_dirs:

            local_path = os.path.join(local_dir, target_filename)
            if os.path.exists(local_path):
                path_wout_extension, _, _ = local_path.rpartition(".")
                return path_wout_extension

            # Most build systems on POSIX would prefix the library name with "lib"
            local_path = os.path.join(local_dir, "lib" + target_filename)
            if os.path.exists(local_path):
                path_wout_extension, _, _ = local_path.rpartition(".")
                return path_wout_extension

        # Check local installation directories, in case the build is already installed
        download_dir = self.determine_download_dir()
        local_path = os.path.join(download_dir, target_filename)
        if not os.path.exists(local_path):

            # If not found locally, warn the user and download from GitHub
            warnings.warn("Will download `usearch_sqlite` binary from GitHub.", UserWarning)
            try:
                source_url = self.determine_download_url(self.version, source_filename)
                os.makedirs(download_dir, exist_ok=True)
                urllib.request.urlretrieve(source_url, local_path)
            except HTTPError as e:
                # If the download fails due to HTTPError (e.g., 404 Not Found), like a missing lib version
                if e.code == 404:
                    warnings.warn(f"Download failed: {e.url} could not be found.", UserWarning)
                else:
                    warnings.warn(f"Download failed with HTTP error: {e.code} {e.reason}", UserWarning)
                return None

        # Handle the case where binary_path does not exist after supposed successful download
        if os.path.exists(local_path):
            path_wout_extension, _, _ = local_path.rpartition(".")
            return path_wout_extension
        else:
            warnings.warn("Failed to download `usearch_sqlite` binary from GitHub.", UserWarning)
            return None


def sqlite_path(version: str = None) -> str:
    manager = BinaryManager(version=version)
    result = manager.sqlite_found_or_downloaded()
    if result is None:
        raise FileNotFoundError("Failed to find or download `usearch_sqlite` binary.")
    return result
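Note: `sqlite_path()` deliberately returns the binary path without its file suffix, which is the form SQLite accepts when loading a run-time extension (it appends the platform-specific suffix itself). A minimal usage sketch with Python's built-in `sqlite3` module is shown below; the in-memory connection is illustrative only, and it assumes the interpreter's SQLite build permits extension loading.

import sqlite3

from usearch import sqlite_path

# Illustrative only: any connection would do; first use may trigger a
# download of the `usearch_sqlite` binary from GitHub releases.
conn = sqlite3.connect(":memory:")
conn.enable_load_extension(True)
conn.load_extension(sqlite_path())  # path without suffix; SQLite adds .so/.dylib/.dll
conn.enable_load_extension(False)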
usearch/client.py
ADDED
@@ -0,0 +1,120 @@

from typing import Union, Optional, List

import numpy as np
from ucall.client import Client

from usearch.index import Matches


def _vector_to_ascii(vector: np.ndarray) -> Optional[str]:
    if vector.dtype != np.int8 and vector.dtype != np.uint8 and vector.dtype != np.byte:
        return None
    if not np.all((vector >= 0) & (vector <= 100)):
        return None

    # Let's map [0, 100] to the range from [23, 123],
    # poking 60 and replacing it with 124.
    vector += 23
    vector[vector == 60] = 124
    ascii_vector = str(vector)
    return ascii_vector


class IndexClient:
    def __init__(self, uri: str = "127.0.0.1", port: int = 8545, use_http: bool = True) -> None:
        self.client = Client(uri=uri, port=port, use_http=use_http)

    def add_one(self, key: int, vector: np.ndarray):
        assert isinstance(key, int)
        assert isinstance(vector, np.ndarray)
        vector = vector.flatten()
        ascii_vector = _vector_to_ascii(vector)
        if ascii_vector:
            self.client.add_ascii(key=key, string=ascii_vector)
        else:
            self.client.add_one(key=key, vectors=vector)

    def add_many(self, keys: np.ndarray, vectors: np.ndarray):
        assert isinstance(keys, np.ndarray)
        assert isinstance(vectors, np.ndarray)
        assert keys.ndim == 1 and vectors.ndim == 2
        assert keys.shape[0] == vectors.shape[0]
        self.client.add_many(keys=keys, vectors=vectors)

    def add(self, keys: Union[np.ndarray, int], vectors: np.ndarray):
        if isinstance(keys, int) or len(keys) == 1:
            return self.add_one(keys, vectors)
        else:
            return self.add_many(keys, vectors)

    def search_one(self, vector: np.ndarray, count: int) -> Matches:
        matches: List[dict] = []
        vector = vector.flatten()
        ascii_vector = _vector_to_ascii(vector)
        if ascii_vector:
            matches = self.client.search_ascii(string=ascii_vector, count=count)
        else:
            matches = self.client.search_one(vector=vector, count=count)

        print(matches.data)
        matches = matches.json

        # Pre-allocate the result arrays for a single-row batch
        keys = np.zeros((1, count), dtype=np.uint32)
        distances = np.zeros((1, count), dtype=np.float32)
        counts = np.zeros((1,), dtype=np.uint32)
        for col, result in enumerate(matches):
            keys[0, col] = result["key"]
            distances[0, col] = result["distance"]
        counts[0] = len(matches)

        return keys, distances, counts

    def search_many(self, vectors: np.ndarray, count: int) -> Matches:
        batch_size: int = vectors.shape[0]
        list_of_matches: List[List[dict]] = self.client.search_many(vectors=vectors, count=count)

        # Pre-allocate the result arrays for the whole batch
        keys = np.zeros((batch_size, count), dtype=np.uint32)
        distances = np.zeros((batch_size, count), dtype=np.float32)
        counts = np.zeros((batch_size,), dtype=np.uint32)
        for row, matches in enumerate(list_of_matches):
            for col, result in enumerate(matches):
                keys[row, col] = result["key"]
                distances[row, col] = result["distance"]
            counts[row] = len(matches)

        return keys, distances, counts

    def search(self, vectors: np.ndarray, count: int) -> Matches:
        if vectors.ndim == 1 or (vectors.ndim == 2 and vectors.shape[0] == 1):
            return self.search_one(vectors, count)
        else:
            return self.search_many(vectors, count)

    def __len__(self):
        return self.client.size().json()

    @property
    def ndim(self):
        return self.client.ndim().json()

    def capacity(self):
        return self.client.capacity().json()

    def connectivity(self):
        return self.client.connectivity().json()

    def load(self, path: str):
        raise NotImplementedError()

    def view(self, path: str):
        raise NotImplementedError()

    def save(self, path: str):
        raise NotImplementedError()


if __name__ == "__main__":
    index = IndexClient()
    index.add(42, np.array([0.4] * 256, dtype=np.float32))
    results = index.search(np.array([0.4] * 256, dtype=np.float32), 10)
    print(results)
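Note: the ASCII fast path in `_vector_to_ascii` shifts small integer vectors into the byte range [23, 123] before sending them over the wire, and remaps byte 60 ('<') to 124 ('|'), presumably because the transport cannot carry it safely; 124 is otherwise unused since the largest shifted value is 123. A rough round-trip sketch of that mapping follows; the `encode`/`decode` helpers are hypothetical illustrations, not part of the package, which only performs the forward shift and leaves decoding to the server side.

import numpy as np

# Hypothetical round-trip sketch of the [0, 100] -> [23, 123] byte mapping.
def encode(vector: np.ndarray) -> bytes:
    shifted = vector.astype(np.uint8) + 23  # [0, 100] -> [23, 123]
    shifted[shifted == 60] = 124            # avoid byte 60 ('<'); 124 ('|') is free
    return shifted.tobytes()

def decode(payload: bytes) -> np.ndarray:
    shifted = np.frombuffer(payload, dtype=np.uint8).copy()
    shifted[shifted == 124] = 60            # undo the '<' dodge
    return (shifted - 23).astype(np.int8)   # back to [0, 100]

original = np.array([0, 37, 60, 100], dtype=np.int8)
assert np.array_equal(decode(encode(original)), original)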
usearch/compiled.cpython-314t-darwin.so
ADDED
Binary file