usearch 2.23.0__cp314-cp314t-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
usearch/__init__.py ADDED
@@ -0,0 +1,152 @@
1
+ import os
2
+ import sys
3
+ import ctypes
4
+ import platform
5
+ import warnings
6
+ import urllib.request
7
+ from typing import Optional, Tuple
8
+ from urllib.error import HTTPError
9
+
10
+ #! Load SimSIMD before the USearch compiled module
11
+ #! We can't just use the `import simsimd` as on Linux and Windows (unlike MacOS),
12
+ #! the symbols are not automatically loaded into the global namespace.
13
try:
    import simsimd

    if sys.platform != "win32":
        # Linux/macOS: open the extension with `RTLD_GLOBAL` so its symbols
        # become visible to libraries loaded afterwards.
        simsimd_lib = ctypes.CDLL(simsimd.__file__, mode=ctypes.RTLD_GLOBAL)
    else:
        # Windows: register the folder that holds the `.dll` as a DLL search
        # directory, then load it with the default mode (no `RTLD_GLOBAL`).
        dll_directory = os.path.dirname(simsimd.__file__)
        os.add_dll_directory(dll_directory)
        simsimd_lib = ctypes.CDLL(simsimd.__file__)

except ImportError:
    # SimSIMD is optional: when it is not installed, nothing is preloaded.
    pass
31
+
32
+
33
+ from usearch.compiled import (
34
+ VERSION_MAJOR,
35
+ VERSION_MINOR,
36
+ VERSION_PATCH,
37
+ # Default values:
38
+ DEFAULT_CONNECTIVITY,
39
+ DEFAULT_EXPANSION_ADD,
40
+ DEFAULT_EXPANSION_SEARCH,
41
+ # Dependencies:
42
+ USES_OPENMP,
43
+ USES_FP16LIB,
44
+ USES_SIMSIMD,
45
+ USES_SIMSIMD_DYNAMIC_DISPATCH,
46
+ )
47
+
48
+ __version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}"
49
+
50
+
51
class BinaryManager:
    """Locates the `usearch_sqlite` extension binary, downloading it if needed.

    `sqlite_found_or_downloaded` looks, in order, at local build directories
    (relative to the current working directory), at the download directory
    returned by `determine_download_dir`, and finally at the GitHub release
    asset matching `self.version`.
    """

    def __init__(self, version: Optional[str] = None):
        """
        Args:
            version: Release version, e.g. "2.23.0". `None` or an empty
                string selects the version of the installed package.
        """
        self.version = version or __version__

    @staticmethod
    def determine_download_dir():
        """Returns the directory where downloaded binaries are stored.

        Inside a virtual environment (`VIRTUAL_ENV` is set) this is
        `<venv>/bin/usearch_binaries`; otherwise `~/.usearch/binaries`.
        """
        virtual_env = os.getenv("VIRTUAL_ENV")
        if virtual_env:
            return os.path.join(virtual_env, "bin", "usearch_binaries")
        home_dir = os.path.expanduser("~")
        return os.path.join(home_dir, ".usearch", "binaries")

    @staticmethod
    def determine_download_url(version: str, filename: str) -> str:
        """Returns the GitHub release URL of asset `filename` for tag `v<version>`."""
        base_url = "https://github.com/unum-cloud/usearch/releases/download"
        return f"{base_url}/v{version}/{filename}"

    @staticmethod
    def _strip_extension(path: str) -> str:
        """Returns `path` without its final `.extension` part."""
        stem, _, _ = path.rpartition(".")
        return stem

    def get_binary_name(self) -> Tuple[str, str]:
        """Builds the release asset name and the local file name for this platform.

        Returns:
            `(source_filename, target_filename)`: the name of the asset on
            the release page and the name the file is stored under locally.
            On an unrecognized OS or CPU architecture the corresponding
            name parts are empty strings.
        """
        system = platform.system()
        os_map = {"Linux": "linux", "Windows": "windows", "Darwin": "macos"}
        arch_map = {
            # The x86-64 asset is named "x86_64" on macOS and "amd64" elsewhere.
            "x86_64": "x86_64" if system == "Darwin" else "amd64",
            "AMD64": "amd64",
            "arm64": "arm64",
            "aarch64": "arm64",
            "x86": "x86",
        }
        extension_map = {"Linux": "so", "Windows": "dll", "Darwin": "dylib"}

        os_part = os_map.get(system, "")
        arch_part = arch_map.get(platform.machine(), "")
        extension = extension_map.get(system, "")
        source_filename = f"usearch_sqlite_{os_part}_{arch_part}_{self.version}.{extension}"
        target_filename = f"usearch_sqlite.{extension}"
        return source_filename, target_filename

    def sqlite_found_or_downloaded(self) -> Optional[str]:
        """
        Attempts to locate the pre-installed `usearch_sqlite` binary.
        If not found, downloads it from GitHub.

        Returns:
            The path to the binary *without its file extension* if it was
            found or downloaded, otherwise None. Every download failure
            (HTTP error, unreachable host, file-system error) is reported
            with a `UserWarning` and results in None.
        """
        local_dirs = ["build", "build_artifacts", "build_release", "build_debug"]
        source_filename, target_filename = self.get_binary_name()

        # Check local development directories first. Most build systems on
        # POSIX prefix the library name with "lib", so try both spellings.
        for local_dir in local_dirs:
            for candidate in (target_filename, "lib" + target_filename):
                local_path = os.path.join(local_dir, candidate)
                if os.path.exists(local_path):
                    return self._strip_extension(local_path)

        # Check the installation directory, in case the binary is already there
        download_dir = self.determine_download_dir()
        local_path = os.path.join(download_dir, target_filename)
        if not os.path.exists(local_path):

            # If not found locally, warn the user and download from GitHub
            warnings.warn("Will download `usearch_sqlite` binary from GitHub.", UserWarning)
            source_url = self.determine_download_url(self.version, source_filename)
            # Download under a temporary name and rename afterwards, so an
            # interrupted transfer can never be mistaken for a valid binary.
            partial_path = f"{local_path}.{os.getpid()}.part"
            try:
                os.makedirs(download_dir, exist_ok=True)
                urllib.request.urlretrieve(source_url, partial_path)
                os.replace(partial_path, local_path)
            except HTTPError as e:
                # The server answered with an error, e.g. 404 for a missing lib version
                if e.code == 404:
                    warnings.warn(f"Download failed: {e.url} could not be found.", UserWarning)
                else:
                    warnings.warn(f"Download failed with HTTP error: {e.code} {e.reason}", UserWarning)
                return None
            except OSError as e:
                # `URLError` (no network, DNS failure, truncated content) and
                # plain file-system errors are all `OSError` subclasses.
                warnings.warn(f"Download failed: {e}", UserWarning)
                return None
            finally:
                if os.path.exists(partial_path):
                    try:
                        os.remove(partial_path)
                    except OSError:
                        pass  # Best-effort cleanup; the real outcome is already decided

        # Handle the case where the file does not exist after a supposedly successful download
        if os.path.exists(local_path):
            return self._strip_extension(local_path)

        warnings.warn("Failed to download `usearch_sqlite` binary from GitHub.", UserWarning)
        return None
145
+
146
+
147
def sqlite_path(version: Optional[str] = None) -> str:
    """Returns the path of the `usearch_sqlite` binary, downloading it if needed.

    Args:
        version: Release version to look for; `None` selects the version of
            the installed package.

    Returns:
        The path reported by `BinaryManager.sqlite_found_or_downloaded`.

    Raises:
        FileNotFoundError: If the binary is neither found locally nor
            downloaded.
    """
    binary_path = BinaryManager(version=version).sqlite_found_or_downloaded()
    if binary_path is None:
        raise FileNotFoundError("Failed to find or download `usearch_sqlite` binary.")
    return binary_path
usearch/client.py ADDED
@@ -0,0 +1,120 @@
1
+ from typing import Union, Optional, List
2
+
3
+ import numpy as np
4
+ from ucall.client import Client
5
+
6
+ from usearch.index import Matches
7
+
8
+
9
+ def _vector_to_ascii(vector: np.ndarray) -> Optional[str]:
10
+ if vector.dtype != np.int8 and vector.dtype != np.uint8 and vector.dtype != np.byte:
11
+ return None
12
+ if not np.all((vector >= 0) | (vector <= 100)):
13
+ return None
14
+
15
+ # Let's map [0, 100] to the range from [23, 123],
16
+ # poking 60 and replacing with the 124.
17
+ vector += 23
18
+ vector[vector == 60] = 124
19
+ ascii_vector = str(vector)
20
+ return ascii_vector
21
+
22
+
23
class IndexClient:
    """Client for a remote index, talking to the server through a `ucall` `Client`."""

    def __init__(self, uri: str = "127.0.0.1", port: int = 8545, use_http: bool = True) -> None:
        """
        Args:
            uri: Server address.
            port: Server port.
            use_http: Forwarded unchanged to the `Client` constructor.
        """
        self.client = Client(uri=uri, port=port, use_http=use_http)

    @staticmethod
    def _payload(response):
        """Unwraps a client response into plain Python data.

        Accepts responses whose `json` is an attribute or a method, as well
        as values that are already plain data.
        NOTE(review): confirm which form the installed `ucall` actually uses.
        """
        payload = getattr(response, "json", response)
        return payload() if callable(payload) else payload

    @staticmethod
    def _pack_matches(rows, batch_size: int, count: int):
        """Copies per-query match lists into dense `(batch_size, count)` arrays.

        Each element of `rows` is a sequence of dicts with "key" and
        "distance" entries. Unused slots stay zero.
        """
        # `np.zeros(shape)` allocates the buffers; `np.array(shape)` would
        # merely wrap the shape tuple itself.
        keys = np.zeros((batch_size, count), dtype=np.uint32)
        distances = np.zeros((batch_size, count), dtype=np.float32)
        counts = np.zeros(batch_size, dtype=np.uint32)
        for row, matches in enumerate(rows):
            for col, result in enumerate(matches):
                keys[row, col] = result["key"]
                distances[row, col] = result["distance"]
            counts[row] = len(matches)
        return keys, distances, counts

    def add_one(self, key: int, vector: np.ndarray):
        """Adds a single vector under `key`.

        The vector is flattened first. Vectors that `_vector_to_ascii` can
        encode are sent with `add_ascii`, all others with `add_one`.
        """
        assert isinstance(key, (int, np.integer))
        assert isinstance(vector, np.ndarray)
        key = int(key)
        vector = vector.flatten()
        ascii_vector = _vector_to_ascii(vector)
        if ascii_vector:
            self.client.add_ascii(key=key, string=ascii_vector)
        else:
            self.client.add_one(key=key, vectors=vector)

    def add_many(self, keys: np.ndarray, vectors: np.ndarray):
        """Adds a batch: `keys` is 1-D, `vectors` is 2-D with one row per key."""
        assert isinstance(keys, np.ndarray)
        assert isinstance(vectors, np.ndarray)
        assert keys.ndim == 1 and vectors.ndim == 2
        assert keys.shape[0] == vectors.shape[0]
        self.client.add_many(keys=keys, vectors=vectors)

    def add(self, keys: Union[np.ndarray, int], vectors: np.ndarray):
        """Dispatches to `add_one` for a single key and to `add_many` otherwise."""
        if isinstance(keys, (int, np.integer)):
            return self.add_one(int(keys), vectors)
        if len(keys) == 1:
            # Unwrap the only key: `add_one` expects a scalar, not a sequence.
            return self.add_one(int(keys[0]), vectors)
        return self.add_many(keys, vectors)

    def search_one(self, vector: np.ndarray, count: int) -> Matches:
        """Searches for up to `count` matches of one vector.

        Returns:
            A `(keys, distances, counts)` tuple of arrays shaped
            `(1, count)`, `(1, count)` and `(1,)`.
            NOTE(review): the annotation says `Matches`, but a plain tuple
            is returned.
        """
        vector = vector.flatten()
        ascii_vector = _vector_to_ascii(vector)
        if ascii_vector:
            response = self.client.search_ascii(string=ascii_vector, count=count)
        else:
            response = self.client.search_one(vector=vector, count=count)

        matches = self._payload(response)
        return self._pack_matches([matches], 1, count)

    def search_many(self, vectors: np.ndarray, count: int) -> Matches:
        """Searches for up to `count` matches of every row of `vectors`.

        Returns:
            A `(keys, distances, counts)` tuple of arrays shaped
            `(batch, count)`, `(batch, count)` and `(batch,)`.
            NOTE(review): the annotation says `Matches`, but a plain tuple
            is returned.
        """
        batch_size: int = vectors.shape[0]
        response = self.client.search_many(vectors=vectors, count=count)
        list_of_matches: List[List[dict]] = self._payload(response)
        return self._pack_matches(list_of_matches, batch_size, count)

    def search(self, vectors: np.ndarray, count: int) -> Matches:
        """Dispatches to `search_one` for a single vector and to `search_many` otherwise."""
        if vectors.ndim == 1 or (vectors.ndim == 2 and vectors.shape[0] == 1):
            return self.search_one(vectors, count)
        return self.search_many(vectors, count)

    def __len__(self):
        """Returns the value reported by the server's `size` call."""
        return self._payload(self.client.size())

    @property
    def ndim(self):
        """The value reported by the server's `ndim` call."""
        return self._payload(self.client.ndim())

    def capacity(self):
        """Returns the value reported by the server's `capacity` call."""
        return self._payload(self.client.capacity())

    def connectivity(self):
        """Returns the value reported by the server's `connectivity` call."""
        return self._payload(self.client.connectivity())

    def load(self, path: str):
        """Not implemented for the remote client."""
        raise NotImplementedError()

    def view(self, path: str):
        """Not implemented for the remote client."""
        raise NotImplementedError()

    def save(self, path: str):
        """Not implemented for the remote client."""
        raise NotImplementedError()
114
+
115
+
116
if __name__ == "__main__":
    # Manual smoke run against a server on the default address: store one
    # 256-element float32 vector under key 42, then query with an equal
    # vector asking for up to 10 matches.
    sample = np.array([0.4] * 256, dtype=np.float32)
    index = IndexClient()
    index.add(42, sample.copy())
    results = index.search(sample.copy(), 10)
    print(results)
Binary file