zvec 0.3.0__cp314-cp314-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- _zvec.cp314-win_amd64.pyd +0 -0
- bin/zvec_c_api.dll +0 -0
- include/include/zvec/ailego/buffer/buffer_manager.h +263 -0
- include/include/zvec/ailego/buffer/buffer_pool.h +173 -0
- include/include/zvec/ailego/buffer/concurrentqueue.h +4410 -0
- include/include/zvec/ailego/container/blob.h +131 -0
- include/include/zvec/ailego/container/cube.h +414 -0
- include/include/zvec/ailego/container/heap.h +234 -0
- include/include/zvec/ailego/container/hypercube.h +268 -0
- include/include/zvec/ailego/container/params.h +776 -0
- include/include/zvec/ailego/container/vector.h +1055 -0
- include/include/zvec/ailego/encoding/json/mod_json.h +1382 -0
- include/include/zvec/ailego/encoding/json/mod_json_plus.h +3446 -0
- include/include/zvec/ailego/encoding/json.h +17 -0
- include/include/zvec/ailego/hash/crc32c.h +35 -0
- include/include/zvec/ailego/hash/jump_hash.h +34 -0
- include/include/zvec/ailego/internal/platform.h +516 -0
- include/include/zvec/ailego/io/file.h +310 -0
- include/include/zvec/ailego/io/mmap_file.h +256 -0
- include/include/zvec/ailego/logger/logger.h +184 -0
- include/include/zvec/ailego/math_batch/utils.h +22 -0
- include/include/zvec/ailego/parallel/thread_pool.h +402 -0
- include/include/zvec/ailego/parallel/thread_queue.h +291 -0
- include/include/zvec/ailego/pattern/closure.h +530 -0
- include/include/zvec/ailego/pattern/expected.hpp +2605 -0
- include/include/zvec/ailego/pattern/factory.h +172 -0
- include/include/zvec/ailego/pattern/singleton.h +51 -0
- include/include/zvec/ailego/string/string_concat_helper.h +120 -0
- include/include/zvec/ailego/string/string_view.h +65 -0
- include/include/zvec/ailego/utility/file_helper.h +98 -0
- include/include/zvec/ailego/utility/float_helper.h +237 -0
- include/include/zvec/ailego/utility/string_helper.h +321 -0
- include/include/zvec/ailego/utility/string_helper_impl.h +173 -0
- include/include/zvec/ailego/utility/time_helper.h +206 -0
- include/include/zvec/ailego/utility/type_helper.h +128 -0
- include/include/zvec/c_api.h +3251 -0
- include/include/zvec/core/framework/index_builder.h +61 -0
- include/include/zvec/core/framework/index_bundle.h +275 -0
- include/include/zvec/core/framework/index_cluster.h +291 -0
- include/include/zvec/core/framework/index_context.h +265 -0
- include/include/zvec/core/framework/index_converter.h +231 -0
- include/include/zvec/core/framework/index_document.h +317 -0
- include/include/zvec/core/framework/index_dumper.h +163 -0
- include/include/zvec/core/framework/index_error.h +170 -0
- include/include/zvec/core/framework/index_factory.h +287 -0
- include/include/zvec/core/framework/index_features.h +604 -0
- include/include/zvec/core/framework/index_filter.h +74 -0
- include/include/zvec/core/framework/index_flow.h +647 -0
- include/include/zvec/core/framework/index_format.h +185 -0
- include/include/zvec/core/framework/index_framework.h +32 -0
- include/include/zvec/core/framework/index_groupby.h +54 -0
- include/include/zvec/core/framework/index_helper.h +60 -0
- include/include/zvec/core/framework/index_holder.h +1777 -0
- include/include/zvec/core/framework/index_logger.h +169 -0
- include/include/zvec/core/framework/index_mapping.h +217 -0
- include/include/zvec/core/framework/index_memory.h +266 -0
- include/include/zvec/core/framework/index_meta.h +714 -0
- include/include/zvec/core/framework/index_metric.h +143 -0
- include/include/zvec/core/framework/index_module.h +67 -0
- include/include/zvec/core/framework/index_packer.h +223 -0
- include/include/zvec/core/framework/index_plugin.h +117 -0
- include/include/zvec/core/framework/index_provider.h +470 -0
- include/include/zvec/core/framework/index_reducer.h +229 -0
- include/include/zvec/core/framework/index_refiner.h +92 -0
- include/include/zvec/core/framework/index_reformer.h +134 -0
- include/include/zvec/core/framework/index_runner.h +743 -0
- include/include/zvec/core/framework/index_searcher.h +57 -0
- include/include/zvec/core/framework/index_segment_storage.h +236 -0
- include/include/zvec/core/framework/index_stats.h +69 -0
- include/include/zvec/core/framework/index_storage.h +270 -0
- include/include/zvec/core/framework/index_streamer.h +55 -0
- include/include/zvec/core/framework/index_threads.h +168 -0
- include/include/zvec/core/framework/index_trainer.h +112 -0
- include/include/zvec/core/framework/index_unpacker.h +317 -0
- include/include/zvec/core/framework/index_version.h +31 -0
- include/include/zvec/core/interface/constants.h +30 -0
- include/include/zvec/core/interface/index.h +320 -0
- include/include/zvec/core/interface/index_factory.h +54 -0
- include/include/zvec/core/interface/index_param.h +365 -0
- include/include/zvec/core/interface/index_param_builders.h +410 -0
- include/include/zvec/db/collection.h +108 -0
- include/include/zvec/db/config.h +177 -0
- include/include/zvec/db/doc.h +407 -0
- include/include/zvec/db/index_params.h +431 -0
- include/include/zvec/db/options.h +69 -0
- include/include/zvec/db/query_params.h +175 -0
- include/include/zvec/db/schema.h +401 -0
- include/include/zvec/db/stats.h +35 -0
- include/include/zvec/db/status.h +181 -0
- include/include/zvec/db/type.h +142 -0
- include/include/zvec/turbo/turbo.h +55 -0
- include/zvec/c_api.h +3251 -0
- lib/zvec_ailego.lib +0 -0
- lib/zvec_c_api.lib +0 -0
- lib/zvec_core.lib +0 -0
- lib/zvec_db.lib +0 -0
- lib/zvec_turbo.lib +0 -0
- zvec/__init__.py +168 -0
- zvec/__init__.pyi +175 -0
- zvec/common/__init__.py +18 -0
- zvec/common/constants.py +33 -0
- zvec/executor/__init__.py +26 -0
- zvec/executor/query_executor.py +307 -0
- zvec/extension/__init__.py +55 -0
- zvec/extension/bm25_embedding_function.py +375 -0
- zvec/extension/embedding_function.py +147 -0
- zvec/extension/http_embedding_function.py +162 -0
- zvec/extension/jina_embedding_function.py +240 -0
- zvec/extension/jina_function.py +182 -0
- zvec/extension/multi_vector_reranker.py +174 -0
- zvec/extension/openai_embedding_function.py +238 -0
- zvec/extension/openai_function.py +149 -0
- zvec/extension/qwen_embedding_function.py +537 -0
- zvec/extension/qwen_function.py +186 -0
- zvec/extension/qwen_rerank_function.py +162 -0
- zvec/extension/rerank_function.py +69 -0
- zvec/extension/sentence_transformer_embedding_function.py +839 -0
- zvec/extension/sentence_transformer_function.py +150 -0
- zvec/extension/sentence_transformer_rerank_function.py +384 -0
- zvec/model/__init__.py +22 -0
- zvec/model/collection.py +421 -0
- zvec/model/convert.py +54 -0
- zvec/model/doc.py +173 -0
- zvec/model/param/__init__.py +46 -0
- zvec/model/param/__init__.pyi +823 -0
- zvec/model/param/vector_query.py +80 -0
- zvec/model/schema/__init__.py +21 -0
- zvec/model/schema/__init__.pyi +109 -0
- zvec/model/schema/collection_schema.py +215 -0
- zvec/model/schema/field_schema.py +303 -0
- zvec/py.typed +0 -0
- zvec/tool/__init__.py +18 -0
- zvec/tool/util.py +63 -0
- zvec/typing/__init__.py +32 -0
- zvec/typing/__init__.pyi +404 -0
- zvec/typing/enum.py +62 -0
- zvec/zvec.py +226 -0
- zvec-0.3.0.dist-info/METADATA +184 -0
- zvec-0.3.0.dist-info/RECORD +141 -0
- zvec-0.3.0.dist-info/WHEEL +5 -0
- zvec-0.3.0.dist-info/licenses/LICENSE +201 -0
|
Binary file
|
bin/zvec_c_api.dll
ADDED
|
Binary file
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
// Copyright 2025-present the zvec project
|
|
2
|
+
//
|
|
3
|
+
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
// you may not use this file except in compliance with the License.
|
|
5
|
+
// You may obtain a copy of the License at
|
|
6
|
+
//
|
|
7
|
+
// http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
//
|
|
9
|
+
// Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
// See the License for the specific language governing permissions and
|
|
13
|
+
// limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
#pragma once
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
#include <sys/stat.h>
#include <chrono>
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#include <system_error>
#include <vector>
#include <zvec/ailego/io/file.h>
#include <zvec/ailego/pattern/singleton.h>
|
|
27
|
+
|
|
28
|
+
namespace arrow {
|
|
29
|
+
class ChunkedArray;
|
|
30
|
+
class Array;
|
|
31
|
+
class DataType;
|
|
32
|
+
class Scalar;
|
|
33
|
+
template <typename T>
|
|
34
|
+
class Result;
|
|
35
|
+
class Status;
|
|
36
|
+
class Buffer;
|
|
37
|
+
} // namespace arrow
|
|
38
|
+
|
|
39
|
+
namespace zvec {
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
namespace ailego {
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
struct BufferID;
|
|
46
|
+
class BufferManager;
|
|
47
|
+
class BufferHandle;
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
// Key describing one region of a backing file: either a parquet
// (column, row_group) cell or an (offset, length) slice of a vector file.
// Besides the position it records the file name, the file's st_ino and its
// last-write timestamp as captured when the ID was built.
struct BufferID {
  // Coordinates of a parquet chunk inside its file.
  struct ParquetPos {
    int column;
    int row_group;
  };

  // Coordinates of a vector slice inside its file.
  struct VectorPos {
    uint32_t offset;
    uint32_t length;
  };

  // Storage shared by both coordinate kinds. The factories below write
  // `forward` for PARQUET ids and `vector` for VECTOR ids.
  union Position {
    explicit Position() = default;
    ParquetPos forward;
    VectorPos vector;
  };

  enum TYPE {
    PARQUET = 1,
    VECTOR = 2,
    UNKNOWN = 0,
  };

  // Returns the last-write time of `p` as the raw tick count of
  // std::filesystem::file_time_type since its epoch.
  // NOTE: despite the name, the tick unit is whatever the standard library
  // uses for file_time_type and is not guaranteed to be nanoseconds.
  // Throws std::filesystem::filesystem_error if the time cannot be read.
  static std::uint64_t getLastModifiedNs(const std::filesystem::path &p) {
    auto ftime = std::filesystem::last_write_time(p);
    return static_cast<std::uint64_t>(ftime.time_since_epoch().count());
  }

  // Builds the ID of a parquet chunk.
  //   file_name  path of the backing parquet file
  //   column     column index of the chunk
  //   row_group  row-group index of the chunk
  // If the file cannot be inspected, file_id and mtime stay 0.
  static BufferID ParquetID(const std::string &file_name, int column,
                            int row_group) {
    BufferID buffer_id{};
    buffer_id.type = TYPE::PARQUET;
    buffer_id.file_name = file_name;
    buffer_id.pos.forward.column = column;
    buffer_id.pos.forward.row_group = row_group;
    load_file_identity(buffer_id);
    return buffer_id;
  }

  // Builds the ID of a vector slice.
  //   file_name  path of the backing vector file
  //   offset     start of the slice
  //   length     size of the slice
  // If the file cannot be inspected, file_id and mtime stay 0.
  static BufferID VectorID(const std::string &file_name, uint32_t offset,
                           uint32_t length) {
    BufferID buffer_id{};
    buffer_id.type = TYPE::VECTOR;
    buffer_id.file_name = file_name;
    load_file_identity(buffer_id);
    buffer_id.pos.vector.offset = offset;
    buffer_id.pos.vector.length = length;
    return buffer_id;
  }

  explicit BufferID() = default;

  // Type of the file backing this buffer
  TYPE type{UNKNOWN};

  // Name of the file backing this buffer
  std::string file_name{};

  // st_ino of the backing file as reported by stat(); 0 if stat() failed.
  // NOTE(review): only st_ino is stored (st_dev is not), and Microsoft's CRT
  // documents st_ino as meaningless on FAT/HPFS/NTFS, so on Windows this is
  // presumably not a unique identifier - confirm against callers.
  uint64_t file_id{};

  // Last-write tick count of the backing file; 0 if it could not be read.
  // NOTE(review): `long` is 32 bits wide on LLP64 targets (e.g. 64-bit
  // Windows), so only the low bits of the 64-bit tick count are kept there.
  // The type is left unchanged because the field layout is presumably shared
  // with separately compiled code - confirm before widening.
  long mtime{};

  // To identify which part of the backing file should be loaded into the buffer
  Position pos{};

  // Get the parquet position (meaningful for PARQUET ids).
  const inline struct ParquetPos &parquet() const {
    return pos.forward;
  }

  // Get the vector position (meaningful for VECTOR ids).
  const inline struct VectorPos &vector() const {
    return pos.vector;
  }

  // Get debug string
  const std::string to_string() const {
    std::string msg{"Buffer["};
    if (type == TYPE::PARQUET) {
      msg += "parquet: " + file_name + "[" + std::to_string(file_id) + "]" +
             ", column: " + std::to_string(parquet().column) +
             ", row_group: " + std::to_string(parquet().row_group);
    } else if (type == TYPE::VECTOR) {
      msg += "vector: " + file_name + "[" + std::to_string(file_id) + "]" +
             ", offset: " + std::to_string(vector().offset);
    } else {
      msg += "unknown";
    }
    msg += ", mtime: " + std::to_string(mtime);
    msg += "]";
    return msg;
  }

 private:
  // Fills id.file_id and id.mtime from the file named by id.file_name.
  // Shared by ParquetID() and VectorID(). Never throws for a missing or
  // unreadable file: a failed stat() leaves both fields untouched, and a
  // failed timestamp read (e.g. the file vanished right after stat())
  // leaves mtime untouched.
  static void load_file_identity(BufferID &id) {
    struct stat file_stat;
    if (stat(id.file_name.c_str(), &file_stat) != 0) {
      return;
    }
    id.file_id = file_stat.st_ino;

    std::error_code ec;
    const auto ftime =
        std::filesystem::last_write_time(std::filesystem::path(id.file_name), ec);
    if (!ec) {
      // Explicit cast: see the note on `mtime` about its width.
      id.mtime = static_cast<long>(ftime.time_since_epoch().count());
    }
  }
};
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
// Thread-safe LRU buffer implementation.
|
|
170
|
+
class BufferManager : public Singleton<BufferManager> {
|
|
171
|
+
friend BufferHandle;
|
|
172
|
+
|
|
173
|
+
public:
|
|
174
|
+
void init(uint64_t limit, uint32_t num_shards = 1);
|
|
175
|
+
|
|
176
|
+
BufferHandle acquire(BufferID &buffer_id);
|
|
177
|
+
|
|
178
|
+
std::unique_ptr<BufferHandle> acquire_ptr(BufferID &buffer_id);
|
|
179
|
+
|
|
180
|
+
uint64_t total_size_in_bytes() const;
|
|
181
|
+
|
|
182
|
+
void cleanup();
|
|
183
|
+
|
|
184
|
+
~BufferManager();
|
|
185
|
+
|
|
186
|
+
private:
|
|
187
|
+
struct BufferContext;
|
|
188
|
+
|
|
189
|
+
class BufferPool;
|
|
190
|
+
|
|
191
|
+
// Custom deleter for Arrow buffer that automatically notifies us when the
|
|
192
|
+
// buffer is no longer referenced by Arrow
|
|
193
|
+
struct ArrowBufferDeleter {
|
|
194
|
+
explicit ArrowBufferDeleter(BufferContext *c);
|
|
195
|
+
BufferContext *context;
|
|
196
|
+
// Only reduces the reference count but does not actually release the
|
|
197
|
+
// buffer, since the buffer memory is managed by the BufferManager.
|
|
198
|
+
void operator()(arrow::Buffer *);
|
|
199
|
+
};
|
|
200
|
+
|
|
201
|
+
std::vector<BufferPool *> pools_;
|
|
202
|
+
};
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
class BufferHandle {
|
|
206
|
+
public:
|
|
207
|
+
typedef std::unique_ptr<BufferHandle> Pointer;
|
|
208
|
+
|
|
209
|
+
explicit BufferHandle(BufferManager::BufferContext *context = nullptr);
|
|
210
|
+
BufferHandle(const BufferHandle &) = delete;
|
|
211
|
+
BufferHandle(BufferHandle &&) = default;
|
|
212
|
+
BufferHandle &operator=(const BufferHandle &) = delete;
|
|
213
|
+
BufferHandle &operator=(BufferHandle &&) = default;
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
~BufferHandle();
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
// Pin parquet data in memory by allocating arrow buffers of appropriate size
|
|
220
|
+
// and reading data from the backing file.
|
|
221
|
+
// The lifecycle of the allocated memory is automatically managed through
|
|
222
|
+
// shared pointers. The buffers are guaranteed to be held until they are not
|
|
223
|
+
// referenced.
|
|
224
|
+
// Returns a pointer to the loaded ChunkedArray in Arrow format.
|
|
225
|
+
std::shared_ptr<arrow::ChunkedArray> pin_parquet_data();
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
// Pin vector data in memory by allocating a buffer of appropriate size and
|
|
229
|
+
// loading data from the backing file.
|
|
230
|
+
// The memory is guaranteed to be held until unpin() is called. The caller
|
|
231
|
+
// must call unpin() to release the memory when it is no longer needed.
|
|
232
|
+
// Returns a raw memory address.
|
|
233
|
+
void *pin_vector_data();
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
// Reduce the reference count for this vector buffer.
|
|
237
|
+
// Returns true if this was the last reference.
|
|
238
|
+
// When reference count is zero, the buffer is moved to the eviction list and
|
|
239
|
+
// becomes eligible for removal under memory pressure.
|
|
240
|
+
bool unpin_vector_data();
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
// Get the current reference count.
|
|
244
|
+
uint32_t references() const;
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
// Get the buffer size.
|
|
248
|
+
uint32_t size() const;
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
private:
|
|
252
|
+
using BufferContext = BufferManager::BufferContext;
|
|
253
|
+
using BufferPool = BufferManager::BufferPool;
|
|
254
|
+
|
|
255
|
+
BufferContext *context_;
|
|
256
|
+
BufferPool *pool_;
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
} // namespace ailego
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
} // namespace zvec
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <sys/stat.h>
|
|
4
|
+
#include <fcntl.h>
|
|
5
|
+
#include <atomic>
|
|
6
|
+
#include <cassert>
|
|
7
|
+
#include <cstdio>
|
|
8
|
+
#include <cstdlib>
|
|
9
|
+
#include <cstring>
|
|
10
|
+
#include <iostream>
|
|
11
|
+
#include <limits>
|
|
12
|
+
#include <map>
|
|
13
|
+
#include <memory>
|
|
14
|
+
#include <mutex>
|
|
15
|
+
#include <queue>
|
|
16
|
+
#include <stdexcept>
|
|
17
|
+
#include <string>
|
|
18
|
+
#include <unordered_map>
|
|
19
|
+
#include <zvec/ailego/internal/platform.h>
|
|
20
|
+
#include "concurrentqueue.h"
|
|
21
|
+
|
|
22
|
+
#if defined(_MSC_VER)
|
|
23
|
+
#include <io.h>
|
|
24
|
+
#endif
|
|
25
|
+
|
|
26
|
+
namespace zvec {
|
|
27
|
+
namespace ailego {
|
|
28
|
+
|
|
29
|
+
using block_id_t = size_t;
|
|
30
|
+
using version_t = size_t;
|
|
31
|
+
|
|
32
|
+
class LPMap;
|
|
33
|
+
|
|
34
|
+
class LRUCache {
|
|
35
|
+
public:
|
|
36
|
+
typedef std::pair<block_id_t, version_t> BlockType;
|
|
37
|
+
typedef moodycamel::ConcurrentQueue<BlockType> ConcurrentQueue;
|
|
38
|
+
|
|
39
|
+
int init(size_t block_size);
|
|
40
|
+
|
|
41
|
+
bool evict_single_block(BlockType &item);
|
|
42
|
+
|
|
43
|
+
bool add_single_block(const LPMap *lp_map, const BlockType &block,
|
|
44
|
+
int block_type);
|
|
45
|
+
|
|
46
|
+
void clear_dead_node(const LPMap *lp_map);
|
|
47
|
+
|
|
48
|
+
private:
|
|
49
|
+
constexpr static size_t CATCH_QUEUE_NUM = 3;
|
|
50
|
+
size_t block_size_{0};
|
|
51
|
+
std::vector<ConcurrentQueue> queues_;
|
|
52
|
+
alignas(64) std::atomic<size_t> evict_queue_insertions_{0};
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
class LPMap {
|
|
56
|
+
struct Entry {
|
|
57
|
+
alignas(64) std::atomic<int> ref_count;
|
|
58
|
+
alignas(64) std::atomic<version_t> load_count;
|
|
59
|
+
char *buffer;
|
|
60
|
+
};
|
|
61
|
+
|
|
62
|
+
public:
|
|
63
|
+
LPMap() : entry_num_(0), entries_(nullptr) {}
|
|
64
|
+
~LPMap() {
|
|
65
|
+
delete[] entries_;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
void init(size_t entry_num);
|
|
69
|
+
|
|
70
|
+
char *acquire_block(block_id_t block_id, bool lru_mode);
|
|
71
|
+
|
|
72
|
+
void release_block(block_id_t block_id);
|
|
73
|
+
|
|
74
|
+
char *evict_block(block_id_t block_id);
|
|
75
|
+
|
|
76
|
+
char *set_block_acquired(block_id_t block_id, char *buffer);
|
|
77
|
+
|
|
78
|
+
void recycle(moodycamel::ConcurrentQueue<char *> &free_buffers);
|
|
79
|
+
|
|
80
|
+
size_t entry_num() const {
|
|
81
|
+
return entry_num_;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
inline bool isDeadBlock(LRUCache::BlockType block) const {
|
|
85
|
+
Entry &entry = entries_[block.first];
|
|
86
|
+
return block.second != entry.load_count.load();
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
private:
|
|
90
|
+
size_t entry_num_{0};
|
|
91
|
+
Entry *entries_{nullptr};
|
|
92
|
+
LRUCache cache_;
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
class VecBufferPoolHandle;
|
|
96
|
+
|
|
97
|
+
class VecBufferPool {
|
|
98
|
+
public:
|
|
99
|
+
typedef std::shared_ptr<VecBufferPool> Pointer;
|
|
100
|
+
|
|
101
|
+
VecBufferPool(const std::string &filename);
|
|
102
|
+
~VecBufferPool() {
|
|
103
|
+
// Free all buffers in the free list
|
|
104
|
+
char *buf = nullptr;
|
|
105
|
+
while (free_buffers_.try_dequeue(buf)) {
|
|
106
|
+
ailego_free(buf);
|
|
107
|
+
}
|
|
108
|
+
// Free any buffers still pinned in the map
|
|
109
|
+
for (size_t i = 0; i < lp_map_.entry_num(); ++i) {
|
|
110
|
+
char *b = lp_map_.evict_block(i);
|
|
111
|
+
if (b) ailego_free(b);
|
|
112
|
+
}
|
|
113
|
+
#if defined(_MSC_VER)
|
|
114
|
+
_close(fd_);
|
|
115
|
+
#else
|
|
116
|
+
close(fd_);
|
|
117
|
+
#endif
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
int init(size_t pool_capacity, size_t block_size, size_t segment_count);
|
|
121
|
+
|
|
122
|
+
VecBufferPoolHandle get_handle();
|
|
123
|
+
|
|
124
|
+
char *acquire_buffer(block_id_t block_id, size_t offset, size_t size,
|
|
125
|
+
int retry = 0);
|
|
126
|
+
|
|
127
|
+
int get_meta(size_t offset, size_t length, char *buffer);
|
|
128
|
+
|
|
129
|
+
size_t file_size() const {
|
|
130
|
+
return file_size_;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
bool no_lru_mode() {
|
|
134
|
+
return no_lru_mode_;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
private:
|
|
138
|
+
int fd_;
|
|
139
|
+
size_t file_size_;
|
|
140
|
+
size_t pool_capacity_;
|
|
141
|
+
bool no_lru_mode_;
|
|
142
|
+
|
|
143
|
+
public:
|
|
144
|
+
LPMap lp_map_;
|
|
145
|
+
|
|
146
|
+
private:
|
|
147
|
+
std::vector<std::unique_ptr<std::mutex>> mutex_vec_;
|
|
148
|
+
moodycamel::ConcurrentQueue<char *> free_buffers_;
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
class VecBufferPoolHandle {
|
|
152
|
+
public:
|
|
153
|
+
VecBufferPoolHandle(VecBufferPool &pool) : pool_(pool) {}
|
|
154
|
+
VecBufferPoolHandle(VecBufferPoolHandle &&other) : pool_(other.pool_) {}
|
|
155
|
+
|
|
156
|
+
~VecBufferPoolHandle() = default;
|
|
157
|
+
|
|
158
|
+
typedef std::shared_ptr<VecBufferPoolHandle> Pointer;
|
|
159
|
+
|
|
160
|
+
char *get_block(size_t offset, size_t size, size_t block_id);
|
|
161
|
+
|
|
162
|
+
int get_meta(size_t offset, size_t length, char *buffer);
|
|
163
|
+
|
|
164
|
+
void release_one(block_id_t block_id);
|
|
165
|
+
|
|
166
|
+
void acquire_one(block_id_t block_id);
|
|
167
|
+
|
|
168
|
+
private:
|
|
169
|
+
VecBufferPool &pool_;
|
|
170
|
+
};
|
|
171
|
+
|
|
172
|
+
} // namespace ailego
|
|
173
|
+
} // namespace zvec
|