xlr8-0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,661 @@
1
+ """
2
+ XLR8 collection wrapper with PyMongo compatibility.
3
+
4
+ ================================================================================
5
+ DATA FLOW - COLLECTION WRAPPER
6
+ ================================================================================
7
+
8
+ This module wraps pymongo.collection.Collection to provide the `accelerate()`
9
+ function - the main entry point for users.
10
+
11
+ TYPICAL USAGE FLOW:
12
+ ────────────────────────────────────────────────────────────────────────────────
13
+
14
+ 1. USER WRAPS A COLLECTION:
15
+ ┌─────────────────────────────────────────────────────────────────────────────┐
16
+ │ from xlr8 import accelerate, Schema, Types │
17
+ │ │
18
+ │ schema = Schema( │
19
+ │ time_field="timestamp", │
20
+ │ fields={ │
21
+ │ "timestamp": Types.Timestamp("ms", tz="UTC"), │
22
+ │ "metadata.device_id": Types.ObjectId(), │
23
+ │ "metadata.sensor_id": Types.ObjectId(), │
24
+ │     "value": Types.Any(),        # Polymorphic - can be int, float, str, etc.│
25
+ │ } │
26
+ │ ) │
27
+ │ │
28
+ │ xlr8_col = accelerate(pymongo_collection, schema, mongo_uri) │
29
+ └─────────────────────────────────────────────────────────────────────────────┘
30
+
31
+ 2. USER CALLS find() - RETURNS XLR8Cursor (NOT PYMONGO CURSOR):
32
+ ┌─────────────────────────────────────────────────────────────────────────────┐
33
+ │ cursor = xlr8_col.find({ │
34
+ │ "timestamp": {"$gte": start, "$lt": end}, │
35
+ │ "metadata.device_id": ObjectId("64a..."), │
36
+ │ }) │
37
+ │ # cursor is XLR8Cursor, wrapping the query params │
38
+ └─────────────────────────────────────────────────────────────────────────────┘
39
+
40
+ 3. USER CALLS to_dataframe() - TRIGGERS ACCELERATION:
41
+ ┌─────────────────────────────────────────────────────────────────────────────┐
42
+ │ df = cursor.to_dataframe() │
43
+ │ # This triggers: │
44
+ │ # 1. Query analysis (can we chunk by time?) │
45
+ │ # 2. Check cache (have we fetched this before?) │
46
+ │ # 3. Parallel fetch via Rust async backend │
47
+ │ # 4. Stream to Parquet cache │
48
+ │ # 5. Read back and return DataFrame │
49
+ └─────────────────────────────────────────────────────────────────────────────┘
50
+
51
+ KEY CONFIG OPTIONS:
52
+ ────────────────────────────────────────────────────────────────────────────────
53
+ - schema: Required for type-aware encoding (especially Types.Any)
54
+ - mongo_uri: Required for accelerated execution (workers create connections)
55
+ - cache_dir: Where to store Parquet cache (default: .xlr8_cache)
56
+
57
+ PER-QUERY OPTIONS (via to_dataframe):
58
+ ────────────────────────────────────────────────────────────────────────────────
59
+ - max_workers: Number of parallel workers (default: 4)
60
+ - flush_ram_limit_mb: RAM budget for batch sizing (default: 512)
61
+ - chunking_granularity: Time chunk size (e.g., timedelta(days=7))
62
+
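+ PUTTING IT TOGETHER (illustrative sketch):
+ ────────────────────────────────────────────────────────────────────────────────
+ The snippet below just combines the options listed above, reusing the names from
+ the usage flow (`pymongo_collection`, `schema`, `start`, `end` are placeholders):
+ 
+     from datetime import timedelta
+     from xlr8 import accelerate
+ 
+     xlr8_col = accelerate(
+         pymongo_collection,
+         schema,
+         mongo_uri="mongodb://localhost:27017",
+         cache_dir="./.xlr8_cache",               # default location
+     )
+     df = xlr8_col.find(
+         {"timestamp": {"$gte": start, "$lt": end}}
+     ).to_dataframe(
+         max_workers=4,                           # per-query (default)
+         flush_ram_limit_mb=512,                  # per-query (default)
+         chunking_granularity=timedelta(days=7),
+     )
+ 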
63
+ ================================================================================
64
+ """
65
+
66
+ from typing import Any, Callable, Dict, List, Optional, Union
67
+
68
+ from pymongo.collection import Collection as PyMongoCollection
69
+
70
+ from xlr8.collection.cursor import XLR8Cursor
71
+ from xlr8.schema import Schema
72
+
73
+
74
+ class XLR8Collection:
75
+ """
76
+ PyMongo-compatible collection wrapper with acceleration.
77
+
78
+ Drop-in replacement for pymongo.collection.Collection that transparently
79
+ accelerates analytical queries through parallel execution and caching.
80
+
81
+ All write operations (insert, update, delete) pass through to PyMongo.
82
+ Read operations (find, aggregate) can be accelerated if:
83
+ - Schema is provided
84
+ - Query has time-range predicates
85
+ - Query doesn't use complex operators ($nor, $where, etc.)
86
+
87
+ Example:
88
+ >>> import pymongo
89
+ >>> from xlr8 import XLR8Collection, Schema, Types
90
+ >>>
91
+ >>> # Create schema
92
+ >>> schema = Schema(
93
+ ... time_field="timestamp",
94
+ ... fields={
95
+ ... "timestamp": Types.Timestamp(),
96
+ ... "value": Types.Float(),
97
+ ... "sensor_id": Types.String(),
98
+ ... }
99
+ ... )
100
+ >>>
101
+ >>> # Wrap collection with mongo_uri for accelerated execution
102
+ >>> client = pymongo.MongoClient("mongodb://localhost:27017")
103
+ >>> pymongo_col = client.mydb.mycollection
104
+ >>> col = XLR8Collection(pymongo_col, schema=schema, mongo_uri="mongodb://localhost:27017")
105
+ >>>
106
+ >>> # Use like regular PyMongo
107
+ >>> cursor = col.find({"timestamp": {"$gte": start, "$lt": end}})
108
+ >>> df = cursor.to_dataframe(flush_ram_limit_mb=2000)
109
+ """
110
+
111
+ def __init__(
112
+ self,
113
+ pymongo_collection,
114
+ schema: Optional[Schema] = None,
115
+ mongo_uri: Union[str, Callable[[], str], None] = None,
116
+ cache_dir: Optional[str] = None,
117
+ enable_cache: bool = True,
118
+ metadata_cardinality: int = 1,
119
+ approx_document_size_bytes: int = 500,
120
+ ):
121
+ """
122
+ Initialize XLR8 collection wrapper.
123
+
124
+ Args:
125
+ pymongo_collection: PyMongo Collection instance
126
+ schema: Optional schema definition for acceleration
127
+ mongo_uri: MongoDB connection string (str) or callable that returns one.
128
+ Required for accelerated execution. Can be:
129
+ - A string: "mongodb://localhost:27017"
130
+ - A callable: lambda: os.environ["MONGODB_URI"]
131
+ cache_dir: Directory for Parquet cache (default: ./.xlr8_cache)
132
+ enable_cache: Enable Parquet caching
133
+ metadata_cardinality: Number of unique metadata combinations
134
+ (e.g., sensor count)
135
+ approx_document_size_bytes: Approximate size of each document in bytes
136
+ (default: 500)
137
+
138
+ Note:
139
+ flush_ram_limit_mb and max_workers are parameters of to_dataframe(),
140
+ to_polars(), etc. for per-query control.
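+ 
+         Example:
+             A minimal sketch; `pymongo_col` and `schema` are as in the
+             class-level example, and mongo_uri is given as a callable that
+             reads the connection string from the environment.
+ 
+             >>> import os
+             >>> col = XLR8Collection(
+             ...     pymongo_col,
+             ...     schema=schema,
+             ...     mongo_uri=lambda: os.environ["MONGODB_URI"],
+             ... )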
141
+ """
142
+ self._pymongo_collection = pymongo_collection
143
+ self._schema = schema
144
+ self._mongo_uri = mongo_uri
145
+ self._cache_dir = cache_dir or "./.xlr8_cache"
146
+ self._enable_cache = enable_cache
147
+ self._metadata_cardinality = metadata_cardinality
148
+ self._approx_document_size_bytes = approx_document_size_bytes
149
+
150
+ def raw_collection(self):
151
+ """
152
+ Get direct access to underlying PyMongo collection.
153
+
154
+ This is an escape hatch for power users who need direct access to PyMongo
155
+ collection methods that may not be available through delegation.
156
+
157
+ Returns:
158
+ pymongo.collection.Collection: The underlying PyMongo collection
159
+
160
+ Example:
161
+ >>> xlr8_col = accelerate(collection, schema=schema)
162
+ >>> xlr8_col.raw_collection().watch() # Use MongoDB change streams
163
+ >>> xlr8_col.raw_collection().list_indexes() # Direct PyMongo access
164
+ """
165
+ return self._pymongo_collection
166
+
167
+ # PyMongo pass-through properties
168
+
169
+ @property
170
+ def name(self) -> str:
171
+ """Collection name."""
172
+ return self._pymongo_collection.name
173
+
174
+ @property
175
+ def full_name(self) -> str:
176
+ """Full collection name (database.collection)."""
177
+ return self._pymongo_collection.full_name
178
+
179
+ @property
180
+ def database(self):
181
+ """Parent database."""
182
+ return self._pymongo_collection.database
183
+
184
+ # Public accessor properties for cursor usage
185
+
186
+ @property
187
+ def schema(self):
188
+ """Schema definition for acceleration."""
189
+ return self._schema
190
+
191
+ @property
192
+ def pymongo_collection(self):
193
+ """Underlying PyMongo collection instance."""
194
+ return self._pymongo_collection
195
+
196
+ @property
197
+ def mongo_uri(self):
198
+ """MongoDB connection URI for accelerated execution."""
199
+ return self._mongo_uri
200
+
201
+ @property
202
+ def approx_document_size_bytes(self) -> int:
203
+ """Approximate size of each document in bytes."""
204
+ return self._approx_document_size_bytes
205
+
206
+ def __getattr__(self, name: str):
207
+ """
208
+ Delegate unknown methods to PyMongo collection.
209
+
210
+ Why:
211
+ Provides full PyMongo compatibility without manually implementing
212
+ every collection method (insert, update, delete, indexes, etc.).
213
+
214
+ Example:
215
+ >>> xlr8_col.insert_one({...}) # Works via delegation
216
+ >>> xlr8_col.create_index("timestamp") # Works via delegation
217
+ >>> count = xlr8_col.count_documents({}) # Works via delegation
218
+ """
219
+ return getattr(self._pymongo_collection, name)
220
+
221
+ # Read operations (can be accelerated)
222
+
223
+ def find(
224
+ self,
225
+ filter: Optional[Dict[str, Any]] = None,
226
+ projection: Optional[Dict[str, Any]] = None,
227
+ skip: int = 0,
228
+ limit: int = 0,
229
+ sort: Optional[List[tuple]] = None,
230
+ batch_size: int = 1000,
231
+ **kwargs,
232
+ ) -> XLR8Cursor:
233
+ """
234
+ Query collection with optional acceleration.
235
+
236
+ Returns XLR8Cursor which is PyMongo-compatible but can accelerate
237
+ to_dataframe() / to_polars() conversions.
238
+
239
+ ┌─────────────────────────────────────────────────────────────────────┐
240
+ │ DATA FLOW EXAMPLE: │
241
+ │ │
242
+ │ INPUT (filter parameter): │
243
+ │ { │
244
+ │ "$or": [ │
245
+ │ {"metadata.sensor_id": ObjectId("64a...")}, │
246
+ │ {"metadata.sensor_id": ObjectId("64b...")}, │
247
+ │ ], │
248
+ │ "timestamp": {"$gte": datetime(2024,1,1), "$lt": datetime(...)} │
249
+ │ } │
250
+ │ │
251
+ │ OUTPUT: XLR8Cursor object containing: │
252
+ │ - _filter: The query dict (unchanged) │
253
+ │ - _collection: Reference back to this XLR8Collection │
254
+ │ - _projection, _skip, _limit, _sort: Query modifiers │
255
+ │ │
256
+ │ NEXT STEP: User calls cursor.to_dataframe() which triggers: │
257
+ │ 1. Query analysis in analysis/brackets.py │
258
+ │ 2. Execution planning in execution/planner.py │
259
+ │ 3. Parallel fetch in execution/worker.py │
260
+ └─────────────────────────────────────────────────────────────────────┘
261
+
262
+ Args:
263
+ filter: Query filter dict
264
+ projection: Field projection dict
265
+ skip: Number of documents to skip
266
+ limit: Maximum documents to return
267
+ sort: Sort specification
268
+ batch_size: Batch size for iteration
269
+ **kwargs: Additional PyMongo cursor options
270
+
271
+ Returns:
272
+ XLR8Cursor instance
273
+
274
+ Example:
275
+ >>> # Simple query
276
+ >>> cursor = col.find({"status": "active"})
277
+ >>>
278
+ >>> # Query with time range (accelerated)
279
+ >>> cursor = col.find({
280
+ ... "timestamp": {"$gte": start, "$lt": end},
281
+ ... "sensor_id": "sensor_1"
282
+ ... })
283
+ >>> df = cursor.to_dataframe()
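+             >>>
+             >>> # Illustrative: projection / sort / limit are ordinary query
+             >>> # modifiers carried on the returned XLR8Cursor
+             >>> cursor = col.find(
+             ...     {"timestamp": {"$gte": start, "$lt": end}},
+             ...     projection={"timestamp": 1, "value": 1},
+             ...     sort=[("timestamp", 1)],
+             ...     limit=1000,
+             ... )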
284
+ """
285
+ if filter is None:
286
+ filter = {}
287
+
288
+ return XLR8Cursor(
289
+ collection=self,
290
+ query_filter=filter,
291
+ projection=projection,
292
+ skip=skip,
293
+ limit=limit,
294
+ sort=sort,
295
+ batch_size=batch_size,
296
+ )
297
+
298
+ def find_one(
299
+ self,
300
+ filter: Optional[Dict[str, Any]] = None,
301
+ projection: Optional[Dict[str, Any]] = None,
302
+ **kwargs,
303
+ ) -> Optional[Dict[str, Any]]:
304
+ """
305
+ Get single document.
306
+
307
+ Pass-through to PyMongo (no acceleration).
308
+
309
+ Args:
310
+ filter: Query filter
311
+ projection: Field projection
312
+ **kwargs: Additional options
313
+
314
+ Returns:
315
+ Document dict or None
316
+ """
317
+ return self._pymongo_collection.find_one(
318
+ filter=filter, projection=projection, **kwargs
319
+ )
320
+
321
+ def count_documents(self, filter: Dict[str, Any], **kwargs) -> int:
322
+ """
323
+ Count matching documents.
324
+
325
+ Pass-through to PyMongo (no acceleration).
326
+
327
+ Args:
328
+ filter: Query filter
329
+ **kwargs: Additional options
330
+
331
+ Returns:
332
+ Document count
333
+ """
334
+ return self._pymongo_collection.count_documents(filter, **kwargs)
335
+
336
+ def estimated_document_count(self, **kwargs) -> int:
337
+ """
338
+ Get estimated total document count.
339
+
340
+ Pass-through to PyMongo.
341
+
342
+ Returns:
343
+ Estimated count
344
+ """
345
+ return self._pymongo_collection.estimated_document_count(**kwargs)
346
+
347
+ def distinct(
348
+ self, key: str, filter: Optional[Dict[str, Any]] = None, **kwargs
349
+ ) -> List[Any]:
350
+ """
351
+ Get distinct values.
352
+
353
+ Pass-through to PyMongo (no acceleration).
354
+
355
+ Args:
356
+ key: Field name
357
+ filter: Query filter
358
+ **kwargs: Additional options
359
+
360
+ Returns:
361
+ List of distinct values
362
+ """
363
+ return self._pymongo_collection.distinct(key, filter=filter, **kwargs)
364
+
365
+ def aggregate(self, pipeline: List[Dict[str, Any]], **kwargs):
366
+ """
367
+ Run aggregation pipeline.
368
+
369
+ Pass-through to PyMongo (no acceleration for now).
370
+
371
+ TODO: Implement acceleration for time-range aggregations.
372
+
373
+ Args:
374
+ pipeline: Aggregation pipeline
375
+ **kwargs: Additional options
376
+
377
+ Returns:
378
+ PyMongo CommandCursor
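+ 
+         Example:
+             A sketch of a plain (non-accelerated) aggregation; field names
+             follow the sensor schema used in the examples above.
+ 
+             >>> avg_per_sensor = list(col.aggregate([
+             ...     {"$match": {"timestamp": {"$gte": start, "$lt": end}}},
+             ...     {"$group": {"_id": "$sensor_id",
+             ...                 "avg_value": {"$avg": "$value"}}},
+             ... ]))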
379
+ """
380
+ return self._pymongo_collection.aggregate(pipeline, **kwargs)
381
+
382
+ # Write operations (pass-through to PyMongo)
383
+
384
+ def insert_one(self, document: Dict[str, Any], **kwargs):
385
+ """
386
+ Insert single document.
387
+
388
+ Pass-through to PyMongo.
389
+
390
+ Args:
391
+ document: Document to insert
392
+ **kwargs: Additional options
393
+
394
+ Returns:
395
+ InsertOneResult
396
+ """
397
+ return self._pymongo_collection.insert_one(document, **kwargs)
398
+
399
+ def insert_many(self, documents: List[Dict[str, Any]], **kwargs):
400
+ """
401
+ Insert multiple documents.
402
+
403
+ Pass-through to PyMongo.
404
+
405
+ Args:
406
+ documents: Documents to insert
407
+ **kwargs: Additional options
408
+
409
+ Returns:
410
+ InsertManyResult
411
+ """
412
+ return self._pymongo_collection.insert_many(documents, **kwargs)
413
+
414
+ def update_one(self, filter: Dict[str, Any], update: Dict[str, Any], **kwargs):
415
+ """
416
+ Update single document.
417
+
418
+ Pass-through to PyMongo.
419
+
420
+ Args:
421
+ filter: Query filter
422
+ update: Update operations
423
+ **kwargs: Additional options
424
+
425
+ Returns:
426
+ UpdateResult
427
+ """
428
+ return self._pymongo_collection.update_one(filter, update, **kwargs)
429
+
430
+ def update_many(self, filter: Dict[str, Any], update: Dict[str, Any], **kwargs):
431
+ """
432
+ Update multiple documents.
433
+
434
+ Pass-through to PyMongo.
435
+
436
+ Args:
437
+ filter: Query filter
438
+ update: Update operations
439
+ **kwargs: Additional options
440
+
441
+ Returns:
442
+ UpdateResult
443
+ """
444
+ return self._pymongo_collection.update_many(filter, update, **kwargs)
445
+
446
+ def replace_one(
447
+ self, filter: Dict[str, Any], replacement: Dict[str, Any], **kwargs
448
+ ):
449
+ """
450
+ Replace single document.
451
+
452
+ Pass-through to PyMongo.
453
+
454
+ Args:
455
+ filter: Query filter
456
+ replacement: Replacement document
457
+ **kwargs: Additional options
458
+
459
+ Returns:
460
+ UpdateResult
461
+ """
462
+ return self._pymongo_collection.replace_one(filter, replacement, **kwargs)
463
+
464
+ def delete_one(self, filter: Dict[str, Any], **kwargs):
465
+ """
466
+ Delete single document.
467
+
468
+ Pass-through to PyMongo.
469
+
470
+ Args:
471
+ filter: Query filter
472
+ **kwargs: Additional options
473
+
474
+ Returns:
475
+ DeleteResult
476
+ """
477
+ return self._pymongo_collection.delete_one(filter, **kwargs)
478
+
479
+ def delete_many(self, filter: Dict[str, Any], **kwargs):
480
+ """
481
+ Delete multiple documents.
482
+
483
+ Pass-through to PyMongo.
484
+
485
+ Args:
486
+ filter: Query filter
487
+ **kwargs: Additional options
488
+
489
+ Returns:
490
+ DeleteResult
491
+ """
492
+ return self._pymongo_collection.delete_many(filter, **kwargs)
493
+
494
+ # Index operations (pass-through)
495
+
496
+ def create_index(self, keys, **kwargs):
497
+ """Create index. Pass-through to PyMongo."""
498
+ return self._pymongo_collection.create_index(keys, **kwargs)
499
+
500
+ def create_indexes(self, indexes, **kwargs):
501
+ """Create multiple indexes. Pass-through to PyMongo."""
502
+ return self._pymongo_collection.create_indexes(indexes, **kwargs)
503
+
504
+ def drop_index(self, index_or_name, **kwargs):
505
+ """Drop index. Pass-through to PyMongo."""
506
+ return self._pymongo_collection.drop_index(index_or_name, **kwargs)
507
+
508
+ def drop_indexes(self, **kwargs):
509
+ """Drop all indexes. Pass-through to PyMongo."""
510
+ return self._pymongo_collection.drop_indexes(**kwargs)
511
+
512
+ def list_indexes(self, **kwargs):
513
+ """List indexes. Pass-through to PyMongo."""
514
+ return self._pymongo_collection.list_indexes(**kwargs)
515
+
516
+ def index_information(self, **kwargs):
517
+ """Get index information. Pass-through to PyMongo."""
518
+ return self._pymongo_collection.index_information(**kwargs)
519
+
520
+ # Collection operations
521
+
522
+ def drop(self, **kwargs):
523
+ """Drop collection. Pass-through to PyMongo."""
524
+ return self._pymongo_collection.drop(**kwargs)
525
+
526
+ def rename(self, new_name: str, **kwargs):
527
+ """Rename collection. Pass-through to PyMongo."""
528
+ return self._pymongo_collection.rename(new_name, **kwargs)
529
+
530
+ # XLR8-specific methods
531
+
532
+ def set_schema(self, schema: Schema) -> None:
533
+ """
534
+ Set or update schema for acceleration.
535
+
536
+ Args:
537
+ schema: Schema definition
538
+ """
539
+ self._schema = schema
540
+
541
+ def get_schema(self) -> Optional[Schema]:
542
+ """
543
+ Get current schema.
544
+
545
+ Returns:
546
+ Schema or None
547
+ """
548
+ return self._schema
549
+
550
+ def clear_cache(self) -> None:
551
+ """
552
+ Clear Parquet cache for this collection.
553
+
554
+ TODO: Implement in storage layer.
555
+ """
556
+ # Will be implemented in storage milestone
557
+ pass
558
+
559
+
560
+ def accelerate(
561
+ pymongo_collection: PyMongoCollection,
562
+ schema: Schema,
563
+ mongo_uri: Union[str, Callable[[], str]],
564
+ cache_dir: Optional[str] = None,
565
+ enable_cache: bool = True,
566
+ metadata_cardinality: int = 1,
567
+ approx_document_size_bytes: int = 500,
568
+ ) -> XLR8Collection:
569
+ """
570
+ Convenience function to wrap a PyMongo collection with acceleration.
571
+
572
+ ┌─────────────────────────────────────────────────────────────────────────┐
573
+ │ DATA FLOW EXAMPLE - MAIN ENTRY POINT: │
574
+ │ │
575
+ │ INPUT: │
576
+ │ - pymongo_collection: client["main"]["sensorLogs"] │
577
+ │ - schema: Schema(time_field="timestamp", fields={...}) │
578
+ │ - mongo_uri: Connection string used by accelerated workers │
579
+ │ │
580
+ │ Example: │
581
+ │ accelerate( │
582
+ │ collection, │
583
+ │ schema, │
584
+ │ mongo_uri="mongodb://localhost:27017", # Or callable │
585
+ │ ) │
586
+ │ │
587
+ │ OUTPUT: XLR8Collection wrapper that: │
588
+ │ - Wraps pymongo collection for transparent pass-through │
589
+ │ - Stores schema for type-aware Parquet encoding │
590
+ │ - Stores mongo_uri for workers to create their own connections │
591
+ │ │
592
+ │ WHAT HAPPENS NEXT: │
593
+ │ 1. User calls: xlr8_col.find({...}) │
594
+ │ 2. Returns XLR8Cursor (wraps query params) │
595
+ │ 3. User calls: cursor.to_dataframe() │
596
+ │ 4. Workers use mongo_uri to create their own connections │
597
+ └─────────────────────────────────────────────────────────────────────────┘
598
+
599
+ Args:
600
+ pymongo_collection: PyMongo Collection instance
601
+ schema: Schema definition
602
+ mongo_uri: MongoDB connection string (str) or callable that returns one.
603
+ Required for accelerated execution. Can be:
604
+ - A string: "mongodb://localhost:27017"
605
+ - A callable: lambda: os.environ["MONGODB_URI"]
606
+ cache_dir: Cache directory (default: .xlr8_cache)
607
+ enable_cache: Enable caching
608
+ metadata_cardinality: Number of unique metadata combinations
609
+ (e.g., number of sensors)
610
+ approx_document_size_bytes: Approximate size of each document in bytes
611
+ (default: 500)
612
+
613
+ Returns:
614
+ XLR8Collection wrapper
615
+
616
+ Note:
617
+ flush_ram_limit_mb and max_workers are parameters of to_dataframe(),
618
+ to_polars(), etc. for per-query control.
619
+
620
+ Example:
621
+ >>> import pymongo
622
+ >>> from xlr8 import accelerate, Schema, Types
623
+ >>>
624
+ >>> # Connection string or callable
625
+ >>> MONGO_URI = "mongodb://localhost:27017"
626
+ >>> # OR: get_uri = lambda: os.environ["MONGODB_URI"]
627
+ >>>
628
+ >>> client = pymongo.MongoClient(MONGO_URI)
629
+ >>> col = client.mydb.sensor_logs
630
+ >>>
631
+ >>> schema = Schema(
632
+ ... time_field="timestamp",
633
+ ... fields={
634
+ ... "timestamp": Types.Timestamp(),
635
+ ... "sensor_id": Types.String(),
636
+ ... "value": Types.Float(),
637
+ ... },
638
+ ... )
639
+ >>>
640
+ >>> # Pass mongo_uri for accelerated workers
641
+ >>> accelerated_col = accelerate(col, schema, mongo_uri=MONGO_URI)
642
+ >>>
643
+ >>> # max_workers and flush_ram_limit_mb are per-query
644
+ >>> from datetime import timedelta
645
+ >>> df = accelerated_col.find({
646
+ ... "timestamp": {"$gte": start, "$lt": end}
647
+ ... }).to_dataframe(
648
+ ... max_workers=8,
649
+ ... chunking_granularity=timedelta(days=1),
650
+ ... flush_ram_limit_mb=2000,
651
+ ... )
652
+ """
653
+ return XLR8Collection(
654
+ pymongo_collection=pymongo_collection,
655
+ schema=schema,
656
+ mongo_uri=mongo_uri,
657
+ cache_dir=cache_dir,
658
+ enable_cache=enable_cache,
659
+ metadata_cardinality=metadata_cardinality,
660
+ approx_document_size_bytes=approx_document_size_bytes,
661
+ )