xlr8-0.1.7b3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
+ """Type stubs for XLR8 Cursor.
+
+ Provides IDE autocomplete for all PyMongo cursor methods plus XLR8 extensions.
+ """
+
+ from __future__ import annotations
+
+ from datetime import date, datetime, timedelta
+ from typing import (
+     Any,
+     Callable,
+     Dict,
+     Generator,
+     Generic,
+     List,
+     Literal,
+     Mapping,
+     Optional,
+     Tuple,
+     TypeVar,
+     Union,
+     overload,
+ )
+
+ import pandas as pd
+ import polars as pl
+ import pyarrow as pa
+ from bson.code import Code
+ from pymongo.cursor_shared import _Hint, _Sort
+ from pymongo.synchronous.client_session import ClientSession
+ from pymongo.synchronous.collection import Collection
+ from pymongo.synchronous.cursor import Cursor as PyMongoCursor
+ from pymongo.typings import _CollationIn
+
+ _DocumentType = TypeVar("_DocumentType", bound=Mapping[str, Any])
+
+ class XLR8Cursor(Generic[_DocumentType]):
+     """PyMongo-compatible cursor with optional acceleration.
+
+     All PyMongo cursor methods work via delegation.
+     Adds four XLR8-specific conversion methods (to_dataframe, to_polars,
+     to_dataframe_batches, stream_to_callback) plus raw_cursor() and
+     explain_acceleration().
+     """
+
+     def __init__(
+         self,
+         collection: Any,  # XLR8Collection
+         query_filter: Dict[str, Any],
+         projection: Optional[Dict[str, Any]] = None,
+         skip: int = 0,
+         limit: int = 0,
+         sort: Optional[List[Tuple[str, int]]] = None,
+         batch_size: int = 1000,
+         # PyMongo compatibility parameters (passed through to PyMongo cursor)
+         no_cursor_timeout: bool = False,
+         cursor_type: int = ...,
+         allow_partial_results: bool = False,
+         oplog_replay: bool = False,
+         collation: Optional[Dict[str, Any]] = None,
+         hint: Optional[Any] = None,
+         max_scan: Optional[int] = None,
+         max_time_ms: Optional[int] = None,
+         max: Optional[List[Tuple[str, Any]]] = None,
+         min: Optional[List[Tuple[str, Any]]] = None,
+         return_key: Optional[bool] = None,
+         show_record_id: Optional[bool] = None,
+         snapshot: Optional[bool] = None,
+         comment: Optional[Any] = None,
+         session: Optional[Any] = None,
+         allow_disk_use: Optional[bool] = None,
+         let: Optional[Dict[str, Any]] = None,
+         **kwargs: Any,
+     ) -> None: ...
+     @property
+     def collection(self) -> Collection[_DocumentType]: ...
+     @property
+     def retrieved(self) -> int: ...
+     def clone(self) -> XLR8Cursor[_DocumentType]: ...
+     def add_option(self, mask: int) -> XLR8Cursor[_DocumentType]: ...
+     def remove_option(self, mask: int) -> XLR8Cursor[_DocumentType]: ...
+     def allow_disk_use(self, allow_disk_use: bool) -> XLR8Cursor[_DocumentType]: ...
+     def limit(self, limit: int) -> XLR8Cursor[_DocumentType]: ...
+     def batch_size(self, batch_size: int) -> XLR8Cursor[_DocumentType]: ...
+     def skip(self, skip: int) -> XLR8Cursor[_DocumentType]: ...
+     def max_time_ms(self, max_time_ms: int | None) -> XLR8Cursor[_DocumentType]: ...
+     def max_await_time_ms(
+         self, max_await_time_ms: int | None
+     ) -> XLR8Cursor[_DocumentType]: ...
+     @overload
+     def __getitem__(self, index: int) -> _DocumentType: ...
+     @overload
+     def __getitem__(self, index: slice) -> XLR8Cursor[_DocumentType]: ...
+     def max_scan(self, max_scan: int | None) -> XLR8Cursor[_DocumentType]: ...
+     def max(self, spec: _Sort) -> XLR8Cursor[_DocumentType]: ...
+     def min(self, spec: _Sort) -> XLR8Cursor[_DocumentType]: ...
+     def sort(
+         self, key_or_list: _Hint, direction: int | str | None = None
+     ) -> XLR8Cursor[_DocumentType]: ...
+     def explain(self) -> _DocumentType: ...
+     def hint(self, index: _Hint | None) -> XLR8Cursor[_DocumentType]: ...
+     def comment(self, comment: Any) -> XLR8Cursor[_DocumentType]: ...
+     def where(self, code: str | Code) -> XLR8Cursor[_DocumentType]: ...
+     def collation(
+         self, collation: _CollationIn | None
+     ) -> XLR8Cursor[_DocumentType]: ...
+     @property
+     def alive(self) -> bool: ...
+     @property
+     def cursor_id(self) -> int | None: ...
+     @property
+     def address(self) -> tuple[str, Any] | None: ...
+     @property
+     def session(self) -> ClientSession | None: ...
+     def close(self) -> None: ...
+     def distinct(self, key: str) -> list[Any]: ...
+     def rewind(self) -> XLR8Cursor[_DocumentType]: ...
+     def next(self) -> _DocumentType: ...
+     def __next__(self) -> _DocumentType: ...
+     def __iter__(self) -> XLR8Cursor[_DocumentType]: ...
+     def __enter__(self) -> XLR8Cursor[_DocumentType]: ...
+     def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: ...
+     def to_list(self, length: int | None = None) -> list[_DocumentType]: ...
+
+     # XLR8-specific accelerated methods
+     def to_dataframe(
+         self,
+         accelerate: bool = True,
+         cache_read: bool = True,
+         cache_write: bool = True,
+         start_date: Optional[Union[datetime, date, str]] = None,
+         end_date: Optional[Union[datetime, date, str]] = None,
+         coerce: Literal["raise", "error"] = "raise",
+         max_workers: int = 4,
+         chunking_granularity: Optional[timedelta] = None,
+         row_group_size: Optional[int] = None,
+         flush_ram_limit_mb: int = 512,
+     ) -> pd.DataFrame: ...
+     def to_polars(
+         self,
+         accelerate: bool = True,
+         cache_read: bool = True,
+         cache_write: bool = True,
+         start_date: Optional[Union[datetime, date, str]] = None,
+         end_date: Optional[Union[datetime, date, str]] = None,
+         coerce: Literal["raise", "error"] = "raise",
+         max_workers: int = 4,
+         chunking_granularity: Optional[timedelta] = None,
+         row_group_size: Optional[int] = None,
+         any_type_strategy: Literal["float", "string", "keep_struct"] = "float",
+         flush_ram_limit_mb: int = 512,
+     ) -> pl.DataFrame: ...
+     def to_dataframe_batches(
+         self,
+         batch_size: int = 10000,
+         cache_read: bool = True,
+         cache_write: bool = True,
+         start_date: Optional[Union[datetime, date, str]] = None,
+         end_date: Optional[Union[datetime, date, str]] = None,
+         coerce: Literal["raise", "error"] = "raise",
+         max_workers: int = 4,
+         chunking_granularity: Optional[timedelta] = None,
+         row_group_size: Optional[int] = None,
+         flush_ram_limit_mb: int = 512,
+     ) -> Generator[pd.DataFrame, None, None]: ...
+     def stream_to_callback(
+         self,
+         callback: Callable[[pa.Table, Dict[str, Any]], None],
+         *,
+         partition_time_delta: timedelta,
+         partition_by: Optional[Union[str, List[str]]] = None,
+         any_type_strategy: Literal["float", "string", "keep_struct"] = "float",
+         max_workers: int = 4,
+         chunking_granularity: Optional[timedelta] = None,
+         row_group_size: Optional[int] = None,
+         flush_ram_limit_mb: int = 512,
+         cache_read: bool = True,
+         cache_write: bool = True,
+     ) -> Dict[str, Any]: ...
+     def raw_cursor(self) -> PyMongoCursor[_DocumentType]: ...
+     def explain_acceleration(self) -> Dict[str, Any]: ...
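
The most involved signature in the stub above is stream_to_callback, which feeds each time partition to a user callback typed Callable[[pa.Table, Dict[str, Any]], None] and returns a summary Dict[str, Any]. Below is a minimal sketch of a compatible callback, assuming `cursor` is an XLR8Cursor obtained from an accelerated collection's find() (as in the wrapper module that follows); the "partition_start" metadata key read here is an illustrative assumption, not a documented field.

    import pyarrow as pa
    import pyarrow.parquet as pq
    from datetime import timedelta
    from typing import Any, Dict

    def write_partition(table: pa.Table, meta: Dict[str, Any]) -> None:
        # Persist each streamed partition as its own Parquet file.
        # "partition_start" is an assumed metadata key, used only for naming.
        start = meta.get("partition_start", "partition")
        pq.write_table(table, f"part-{start}.parquet")

    # `cursor` comes from an accelerated collection's find();
    # partition_time_delta is keyword-only per the stub signature.
    stats = cursor.stream_to_callback(
        write_partition,
        partition_time_delta=timedelta(days=1),
    )

Because the stub declares the return type as Dict[str, Any], `stats` is a plain dict; its exact keys are not specified by the stub.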
@@ -0,0 +1,400 @@
+ """
+ XLR8 collection wrapper with PyMongo compatibility.
+
+ ================================================================================
+ DATA FLOW - COLLECTION WRAPPER
+ ================================================================================
+
+ This module wraps pymongo.collection.Collection to provide the `accelerate()`
+ function - the main entry point for users.
+
+ TYPICAL USAGE FLOW:
+ ────────────────────────────────────────────────────────────────────────────────
+
+ 1. USER WRAPS A COLLECTION:
+ ┌─────────────────────────────────────────────────────────────────────────────┐
+ │ from xlr8 import accelerate, Schema, Types │
+ │ │
+ │ schema = Schema( │
+ │     time_field="timestamp", │
+ │     fields={ │
+ │         "timestamp": Types.Timestamp("ms", tz="UTC"), │
+ │         "metadata.device_id": Types.ObjectId(), │
+ │         "metadata.sensor_id": Types.ObjectId(), │
+ │         "value": Types.Any(),  # Polymorphic - can be int, float, str, etc. │
+ │     } │
+ │ ) │
+ │ │
+ │ xlr8_col = accelerate(pymongo_collection, schema, mongo_uri) │
+ └─────────────────────────────────────────────────────────────────────────────┘
+
+ 2. USER CALLS find() - RETURNS XLR8Cursor (NOT PYMONGO CURSOR):
+ ┌─────────────────────────────────────────────────────────────────────────────┐
+ │ cursor = xlr8_col.find({ │
+ │     "timestamp": {"$gte": start, "$lt": end}, │
+ │     "metadata.device_id": ObjectId("64a..."), │
+ │ }) │
+ │ # cursor is XLR8Cursor, wrapping the query params │
+ └─────────────────────────────────────────────────────────────────────────────┘
+
+ 3. USER CALLS to_dataframe() - TRIGGERS ACCELERATION:
+ ┌─────────────────────────────────────────────────────────────────────────────┐
+ │ df = cursor.to_dataframe() │
+ │ # This triggers: │
+ │ #   1. Query analysis (can we chunk by time?) │
+ │ #   2. Check cache (have we fetched this before?) │
+ │ #   3. Parallel fetch via Rust async backend │
+ │ #   4. Stream to Parquet cache │
+ │ #   5. Read back and return DataFrame │
+ └─────────────────────────────────────────────────────────────────────────────┘
+
+ KEY CONFIG OPTIONS:
+ ────────────────────────────────────────────────────────────────────────────────
+ - schema: Required for type-aware encoding (especially Types.Any)
+ - mongo_uri: Required for accelerated execution (workers create connections)
+ - cache_dir: Where to store Parquet cache (default: .xlr8_cache)
+
+ PER-QUERY OPTIONS (via to_dataframe):
+ ────────────────────────────────────────────────────────────────────────────────
+ - max_workers: Number of parallel workers (default: 4)
+ - flush_ram_limit_mb: RAM budget for batch sizing (default: 512)
+ - chunking_granularity: Time chunk size (e.g., timedelta(days=7))
+
+ ================================================================================
+ """
+
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ from pymongo.collection import Collection as PyMongoCollection
+
+ from xlr8.collection.cursor import XLR8Cursor
+ from xlr8.schema import Schema
+
+
+ class XLR8Collection:
+     """
+     PyMongo-compatible collection wrapper with acceleration.
+
+     Drop-in replacement for pymongo.collection.Collection that transparently
+     accelerates analytical queries through parallel execution and caching.
+
+     All write operations (insert, update, delete) pass through to PyMongo.
+     Read operations (find, aggregate) can be accelerated if:
+     - Schema is provided
+     - Query has time-range predicates
+     - Query doesn't use complex operators ($nor, $where, etc.)
+
+     Example:
+         >>> import pymongo
+         >>> from xlr8 import XLR8Collection, Schema, Types
+         >>>
+         >>> # Create schema
+         >>> schema = Schema(
+         ...     time_field="timestamp",
+         ...     fields={
+         ...         "timestamp": Types.Timestamp(),
+         ...         "value": Types.Float(),
+         ...         "sensor_id": Types.String(),
+         ...     }
+         ... )
+         >>>
+         >>> # Wrap collection with mongo_uri for accelerated execution
+         >>> client = pymongo.MongoClient("mongodb://localhost:27017")
+         >>> pymongo_col = client.mydb.mycollection
+         >>> col = XLR8Collection(pymongo_col, schema=schema, mongo_uri="mongodb://localhost:27017")
+         >>>
+         >>> # Use like regular PyMongo
+         >>> cursor = col.find({"timestamp": {"$gte": start, "$lt": end}})
+         >>> df = cursor.to_dataframe(flush_ram_limit_mb=2000)
+     """
+
+     def __init__(
+         self,
+         pymongo_collection: PyMongoCollection,
+         schema: Optional[Schema] = None,
+         mongo_uri: Union[str, Callable[[], str], None] = None,
+         approx_document_size_bytes: int = 500,
+     ):
+         """
+         Initialize XLR8 collection wrapper.
+
+         Args:
+             pymongo_collection: PyMongo Collection instance
+             schema: Optional schema definition for acceleration
+             mongo_uri: MongoDB connection string (str) or callable that returns one.
+                 Required for accelerated execution. Can be:
+                 - A string: "mongodb://localhost:27017"
+                 - A callable: lambda: os.environ["MONGODB_URI"]
+             approx_document_size_bytes: Approximate size of each document in bytes
+                 (default: 500). Used for memory budget calculations.
+
+         Note:
+             Cache directory is auto-managed based on query hash.
+             flush_ram_limit_mb and max_workers are parameters of to_dataframe(),
+             to_polars(), etc. for per-query control.
+         """
+         self._pymongo_collection = pymongo_collection
+         self._schema = schema
+         self._mongo_uri = mongo_uri
+         self._approx_document_size_bytes = approx_document_size_bytes
+
+     def raw_collection(self) -> PyMongoCollection:
+         """
+         Get direct access to underlying PyMongo collection.
+
+         This is an escape hatch for power users who need direct access to PyMongo
+         collection methods that may not be available through delegation.
+
+         Returns:
+             pymongo.collection.Collection: The underlying PyMongo collection
+
+         Example:
+             >>> xlr8_col = accelerate(collection, schema=schema)
+             >>> xlr8_col.raw_collection().watch()  # Use MongoDB change streams
+             >>> xlr8_col.raw_collection().list_indexes()  # Direct PyMongo access
+         """
+         return self._pymongo_collection
+
+     # PyMongo pass-through properties
+     @property
+     def name(self) -> str:
+         """Collection name."""
+         return self._pymongo_collection.name
+
+     @property
+     def full_name(self) -> str:
+         """Full collection name (database.collection)."""
+         return self._pymongo_collection.full_name
+
+     @property
+     def database(self):
+         """Parent database."""
+         return self._pymongo_collection.database
+
+     # Public accessor properties for cursor usage
+
+     @property
+     def schema(self):
+         """Schema definition for acceleration."""
+         return self._schema
+
+     @property
+     def pymongo_collection(self):
+         """Underlying PyMongo collection instance."""
+         return self._pymongo_collection
+
+     @property
+     def mongo_uri(self):
+         """MongoDB connection URI for accelerated execution."""
+         return self._mongo_uri
+
+     @property
+     def approx_document_size_bytes(self) -> int:
+         """Approximate size of each document in bytes."""
+         return self._approx_document_size_bytes
+
+     def __getattr__(self, name: str):
+         """
+         Delegate unknown methods to PyMongo collection.
+
+         Why:
+             Provides full PyMongo compatibility without manually implementing
+             every collection method (insert, update, delete, indexes, etc.).
+
+         Example:
+             >>> xlr8_col.insert_one({...})  # Works via delegation
+             >>> xlr8_col.create_index("timestamp")  # Works via delegation
+             >>> count = xlr8_col.count_documents({})  # Works via delegation
+         """
+         return getattr(self._pymongo_collection, name)
+
+     # Read operations (can be accelerated)
+     def find(
+         self,
+         filter: Optional[Dict[str, Any]] = None,
+         projection: Optional[Dict[str, Any]] = None,
+         skip: int = 0,
+         limit: int = 0,
+         sort: Optional[List[tuple]] = None,
+         batch_size: int = 1000,
+         **kwargs,
+     ) -> XLR8Cursor:
+         """
+         Query collection with optional acceleration.
+
+         Returns XLR8Cursor which is PyMongo-compatible but can accelerate
+         to_dataframe() / to_polars() conversions.
+
+         DATA FLOW EXAMPLE:
+
+         INPUT (filter parameter):
+             {
+                 "$or": [
+                     {"metadata.sensor_id": ObjectId("64a...")},
+                     {"metadata.sensor_id": ObjectId("64b...")},
+                 ],
+                 "timestamp": {"$gte": datetime(2024,1,1), "$lt": datetime(...)}
+             }
+
+         OUTPUT: XLR8Cursor object containing:
+             - _filter: The query dict (unchanged)
+             - _collection: Reference back to this XLR8Collection
+             - _projection, _skip, _limit, _sort: Query modifiers
+
+         NEXT STEP: User calls cursor.to_dataframe() which triggers:
+             1. Query analysis in analysis/brackets.py
+             2. Execution planning in execution/planner.py
+             3. Parallel fetch in execution/worker.py
+
+         Args:
+             filter: Query filter dict
+             projection: Field projection dict
+             skip: Number of documents to skip
+             limit: Maximum documents to return
+             sort: Sort specification
+             batch_size: Batch size for iteration
+             **kwargs: Additional PyMongo cursor options
+
+         Returns:
+             XLR8Cursor instance
+
+         Example:
+             >>> # Simple query
+             >>> cursor = col.find({"status": "active"})
+             >>>
+             >>> # Query with time range (accelerated)
+             >>> cursor = col.find({
+             ...     "timestamp": {"$gte": start, "$lt": end},
+             ...     "sensor_id": "sensor_1"
+             ... })
+             >>> df = cursor.to_dataframe()
+         """
+         if filter is None:
+             filter = {}
+
+         return XLR8Cursor(
+             collection=self,
+             query_filter=filter,
+             projection=projection,
+             skip=skip,
+             limit=limit,
+             sort=sort,
+             batch_size=batch_size,
+             **kwargs,  # Pass through all PyMongo cursor options
+         )
+
+     # XLR8-specific methods
+
+     def set_schema(self, schema: Schema) -> None:
+         """
+         Set or update schema for acceleration.
+
+         Args:
+             schema: Schema definition
+         """
+         self._schema = schema
+
+     def get_schema(self) -> Optional[Schema]:
+         """
+         Get current schema.
+
+         Returns:
+             Schema or None
+         """
+         return self._schema
+
+
+ def accelerate(
+     pymongo_collection: PyMongoCollection,
+     schema: Schema,
+     mongo_uri: Union[str, Callable[[], str]],
+     approx_document_size_bytes: int = 500,
+ ) -> XLR8Collection:
+     """
+     Convenience function to wrap a PyMongo collection with acceleration.
+
+     DATA FLOW EXAMPLE - MAIN ENTRY POINT:
+
+     INPUT:
+         - pymongo_collection: client["main"]["sensorData"]
+         - schema: Schema(time_field="timestamp", fields={...})
+         - mongo_uri: Connection string used by accelerated workers
+
+     Example:
+         accelerate(
+             collection,
+             schema,
+             mongo_uri="mongodb://localhost:27017",  # Or callable
+         )
+
+     OUTPUT: XLR8Collection wrapper that:
+         - Wraps pymongo collection for transparent pass-through
+         - Stores schema for type-aware Parquet encoding
+         - Stores mongo_uri for workers to create their own connections
+
+     WHAT HAPPENS NEXT:
+         1. User calls: xlr8_col.find({...})
+         2. Returns XLR8Cursor (wraps query params)
+         3. User calls: cursor.to_dataframe()
+         4. Workers use mongo_uri to create their own connections
+
+     Args:
+         pymongo_collection: PyMongo Collection instance
+         schema: Schema definition
+         mongo_uri: MongoDB connection string (str) or callable that returns one.
+             Required for accelerated execution. Can be:
+             - A string: "mongodb://localhost:27017"
+             - A callable: lambda: os.environ["MONGODB_URI"]
+         approx_document_size_bytes: Approximate size of each document in bytes
+             (default: 500). Used for memory budget calculations.
+
+     Returns:
+         XLR8Collection wrapper
+
+     Note:
+         Cache directory is auto-managed based on query hash.
+         flush_ram_limit_mb and max_workers are parameters of to_dataframe(),
+         to_polars(), etc. for per-query control.
+
+     Example:
+         >>> import pymongo
+         >>> from xlr8 import accelerate, Schema, Types
+         >>>
+         >>> # Connection string or callable
+         >>> MONGO_URI = "mongodb://localhost:27017"
+         >>> # OR: get_uri = lambda: os.environ["MONGODB_URI"]
+         >>>
+         >>> client = pymongo.MongoClient(MONGO_URI)
+         >>> col = client.mydb.sensor_logs
+         >>>
+         >>> schema = Schema(
+         ...     time_field="timestamp",
+         ...     fields={
+         ...         "timestamp": Types.Timestamp(),
+         ...         "sensor_id": Types.String(),
+         ...         "value": Types.Float(),
+         ...     },
+         ... )
+         >>>
+         >>> # Pass mongo_uri for accelerated workers
+         >>> accelerated_col = accelerate(col, schema, mongo_uri=MONGO_URI)
+         >>>
+         >>> # max_workers and flush_ram_limit_mb are per-query
+         >>> from datetime import timedelta
+         >>> df = accelerated_col.find({
+         ...     "timestamp": {"$gte": start, "$lt": end}
+         ... }).to_dataframe(
+         ...     max_workers=8,
+         ...     chunking_granularity=timedelta(days=1),
+         ...     flush_ram_limit_mb=2000,
+         ... )
+     """
+     return XLR8Collection(
+         pymongo_collection=pymongo_collection,
+         schema=schema,
+         mongo_uri=mongo_uri,
+         approx_document_size_bytes=approx_document_size_bytes,
+     )
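
Because the stub types to_dataframe_batches as returning Generator[pd.DataFrame, None, None], result sets larger than RAM can be reduced incrementally instead of materialized at once. A minimal end-to-end sketch follows, assuming a local MongoDB and the same schema as the docstring examples; the running-mean reduction over "value" is purely illustrative.

    import pymongo
    from datetime import datetime
    from xlr8 import accelerate, Schema, Types

    URI = "mongodb://localhost:27017"
    col = accelerate(
        pymongo.MongoClient(URI).mydb.sensor_logs,
        Schema(
            time_field="timestamp",
            fields={
                "timestamp": Types.Timestamp(),
                "sensor_id": Types.String(),
                "value": Types.Float(),
            },
        ),
        mongo_uri=URI,
    )

    start, end = datetime(2024, 1, 1), datetime(2024, 2, 1)
    total, count = 0.0, 0
    # Each batch arrives as an independent pandas DataFrame of up to batch_size rows.
    for batch in col.find(
        {"timestamp": {"$gte": start, "$lt": end}}
    ).to_dataframe_batches(batch_size=50_000):
        total += float(batch["value"].sum())
        count += len(batch)
    print(total / count if count else float("nan"))

Writes still pass through unchanged during all of this: a call like col.insert_one({...}) reaches PyMongo via the __getattr__ delegation shown above.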