sqlitesearch 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/PKG-INFO +1 -1
  2. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/pyproject.toml +2 -0
  3. sqlitesearch-0.0.2/sqlitesearch/__version__.py +1 -0
  4. sqlitesearch-0.0.2/sqlitesearch/operators.py +32 -0
  5. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/sqlitesearch/text/fts.py +191 -10
  6. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/sqlitesearch/vector/lsh.py +224 -22
  7. sqlitesearch-0.0.2/tests/test_range_filters.py +458 -0
  8. sqlitesearch-0.0.2/tests/test_vector_range_filters.py +333 -0
  9. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/tests/test_vector_search.py +2 -2
  10. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/uv.lock +159 -31
  11. sqlitesearch-0.0.1/sqlitesearch/__version__.py +0 -1
  12. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/.github/workflows/tests.yml +0 -0
  13. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/.gitignore +0 -0
  14. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/CLAUDE.md +0 -0
  15. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/Makefile +0 -0
  16. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/README.md +0 -0
  17. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/notebooks/faq_search.ipynb +0 -0
  18. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/notebooks/faq_search.py +0 -0
  19. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/plan.md +0 -0
  20. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/sqlitesearch/__init__.py +0 -0
  21. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/sqlitesearch/text/__init__.py +0 -0
  22. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/sqlitesearch/vector/__init__.py +0 -0
  23. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/tests/__init__.py +0 -0
  24. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/tests/test_integration.py +0 -0
  25. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/tests/test_performance.py +0 -0
  26. {sqlitesearch-0.0.1 → sqlitesearch-0.0.2}/tests/test_text_search.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sqlitesearch
3
- Version: 0.0.1
3
+ Version: 0.0.2
4
4
  Summary: A tiny, SQLite-backed search library for small, local projects
5
5
  Author: sqlitesearch contributors
6
6
  License: MIT
@@ -35,6 +35,8 @@ dev = [
35
35
  "ruff>=0.5.0",
36
36
  "requests",
37
37
  "minsearch",
38
+ "build>=1.4.0",
39
+ "twine>=6.2.0",
38
40
  ]
39
41
 
40
42
  [tool.hatch.build.targets.wheel]
@@ -0,0 +1 @@
1
+ __version__ = "0.0.2"
@@ -0,0 +1,32 @@
1
+ """
2
+ Operators for range filtering in sqlitesearch.
3
+
4
+ This module provides operator functions used for numeric and date range filtering.
5
+ """
6
+
7
+ # Operator mapping for range filters
8
+ # Each operator is a lambda that takes two values (a, b) and returns a boolean
9
+ OPERATORS = {
10
+ '>=': lambda a, b: a >= b,
11
+ '>': lambda a, b: a > b,
12
+ '<=': lambda a, b: a <= b,
13
+ '<': lambda a, b: a < b,
14
+ '==': lambda a, b: a == b,
15
+ '!=': lambda a, b: a != b,
16
+ }
17
+
18
+
19
+ def is_range_filter(value: object) -> bool:
20
+ """
21
+ Check if a value is a range filter (list of tuples).
22
+
23
+ Args:
24
+ value: The value to check.
25
+
26
+ Returns:
27
+ True if value is a list of (operator, value) tuples.
28
+ """
29
+ return (
30
+ isinstance(value, list)
31
+ and all(isinstance(v, tuple) and len(v) == 2 for v in value)
32
+ )
@@ -9,8 +9,11 @@ import json
9
9
  import re
10
10
  import sqlite3
11
11
  import threading
12
+ from datetime import date, datetime
12
13
  from typing import Any, Optional
13
14
 
15
+ from sqlitesearch.operators import OPERATORS, is_range_filter
16
+
14
17
 
15
18
  class TextSearchIndex:
16
19
  """
@@ -20,7 +23,7 @@ class TextSearchIndex:
20
23
  full-text search with BM25 ranking.
21
24
 
22
25
  API matches minsearch.Index for easy migration:
23
- - __init__(text_fields, keyword_fields=None, id_field=None)
26
+ - __init__(text_fields, keyword_fields=None, numeric_fields=None, date_fields=None, id_field=None)
24
27
  - fit(docs) - Index documents (only if index is empty)
25
28
  - add(doc) - Add a single document to existing index
26
29
  - search(query, filter_dict=None, boost_dict=None, num_results=10, output_ids=False)
@@ -29,17 +32,21 @@ class TextSearchIndex:
29
32
  >>> index = TextSearchIndex(
30
33
  ... text_fields=["title", "description"],
31
34
  ... keyword_fields=["category"],
35
+ ... numeric_fields=["price", "rating"],
36
+ ... date_fields=["created_at"],
32
37
  ... id_field="id",
33
38
  ... db_path="search.db"
34
39
  ... )
35
- >>> index.fit([{"id": 1, "title": "Hello", "description": "World"}])
36
- >>> results = index.search("hello world")
40
+ >>> index.fit([{"id": 1, "title": "Hello", "description": "World", "price": 100}])
41
+ >>> results = index.search("hello", filter_dict={"price": [('>=', 50), ('<', 200)]})
37
42
  """
38
43
 
39
44
  def __init__(
40
45
  self,
41
46
  text_fields: list[str],
42
47
  keyword_fields: Optional[list[str]] = None,
48
+ numeric_fields: Optional[list[str]] = None,
49
+ date_fields: Optional[list[str]] = None,
43
50
  id_field: Optional[str] = None,
44
51
  db_path: str = "sqlitesearch.db",
45
52
  stemming: bool = False,
@@ -50,12 +57,16 @@ class TextSearchIndex:
50
57
  Args:
51
58
  text_fields: List of field names to index with FTS5.
52
59
  keyword_fields: List of field names for exact filtering (not full-text searched).
60
+ numeric_fields: List of field names for numeric range filtering.
61
+ date_fields: List of field names for date range filtering.
53
62
  id_field: Field name to use as document ID. If None, auto-generates IDs.
54
63
  db_path: Path to the SQLite database file.
55
64
  stemming: If True, use Porter stemmer for better matching (e.g., "running" matches "run").
56
65
  """
57
66
  self.text_fields = text_fields
58
67
  self.keyword_fields = list(keyword_fields) if keyword_fields is not None else []
68
+ self.numeric_fields = list(numeric_fields) if numeric_fields is not None else []
69
+ self.date_fields = list(date_fields) if date_fields is not None else []
59
70
  self.id_field = id_field
60
71
  self.db_path = db_path
61
72
  self.stemming = stemming
@@ -85,11 +96,23 @@ class TextSearchIndex:
85
96
  keyword_cols.append(f', "{field}" TEXT')
86
97
  keyword_sql = "\n".join(keyword_cols)
87
98
 
99
+ # Build numeric column definitions
100
+ numeric_cols = []
101
+ for field in self.numeric_fields:
102
+ numeric_cols.append(f', "{field}" REAL')
103
+ numeric_sql = "\n".join(numeric_cols)
104
+
105
+ # Build date column definitions (store as ISO 8601 strings for comparison)
106
+ date_cols = []
107
+ for field in self.date_fields:
108
+ date_cols.append(f', "{field}" TEXT')
109
+ date_sql = "\n".join(date_cols)
110
+
88
111
  # Create main documents table
89
112
  cursor.execute(f"""
90
113
  CREATE TABLE IF NOT EXISTS docs (
91
114
  id INTEGER PRIMARY KEY AUTOINCREMENT,
92
- doc_json TEXT NOT NULL{keyword_sql}
115
+ doc_json TEXT NOT NULL{keyword_sql}{numeric_sql}{date_sql}
93
116
  )
94
117
  """)
95
118
 
@@ -114,6 +137,14 @@ class TextSearchIndex:
114
137
  for field in self.keyword_fields:
115
138
  cursor.execute(f'CREATE INDEX IF NOT EXISTS idx_{field} ON docs ("{field}")')
116
139
 
140
+ # Create indexes on numeric fields for faster filtering
141
+ for field in self.numeric_fields:
142
+ cursor.execute(f'CREATE INDEX IF NOT EXISTS idx_num_{field} ON docs ("{field}")')
143
+
144
+ # Create indexes on date fields for faster filtering
145
+ for field in self.date_fields:
146
+ cursor.execute(f'CREATE INDEX IF NOT EXISTS idx_date_{field} ON docs ("{field}")')
147
+
117
148
  conn.commit()
118
149
 
119
150
  def _is_empty(self) -> bool:
@@ -164,19 +195,44 @@ class TextSearchIndex:
164
195
  conn = self._get_conn()
165
196
  cursor = conn.cursor()
166
197
 
167
- # Build column lists
168
- all_cols = ["doc_json"] + [f'"{field}"' for field in self.keyword_fields]
198
+ # Build column lists including keyword, numeric, and date fields
199
+ filter_cols = (
200
+ [f'"{field}"' for field in self.keyword_fields] +
201
+ [f'"{field}"' for field in self.numeric_fields] +
202
+ [f'"{field}"' for field in self.date_fields]
203
+ )
204
+ all_cols = ["doc_json"] + filter_cols
169
205
  col_names = ", ".join(all_cols)
170
206
  placeholders = ", ".join(["?"] * len(all_cols))
171
207
 
172
208
  for doc in docs:
173
- doc_json = json.dumps(doc)
209
+ # Convert date/datetime objects to ISO format for JSON serialization
210
+ doc_for_json = {}
211
+ for key, value in doc.items():
212
+ if isinstance(value, (date, datetime)):
213
+ doc_for_json[key] = value.isoformat()
214
+ else:
215
+ doc_for_json[key] = value
216
+ doc_json = json.dumps(doc_for_json)
217
+
174
218
  keyword_vals = [doc.get(field) for field in self.keyword_fields]
175
219
 
220
+ # Extract numeric values
221
+ numeric_vals = [doc.get(field) for field in self.numeric_fields]
222
+
223
+ # Extract date values and convert to ISO format
224
+ date_vals = []
225
+ for field in self.date_fields:
226
+ value = doc.get(field)
227
+ if isinstance(value, (date, datetime)):
228
+ date_vals.append(value.isoformat())
229
+ else:
230
+ date_vals.append(value)
231
+
176
232
  # Insert into main table
177
233
  cursor.execute(
178
234
  f"INSERT INTO docs ({col_names}) VALUES ({placeholders})",
179
- [doc_json] + keyword_vals
235
+ [doc_json] + keyword_vals + numeric_vals + date_vals
180
236
  )
181
237
  doc_id = cursor.lastrowid
182
238
 
@@ -221,7 +277,12 @@ class TextSearchIndex:
221
277
 
222
278
  Args:
223
279
  query: The search query string. Supports FTS5 query syntax.
224
- filter_dict: Dictionary of keyword fields to filter by.
280
+ filter_dict: Dictionary of filters. Can include:
281
+ - Keyword fields: {"field": "value"} for exact match
282
+ - Numeric fields: {"field": [('>=', 100), ('<', 200)]} for range filters
283
+ - Numeric fields: {"field": 100} for exact match
284
+ - Date fields: {"field": [('>=', date(...)), ('<', date(...))]} for range filters
285
+ - Any field: {"field": None} for null/missing values
225
286
  boost_dict: Dictionary of boost scores for text fields.
226
287
  num_results: Maximum number of results to return.
227
288
  output_ids: If True, adds an 'id' field with the document ID.
@@ -244,18 +305,31 @@ class TextSearchIndex:
244
305
  # Build FTS5 query with boosts
245
306
  fts_query = self._build_fts_query(query, boost_dict)
246
307
 
247
- # Build WHERE clause for keyword filters
308
+ # Build WHERE clause for filters (keyword, numeric, date)
248
309
  where_clauses = []
249
310
  where_params = []
250
311
 
251
312
  for field, value in filter_dict.items():
252
313
  if field in self.keyword_fields:
314
+ # Keyword field filters (exact match)
253
315
  if value is None:
254
316
  where_clauses.append(f'd."{field}" IS NULL')
255
317
  else:
256
318
  where_clauses.append(f'd."{field}" = ?')
257
319
  where_params.append(value)
258
320
 
321
+ elif field in self.numeric_fields:
322
+ # Numeric field filters (exact match or range)
323
+ where_clauses, where_params = self._add_numeric_filter(
324
+ where_clauses, where_params, field, value
325
+ )
326
+
327
+ elif field in self.date_fields:
328
+ # Date field filters (exact match or range)
329
+ where_clauses, where_params = self._add_date_filter(
330
+ where_clauses, where_params, field, value
331
+ )
332
+
259
333
  where_sql = " AND " + " AND ".join(where_clauses) if where_clauses else ""
260
334
 
261
335
  # Execute search query - simpler without content table
@@ -277,6 +351,8 @@ class TextSearchIndex:
277
351
  results = []
278
352
  for row in rows:
279
353
  doc = json.loads(row["doc_json"])
354
+ # Convert ISO date strings back to date/datetime objects
355
+ doc = self._convert_dates(doc)
280
356
  if output_ids:
281
357
  # Use id_field value if available, otherwise use database id
282
358
  if self.id_field:
@@ -291,6 +367,111 @@ class TextSearchIndex:
291
367
 
292
368
  return results
293
369
 
370
+ def _convert_dates(self, doc: dict[str, Any]) -> dict[str, Any]:
371
+ """
372
+ Convert ISO date strings back to date/datetime objects for date_fields.
373
+
374
+ Args:
375
+ doc: Document with potentially ISO formatted date strings.
376
+
377
+ Returns:
378
+ Document with date fields converted back to date/datetime objects.
379
+ """
380
+ if not self.date_fields:
381
+ return doc
382
+
383
+ for field in self.date_fields:
384
+ if field in doc and doc[field] is not None:
385
+ value = doc[field]
386
+ if isinstance(value, str):
387
+ # Check if string contains time component (has 'T' or ' ')
388
+ has_time = 'T' in value or ' ' in value
389
+
390
+ if has_time:
391
+ # Parse as datetime
392
+ try:
393
+ doc[field] = datetime.fromisoformat(value)
394
+ except ValueError:
395
+ pass
396
+ else:
397
+ # Parse as date only
398
+ try:
399
+ doc[field] = date.fromisoformat(value)
400
+ except ValueError:
401
+ pass
402
+ return doc
403
+
404
+ def _add_numeric_filter(
405
+ self,
406
+ where_clauses: list[str],
407
+ where_params: list[Any],
408
+ field: str,
409
+ value: Any,
410
+ ) -> tuple[list[str], list[Any]]:
411
+ """
412
+ Add a numeric filter to the WHERE clause.
413
+
414
+ Supports:
415
+ - None/missing values: {"field": None}
416
+ - Exact match: {"field": 100}
417
+ - Range filters: {"field": [('>=', 100), ('<', 200)]}
418
+
419
+ Returns:
420
+ Tuple of (updated where_clauses, updated where_params).
421
+ """
422
+ if value is None:
423
+ where_clauses.append(f'd."{field}" IS NULL')
424
+ elif is_range_filter(value):
425
+ # Range filter: [('>=', 100), ('<', 200)]
426
+ for op, op_value in value:
427
+ if op in OPERATORS and op_value is not None:
428
+ where_clauses.append(f'd."{field}" {op} ?')
429
+ where_params.append(op_value)
430
+ else:
431
+ # Exact match
432
+ where_clauses.append(f'd."{field}" = ?')
433
+ where_params.append(value)
434
+
435
+ return where_clauses, where_params
436
+
437
+ def _add_date_filter(
438
+ self,
439
+ where_clauses: list[str],
440
+ where_params: list[Any],
441
+ field: str,
442
+ value: Any,
443
+ ) -> tuple[list[str], list[Any]]:
444
+ """
445
+ Add a date filter to the WHERE clause.
446
+
447
+ Supports:
448
+ - None/missing values: {"field": None}
449
+ - Exact match: {"field": date(...)} or {"field": "2024-01-15"}
450
+ - Range filters: {"field": [('>=', date(...)), ('<', date(...))]}
451
+
452
+ Returns:
453
+ Tuple of (updated where_clauses, updated where_params).
454
+ """
455
+ if value is None:
456
+ where_clauses.append(f'd."{field}" IS NULL')
457
+ elif is_range_filter(value):
458
+ # Range filter: [('>=', date(...)), ('<', date(...))]
459
+ for op, op_value in value:
460
+ if op in OPERATORS and op_value is not None:
461
+ # Convert date/datetime to ISO format string for comparison
462
+ if isinstance(op_value, (date, datetime)):
463
+ op_value = op_value.isoformat()
464
+ where_clauses.append(f'd."{field}" {op} ?')
465
+ where_params.append(op_value)
466
+ else:
467
+ # Exact match - convert date/datetime to ISO format
468
+ if isinstance(value, (date, datetime)):
469
+ value = value.isoformat()
470
+ where_clauses.append(f'd."{field}" = ?')
471
+ where_params.append(value)
472
+
473
+ return where_clauses, where_params
474
+
294
475
  def _build_fts_query(self, query: str, boost_dict: dict[str, float]) -> str:
295
476
  """
296
477
  Build an FTS5 query with boost weights.