waveflowdb-client 0.0.1__tar.gz → 0.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
- Name: waveflowdb-client
3
- Version: 0.0.1
2
+ Name: waveflowdb_client
3
+ Version: 0.0.2
4
4
  Summary: VectorLake SDK — Deterministic backend engine powering agent workflows
5
5
  Author-email: "agentanalytics.ai" <nitin@agentanalytics.ai>
6
6
  License: MIT License
@@ -73,7 +73,7 @@ to interact with the Vector Lake API.
73
73
  ### 1. Install Dependencies
74
74
 
75
75
  ``` bash
76
- pip install waveflowdb-client
76
+ pip install waveflowdb_client
77
77
  ```
78
78
 
79
79
  ### 2. Configure API Credentials
@@ -3,8 +3,8 @@ requires = ["setuptools>=61.0", "wheel"]
3
3
  build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
- name = "waveflowdb-client" # pip install name
7
- version = "0.0.1"
6
+ name = "waveflowdb_client" # pip install name
7
+ version = "0.0.2"
8
8
  description = "VectorLake SDK — Deterministic backend engine powering agent workflows"
9
9
  readme = "readme.md"
10
10
  requires-python = ">=3.8"
@@ -35,7 +35,7 @@ to interact with the Vector Lake API.
35
35
  ### 1. Install Dependencies
36
36
 
37
37
  ``` bash
38
- pip install waveflowdb-client
38
+ pip install waveflowdb_client
39
39
  ```
40
40
 
41
41
  ### 2. Configure API Credentials
@@ -0,0 +1,18 @@
1
+ __version__ = "1.0.0"
2
+
3
+ from .client import VectorLakeClient
4
+ from .config import Config
5
+ from .exceptions import APIError, FileProcessingError
6
+ from .models import DocumentInfo, ChatResponse, MatchingDocsResponse, HealthResponse, BatchResult
7
+
8
+ __all__ = [
9
+ "VectorLakeClient",
10
+ "Config",
11
+ "APIError",
12
+ "FileProcessingError",
13
+ "DocumentInfo",
14
+ "ChatResponse",
15
+ "MatchingDocsResponse",
16
+ "HealthResponse",
17
+ "BatchResult",
18
+ ]
@@ -0,0 +1,297 @@
1
+ import time
2
+ import logging
3
+ import json
4
+ import requests
5
+ import os
6
+ from concurrent.futures import ThreadPoolExecutor, as_completed
7
+ from typing import List, Optional, Dict, Any
8
+
9
+ from .config import Config
10
+ from .utils import FileProcessor, Logger, BatchManager
11
+ from .exceptions import APIError
12
+ from .models import ChatResponse, MatchingDocsResponse, HealthResponse, BatchResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+ logging.basicConfig(level=logging.INFO)
16
+
17
class VectorLakeClient:
    """Client for the Vector Lake HTTP API.

    Wraps the query endpoints (chat, matching documents, corpus search) and
    the upload endpoints (add/refresh documents, health, namespace and
    document information) exposed through ``config.endpoints``.
    """

    def __init__(self, config: Optional[Config] = None, **kwargs):
        """Build a client from ``config`` or, when omitted, ``Config(**kwargs)``."""
        if config is None:
            config = Config(**kwargs)
        logging.info(f"Initializing VectorLakeClient with base_url={config.base_url_query}")
        self.config = config
        self.logger = Logger(config.log_dir)
        self.batch_manager = BatchManager(config.max_files_per_batch, config.max_batch_size_mb)
        self.file_processor = FileProcessor()
        # Not read by any method of this class; kept so existing callers that
        # inspect the attribute keep working.
        self.perf_csv = "performance_logs.csv"

    def _get_headers(self) -> Dict[str, str]:
        """Return the JSON content-type and API-key headers sent with every request."""
        return {
            'Content-Type': 'application/json',
            'x-api-key': self.config.api_key
        }

    @staticmethod
    def _count_results(result: Any) -> Any:
        """Return ``len(result["results"])`` for the performance log, or ``"N/A"``.

        Never raises: a non-dict body or an unsized ``results`` value yields
        ``"N/A"`` so that logging cannot break a successful request.
        """
        if not isinstance(result, dict):
            return "N/A"
        try:
            return len(result.get("results", []))
        except TypeError:
            return "N/A"

    def _make_request(self, endpoint: str, payload: Dict[str, Any], operation: str = "", batch_num: int = 0) -> Dict[str, Any]:
        """POST ``payload`` as JSON to ``endpoint`` and return the decoded body.

        Transport failures (``requests.exceptions.RequestException``) are
        retried with exponential back-off (1s, 2s, 4s, ...). HTTP responses
        with status >= 400 are not retried.

        Args:
            endpoint: Full URL to post to.
            payload: JSON-serialisable request body.
            operation: Label used for performance/error logging; logging is
                skipped when empty.
            batch_num: Batch number recorded in the logs.

        Returns:
            The decoded JSON body, or ``{"status_code", "text"}`` when the
            body is not valid JSON.

        Raises:
            APIError: On HTTP status >= 400, or when every attempt failed at
                the transport level.
        """
        headers = self._get_headers()
        request_size = len(json.dumps(payload).encode('utf-8')) / 1024 if payload is not None else 0
        # Always try at least once: with max_retries <= 0 the loop would
        # otherwise be skipped and None returned without any request.
        attempts = max(1, self.config.max_retries)
        for attempt in range(attempts):
            try:
                start_time = time.time()
                response = requests.post(endpoint, json=payload, headers=headers, timeout=self.config.timeout)
                latency = (time.time() - start_time) * 1000
            except requests.exceptions.RequestException as e:
                if attempt == attempts - 1:
                    error_msg = f"Request failed after {attempts} attempts: {str(e)}"
                    if operation:
                        self.logger.log_api_error(operation, batch_num, error_msg)
                    raise APIError(
                        error_msg,
                        getattr(e.response, 'status_code', None),
                        getattr(e.response, 'text', None),
                    ) from e
                time.sleep(2 ** attempt)
                continue

            try:
                result = response.json()
            except ValueError:
                # Body is not JSON; surface the raw response instead.
                result = {"status_code": response.status_code, "text": response.text}

            if operation:
                response_size = len(response.content) / 1024 if response.content is not None else 0
                result_count = self._count_results(result)
                self.logger.log_performance(operation, batch_num, latency, request_size, response_size, result_count)

            if response.status_code >= 400:
                fallback = f'HTTP {response.status_code}'
                # The error body may be a JSON list or string, which has no .get().
                message = result.get('message', fallback) if isinstance(result, dict) else fallback
                raise APIError(message, status_code=response.status_code, response_text=response.text)

            return result

    def _read_files(self, filenames: List[str]) -> List[str]:
        """Read each file under ``config.vector_lake_path`` and return the contents.

        The returned list is aligned with ``filenames``: unsupported or
        unreadable files are logged as skipped and contribute ``""``.
        """
        contents = []
        for filename in filenames:
            filepath = os.path.join(self.config.vector_lake_path, filename)
            try:
                if self.file_processor.is_supported_file(filename):
                    contents.append(self.file_processor.read_file_content(filepath))
                else:
                    self.logger.log_skipped_file(filename, "Unsupported file type")
                    contents.append("")
            except Exception as e:
                # Best effort: one bad file must not abort the whole batch.
                self.logger.log_skipped_file(filename, f"Read error: {str(e)}")
                contents.append("")
        return contents

    def _build_query_payload(self, query, user_id, vector_lake_description, pattern,
                             session_id, hybrid_filter, top_docs, threshold, files) -> Dict[str, Any]:
        """Build the request body shared by ``chat_with_docs`` and ``get_matching_docs``."""
        payload = {
            "session_id": session_id,
            "user_id": user_id,
            "vector_lake_description": vector_lake_description,
            "query": query,
            "hybrid_filter": hybrid_filter,
            "top_docs": top_docs,
            "threshold": threshold,
            "pattern": pattern
        }
        # File contents are attached only in dynamic mode.
        if pattern == "dynamic" and files:
            payload.update({
                "files_name": files,
                "files_data": self._read_files(files)
            })
        return payload

    def chat_with_docs(self,
                       query: str,
                       user_id: str,
                       vector_lake_description: str,
                       pattern: str = "static",
                       session_id: Optional[str] = None,
                       hybrid_filter: bool = False,
                       top_docs: int = 3,
                       threshold: float = 0.2,
                       files: Optional[List[str]] = None) -> Dict[str, Any]:
        """Send ``query`` to the ``chat_with_docs`` endpoint.

        With ``pattern="dynamic"`` and ``files`` given, the named files are
        read from ``config.vector_lake_path`` and sent along with the query.

        Returns:
            The decoded response on success. On any failure a
            ``ChatResponse`` whose ``response`` is ``"Error: ..."`` is
            returned instead of raising.
        """
        endpoint = self.config.endpoints["chat_with_docs"]
        payload = self._build_query_payload(
            query, user_id, vector_lake_description, pattern,
            session_id, hybrid_filter, top_docs, threshold, files,
        )
        try:
            return self._make_request(endpoint, payload, "chat_with_docs")
        except Exception as e:
            return ChatResponse(response=f"Error: {e}", query=query, session_id=session_id or "", user_id=user_id, timestamp=time.time())

    def get_matching_docs(self,
                          query: str,
                          user_id: str,
                          vector_lake_description: str,
                          pattern: str = "static",
                          session_id: Optional[str] = None,
                          hybrid_filter: bool = False,
                          top_docs: int = 10,
                          threshold: float = 0.2,
                          files: Optional[List[str]] = None,
                          with_data: bool = False) -> Dict[str, Any]:
        """Return the top matching documents for ``query``.

        Uses the ``top_matching_docs_with_data`` endpoint when ``with_data``
        is true, otherwise ``top_matching_docs``.

        Raises:
            APIError: Propagated from the request; unlike ``chat_with_docs``
                this method does not swallow failures.
        """
        endpoint_key = "top_matching_docs_with_data" if with_data else "top_matching_docs"
        endpoint = self.config.endpoints[endpoint_key]
        payload = self._build_query_payload(
            query, user_id, vector_lake_description, pattern,
            session_id, hybrid_filter, top_docs, threshold, files,
        )
        return self._make_request(endpoint, payload, endpoint_key)

    def _send_direct(self, operation: str, user_id: str, vector_lake_description: str,
                     intelligent_segmentation: bool, session_id: Optional[str],
                     files_name: List[str], files_data: List[str]) -> List[BatchResult]:
        """Upload caller-supplied names and contents in a single request (no batching).

        Raises:
            ValueError: If ``files_name`` and ``files_data`` differ in length.
            APIError: Propagated from the request.
        """
        if len(files_name) != len(files_data):
            raise ValueError("files_name and files_data must be same length")
        payload = {
            "session_id": session_id,
            "user_id": user_id,
            "vector_lake_description": vector_lake_description,
            "files_name": files_name,
            "files_data": files_data,
            "intelligent_segmentation": intelligent_segmentation
        }
        endpoint = self.config.endpoints[operation]
        result = self._make_request(endpoint, payload, operation, batch_num=1)
        return [BatchResult(batch_number=1, response=result, files_processed=files_name, success=True)]

    def add_documents(self,
                      user_id: str,
                      vector_lake_description: str,
                      start_from_batch=1,
                      intelligent_segmentation: bool = True,
                      session_id: Optional[str] = None,
                      files: Optional[List[str]] = None,
                      files_name: Optional[List[str]] = None,
                      files_data: Optional[List[str]] = None,
                      max_workers=5) -> List[BatchResult]:
        """Add documents through the ``add_docs`` endpoint.

        When both ``files_name`` and ``files_data`` are non-empty they are
        sent directly in one request. Otherwise ``files`` (or, when ``None``,
        every supported file in ``config.vector_lake_path``) is read from
        disk and uploaded in batches, starting at ``start_from_batch``.
        """
        # If user supplies file names and data directly, bypass batching
        if files_name and files_data:
            return self._send_direct(
                "add_docs", user_id, vector_lake_description,
                intelligent_segmentation, session_id, files_name, files_data,
            )

        return self._process_files_in_batches(
            "add_docs", user_id, vector_lake_description, start_from_batch, intelligent_segmentation, session_id, files, max_workers=max_workers
        )

    def refresh_documents(self,
                          user_id: str,
                          vector_lake_description: str,
                          intelligent_segmentation: bool = True,
                          session_id: Optional[str] = None,
                          files: Optional[List[str]] = None,
                          files_name: Optional[List[str]] = None,
                          files_data: Optional[List[str]] = None) -> List[BatchResult]:
        """Refresh documents through the ``refresh_docs`` endpoint.

        Same input rules as ``add_documents``; the batched path always
        starts at batch 1 and uses the default worker count.
        """
        # If user supplies file names and data directly, bypass batching
        if files_name and files_data:
            return self._send_direct(
                "refresh_docs", user_id, vector_lake_description,
                intelligent_segmentation, session_id, files_name, files_data,
            )

        return self._process_files_in_batches(
            "refresh_docs", user_id, vector_lake_description, 1, intelligent_segmentation, session_id, files
        )

    def health_check(self, user_id: str, vector_lake_description: str, session_id: Optional[str] = None) -> Dict[str, Any]:
        """Call the ``health`` endpoint.

        Returns:
            A ``HealthResponse`` with ``status="success"`` and the raw body
            in ``details``, or ``status="error"`` carrying the exception
            text. Never raises.
        """
        endpoint = self.config.endpoints["health"]
        payload = {"session_id": session_id, "user_id": user_id, "vector_lake_description": vector_lake_description}
        try:
            result = self._make_request(endpoint, payload, "health")
            return HealthResponse(status="success", message=result.get("message", "ok"), timestamp=time.time(), details=result)
        except Exception as e:
            return HealthResponse(status="error", message=str(e), timestamp=time.time())

    def _request_or_error_dict(self, endpoint: str, payload: Dict[str, Any], operation: str) -> Dict[str, Any]:
        """Run a request and convert any failure into ``{"status": "error", "message": ...}``."""
        try:
            return self._make_request(endpoint, payload, operation)
        except Exception as e:
            return {"status": "error", "message": str(e)}

    def get_namespace_details(self, user_id: str, session_id: Optional[str] = None, vector_lake_description: Optional[str] = None) -> Dict[str, Any]:
        """Fetch namespace details for ``user_id``.

        ``vector_lake_description`` is included in the request only when
        given. Failures are returned as an error dict, not raised.
        """
        endpoint = self.config.endpoints["get_namespace_details_by_userid"]
        payload = {"session_id": session_id, "user_id": user_id}
        if vector_lake_description:
            payload["vector_lake_description"] = vector_lake_description
        return self._request_or_error_dict(endpoint, payload, "get_namespace_details")

    def get_docs_information(self, user_id: str, vector_lake_description: str, session_id: Optional[str] = None, keyword: Optional[str] = None, threshold: int = 70) -> Dict[str, Any]:
        """Fetch information about stored documents.

        ``keyword`` is included in the request only when given. Failures are
        returned as an error dict, not raised.
        """
        endpoint = self.config.endpoints["get_docs_information"]
        payload = {"session_id": session_id, "user_id": user_id, "vector_lake_description": vector_lake_description, "threshold": threshold}
        if keyword:
            payload["keyword"] = keyword
        return self._request_or_error_dict(endpoint, payload, "get_docs_information")

    def full_corpus_search(self, user_id: str, vector_lake_description: str, keyword: str, session_id: Optional[str] = None, top_docs: int = 10) -> Dict[str, Any]:
        """Search the whole corpus for ``keyword``; failures are returned as an error dict."""
        endpoint = self.config.endpoints["full_corpus_search"]
        payload = {"session_id": session_id, "user_id": user_id, "vector_lake_description": vector_lake_description, "keyword": keyword, "top_docs": top_docs}
        return self._request_or_error_dict(endpoint, payload, "full_corpus_search")

    def _make_request_with_backoff(self, endpoint, payload, operation, batch_num, retries=5, base_delay=1):
        """Call ``_make_request`` and retry on HTTP 429 with a doubling delay.

        Args:
            retries: Maximum number of attempts (at least one is always made).
            base_delay: Seconds to wait before the first retry; doubled after
                each throttled attempt.

        Raises:
            APIError: Immediately for any status other than 429, and for 429
                once all attempts are used up. Previously the exhausted case
                returned ``None`` and the batch was reported as successful.
        """
        delay = base_delay
        attempts = max(1, retries)
        throttled = None
        for attempt in range(attempts):
            try:
                return self._make_request(endpoint, payload, operation, batch_num)
            except APIError as e:
                if getattr(e, "status_code", None) != 429:
                    raise
                throttled = e
                if attempt < attempts - 1:
                    logging.warning(f"Batch {batch_num} throttled, retrying in {delay}s...")
                    time.sleep(delay)
                    delay *= 2
        raise throttled

    def _process_files_in_batches(self, operation: str, user_id: str, vector_lake_description: str, start_from_batch, intelligent_segmentation: bool = False, session_id: Optional[str] = None, files: Optional[List[str]] = None, max_workers: int = 1, batch_delay: float = 2):
        """Read files from disk and upload them in batches on a thread pool.

        Args:
            operation: Endpoint key in ``config.endpoints`` (also the log label).
            start_from_batch: 1-based number of the first batch to send;
                earlier batches are skipped.
            files: File names relative to ``config.vector_lake_path``. When
                ``None``, every supported regular file in that directory.
            max_workers: Thread-pool size.
            batch_delay: Seconds to wait between consecutive submissions.

        Returns:
            One ``BatchResult`` per submitted batch, in completion order.
            Failed batches have ``success=False`` and ``error_message`` set.
        """
        base_dir = self.config.vector_lake_path
        if files is None:
            files = [
                f for f in os.listdir(base_dir)
                if os.path.isfile(os.path.join(base_dir, f)) and self.file_processor.is_supported_file(f)
            ]
        batches = self.batch_manager.create_batches(files, base_dir)
        results = []
        start_batch_index = start_from_batch - 1
        if start_from_batch > 1:
            logging.info(f"Resuming from batch {start_from_batch}, skipping first {start_from_batch - 1} batches")
        # The endpoint does not change per batch; look it up once.
        endpoint = self.config.endpoints[operation]
        last_index = len(batches) - 1
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {}
            for i, batch in enumerate(batches):
                if i < start_batch_index:
                    logging.info(f"Skipping batch {i + 1}")
                    continue
                batch_num = i + 1
                file_contents = self._read_files(batch)
                payload = {"session_id": session_id, "user_id": user_id, "vector_lake_description": vector_lake_description, "files_name": batch, "files_data": file_contents, "intelligent_segmentation": intelligent_segmentation}
                future = executor.submit(self._make_request_with_backoff, endpoint, payload, operation, batch_num)
                futures[future] = (batch_num, batch, time.time())
                # Pace submissions, but do not wait after the final one.
                if batch_delay > 0 and i < last_index:
                    time.sleep(batch_delay)
            for future in as_completed(futures):
                batch_num, batch, start_time = futures[future]
                try:
                    result = future.result()
                except Exception as e:
                    processing_time = time.time() - start_time
                    logging.error(f"Batch {batch_num} failed: {str(e)}")
                    results.append(BatchResult(batch_number=batch_num, response=str(e), files_processed=batch, success=False, error_message=str(e), processing_time=processing_time))
                else:
                    processing_time = time.time() - start_time
                    logging.info(f"Batch {batch_num} done")
                    results.append(BatchResult(batch_number=batch_num, response=result, files_processed=batch, success=True, processing_time=processing_time))
        return results
@@ -0,0 +1,56 @@
1
+ import os
2
+ from typing import Optional, Dict
3
+
4
class Config:
    """Connection and batching settings for the Vector Lake client.

    The API key comes from the ``api_key`` argument or, when that is empty,
    from the ``VECTOR_LAKE_API_KEY`` environment variable. Constructing a
    ``Config`` creates ``log_dir`` and ``vector_lake_path`` if missing.
    """

    ALLOWED_EXTENSIONS = ["txt", "csv", "json", "py", "docx", "pdf"]

    # Route names served under ``<host>/query`` and ``<host>/upload``.
    _QUERY_ROUTES = (
        "chat_with_docs",
        "top_matching_docs",
        "top_matching_docs_with_data",
        "full_corpus_search",
    )
    _UPLOAD_ROUTES = (
        "add_docs",
        "refresh_docs",
        "health",
        "get_namespace_details_by_userid",
        "get_docs_information",
    )

    def __init__(
        self,
        api_key: Optional[str] = None,
        host: str = "https://waveflow-analytics.com",
        timeout: int = 60,
        max_retries: int = 2,
        max_files_per_batch: int = 100,
        max_batch_size_mb: int = 20,
        vector_lake_path: str = "upload",
        log_dir: str = "logs",
    ):
        """Store the settings, validate the API key and create the directories.

        Raises:
            ValueError: If no API key is given and the environment variable
                is unset or empty.
        """
        env_key = os.getenv("VECTOR_LAKE_API_KEY")
        self.api_key = api_key if api_key else env_key
        self.host = host.rstrip("/")  # avoid "//" when joining paths
        self.timeout = timeout
        self.max_retries = max_retries
        self.max_files_per_batch = max_files_per_batch
        self.max_batch_size_mb = max_batch_size_mb
        self.vector_lake_path = vector_lake_path
        self.log_dir = log_dir

        if not self.api_key:
            raise ValueError("API key is required. Provide api_key or set VECTOR_LAKE_API_KEY environment variable.")

        for directory in (self.log_dir, self.vector_lake_path):
            os.makedirs(directory, exist_ok=True)

    @property
    def base_url_query(self) -> str:
        """Base URL of the query service."""
        return self.host + "/query"

    @property
    def base_url_upload(self) -> str:
        """Base URL of the upload service."""
        return self.host + "/upload"

    @property
    def endpoints(self) -> Dict[str, str]:
        """Map each endpoint name to its full URL (query routes first, then upload)."""
        query_base = self.base_url_query
        upload_base = self.base_url_upload
        urls = {route: f"{query_base}/{route}" for route in self._QUERY_ROUTES}
        urls.update({route: f"{upload_base}/{route}" for route in self._UPLOAD_ROUTES})
        return urls
@@ -0,0 +1,11 @@
1
class VectorLakeError(Exception):
    """Base class for every error raised by this package."""


class APIError(VectorLakeError):
    """Raised when a request to the API fails.

    Attributes:
        status_code: HTTP status of the failed response, or ``None``.
        response_text: Raw body of the failed response, or ``None``.
    """

    def __init__(self, message: str, status_code: int = None, response_text: str = None):
        super().__init__(message)
        self.status_code, self.response_text = status_code, response_text


class FileProcessingError(VectorLakeError):
    """Raised when a file cannot be read or parsed."""
@@ -0,0 +1,44 @@
1
+ from dataclasses import dataclass
2
+ from typing import List, Dict, Any, Optional
3
+ from datetime import datetime
4
+
5
@dataclass
class DocumentInfo:
    """A document's name, text content and size, plus an optional modification time."""

    filename: str
    content: str
    size: int
    # Left as None when the modification time is not supplied.
    modified_time: Optional[datetime] = None
11
+
12
@dataclass
class ChatResponse:
    """A chat answer together with the query, session and user it belongs to."""

    response: str    # answer text
    query: str       # the question that was asked
    session_id: str
    user_id: str
    timestamp: datetime
19
+
20
@dataclass
class MatchingDocsResponse:
    """Result of a matching-documents query and the request context it came from."""

    status: str
    query: str
    response: str
    with_data: bool  # mirrors the flag of the same name on the request
    session_id: str
    user_id: str
    timestamp: datetime
29
+
30
@dataclass
class HealthResponse:
    """Outcome of a health check: a status, a message and optional raw details."""

    status: str
    message: str
    timestamp: datetime
    # Extra payload accompanying the status; None when there is none.
    details: Optional[Dict[str, Any]] = None
36
+
37
@dataclass
class BatchResult:
    """Outcome of uploading one batch of files."""

    batch_number: int
    response: Any               # whatever the upload call produced for this batch
    files_processed: List[str]  # names of the files that made up the batch
    success: bool
    error_message: Optional[str] = None
    processing_time: Optional[float] = None
@@ -0,0 +1,149 @@
1
+ import os
2
+ import csv
3
+ import logging
4
+ from pathlib import Path
5
+ from datetime import datetime
6
+ import json
7
+ import traceback
8
+ import time
9
+ from typing import List
10
+ # Heavy libs are imported at runtime when needed in real environments.
11
+
12
+ from .exceptions import FileProcessingError
13
+
14
+ logger = logging.getLogger(__name__)
15
+ logging.basicConfig(level=logging.INFO)
16
+
17
class FileProcessor:
    """Detects supported file types and extracts their text content."""

    SUPPORTED_EXTENSIONS = ['txt', 'csv', 'json', 'py', 'docx', 'pdf']

    @staticmethod
    def _extension(path: str) -> str:
        """Return the lower-cased extension of ``path`` without the dot.

        Returns ``""`` when the final path component contains no dot, so an
        extension-less file that happens to be called ``txt`` or ``json`` is
        not mistaken for a supported type.
        """
        _, dot, ext = os.path.basename(path).rpartition('.')
        return ext.lower() if dot else ''

    @staticmethod
    def read_file_content(filepath: str) -> str:
        """Return the text content of ``filepath``.

        Text formats (txt, csv, py, json) are read as UTF-8. ``docx`` and
        ``pdf`` files are parsed with python-docx and PyPDF2, which are
        imported lazily so the package works without them installed.

        Raises:
            FileProcessingError: If the extension is unsupported or the
                parser library for it cannot be imported.
        """
        ext = FileProcessor._extension(filepath)
        if ext in ('txt', 'csv', 'py', 'json'):
            with open(filepath, encoding='utf-8') as f:
                return f.read()
        # Defer complex parsing to runtime imports to avoid import-time failures.
        if ext == 'docx':
            try:
                from docx import Document
            except ImportError as e:
                raise FileProcessingError(f"python-docx not available: {e}") from e
            doc = Document(filepath)
            return '\n'.join(p.text for p in doc.paragraphs)
        if ext == 'pdf':
            try:
                import PyPDF2
            except ImportError as e:
                raise FileProcessingError(f"PyPDF2 not available: {e}") from e
            with open(filepath, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may return None for a page; treat it as empty.
                return '\n'.join(p.extract_text() or '' for p in reader.pages)
        raise FileProcessingError(f'Unsupported extension: {ext}')

    @staticmethod
    def get_file_size(filepath: str) -> int:
        """Return the size of ``filepath`` in bytes."""
        return os.path.getsize(filepath)

    @staticmethod
    def is_supported_file(filename: str) -> bool:
        """Return True when the extension of ``filename`` is in ``SUPPORTED_EXTENSIONS``."""
        return FileProcessor._extension(filename) in FileProcessor.SUPPORTED_EXTENSIONS
52
+
53
class Logger:
    """Appends skipped-file, API-error and performance records to CSV files in ``log_dir``."""

    def __init__(self, log_dir: str):
        """Create ``log_dir`` if needed and derive the three log file paths."""
        Path(log_dir).mkdir(parents=True, exist_ok=True)
        self.skipped_log = Path(log_dir) / 'skipped_files_log.csv'
        self.api_error_log = Path(log_dir) / 'api_error_log.csv'
        self.performance_log = Path(log_dir) / 'api_performance_log.csv'

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time as a naive ISO-8601 string.

        Produces the same format as ``datetime.utcnow().isoformat()`` without
        calling the deprecated ``utcnow``.
        """
        from datetime import timezone
        return datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    def _write_csv_log(self, path: Path, header: List[str], row: List):
        """Append ``row`` to ``path``, writing ``header`` first if the file is new."""
        exists = path.exists()
        with open(path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            if not exists:
                writer.writerow(header)
            writer.writerow(row)

    def log_skipped_file(self, filename: str, reason: str):
        """Record that ``filename`` was skipped and why."""
        self._write_csv_log(self.skipped_log, ['ts', 'filename', 'reason'], [self._utc_timestamp(), filename, reason])

    def log_api_error(self, operation: str, batch_num: int, error_message: str):
        """Record a failed API call for ``operation`` / ``batch_num``."""
        self._write_csv_log(self.api_error_log, ['ts', 'operation', 'batch_num', 'err'], [self._utc_timestamp(), operation, batch_num, error_message])

    def log_performance(self, operation=None, batch_num=None, latency=None,
                        request_size=None, response_size=None, result_count=None,
                        files_processed=None, error: Exception = None):
        """Append one performance record to the performance CSV.

        The record goes to ``self.performance_log`` inside ``log_dir``. A
        ``perf_csv`` attribute set on the instance overrides that path.
        Previously the configured path was ignored and the file was written
        to ``performance_logs.csv`` in the current working directory.

        Every value is stringified defensively (sequences are comma-joined,
        ``None`` becomes ``""``) and write failures are printed instead of
        raised, so logging can never break the calling operation.
        """
        logfile = getattr(self, "perf_csv", None) or self.performance_log

        # --- Safe stringify for ANY type ---
        def safe_str(value):
            try:
                if isinstance(value, (list, tuple, set)):
                    return ",".join(str(v) for v in value)
                return str(value) if value is not None else ""
            except Exception:
                return "UNSERIALIZABLE"

        row = {
            "timestamp": self._utc_timestamp(),
            "operation": safe_str(operation),
            "batch_num": safe_str(batch_num),
            "latency_ms": safe_str(latency),
            "request_size": safe_str(request_size),
            "response_size": safe_str(response_size),
            "result_count": safe_str(result_count),
            "files_processed": safe_str(files_processed),
            "error": safe_str(error),
        }

        try:
            file_exists = os.path.isfile(logfile)

            with open(logfile, mode="a", newline="", encoding="utf-8") as f:
                writer = csv.DictWriter(f, fieldnames=list(row))

                if not file_exists:
                    writer.writeheader()

                writer.writerow(row)

        except Exception:
            # Deliberate best effort: report the problem but never propagate it.
            print("[CSV LOGGING ERROR] Could not write performance log:")
            print(traceback.format_exc())
119
+
120
class BatchManager:
    """Groups files into batches bounded by a file count and a total byte size."""

    def __init__(self, max_files: int, max_size_mb: int):
        self.max_files = max_files
        self.max_bytes = max_size_mb * 1024 * 1024

    def _eligible_size(self, base_path: str, name: str):
        """Return the size of ``name`` in bytes, or None if it must be left out.

        A file is left out when its size cannot be determined or when it
        alone exceeds the per-batch byte limit.
        """
        try:
            nbytes = os.path.getsize(os.path.join(base_path, name))
            return nbytes if nbytes <= self.max_bytes else None
        except Exception:
            return None

    def create_batches(self, files: List[str], base_path: str) -> List[List[str]]:
        """Split ``files`` (names relative to ``base_path``) into ordered batches.

        Input order is preserved. A new batch is started whenever adding the
        next file would exceed ``max_files`` or ``max_bytes``. Files that are
        missing, unreadable or individually too large are silently omitted.
        """
        sized = []
        for name in files:
            nbytes = self._eligible_size(base_path, name)
            if nbytes is not None:
                sized.append((name, nbytes))

        batches: List[List[str]] = []
        current: List[str] = []
        current_bytes = 0
        for name, nbytes in sized:
            # Close the open batch first if this file does not fit into it.
            if current and (len(current) >= self.max_files or current_bytes + nbytes > self.max_bytes):
                batches.append(current)
                current, current_bytes = [], 0
            current.append(name)
            current_bytes += nbytes
        if current:
            batches.append(current)
        return batches
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
- Name: waveflowdb-client
3
- Version: 0.0.1
2
+ Name: waveflowdb_client
3
+ Version: 0.0.2
4
4
  Summary: VectorLake SDK — Deterministic backend engine powering agent workflows
5
5
  Author-email: "agentanalytics.ai" <nitin@agentanalytics.ai>
6
6
  License: MIT License
@@ -73,7 +73,7 @@ to interact with the Vector Lake API.
73
73
  ### 1. Install Dependencies
74
74
 
75
75
  ``` bash
76
- pip install waveflowdb-client
76
+ pip install waveflowdb_client
77
77
  ```
78
78
 
79
79
  ### 2. Configure API Credentials
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ pyproject.toml
3
+ readme.md
4
+ waveflowdb_client/__init__.py
5
+ waveflowdb_client/client.py
6
+ waveflowdb_client/config.py
7
+ waveflowdb_client/exceptions.py
8
+ waveflowdb_client/models.py
9
+ waveflowdb_client/utils.py
10
+ waveflowdb_client.egg-info/PKG-INFO
11
+ waveflowdb_client.egg-info/SOURCES.txt
12
+ waveflowdb_client.egg-info/dependency_links.txt
13
+ waveflowdb_client.egg-info/requires.txt
14
+ waveflowdb_client.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ waveflowdb_client
@@ -1,195 +0,0 @@
1
- """
2
- run.py
3
- Simple launcher for Vector Lake SDK (v1.0.0)
4
-
5
- Allows you to:
6
- - configure client (host, port, key)
7
- - call ANY API: add, refresh, chat, match, health, namespace info, etc.
8
- """
9
-
10
# The importable package name uses an underscore; a hyphen is not valid in a
# Python import statement.
from waveflowdb_client import Config, VectorLakeClient


# -------------------------------------------------------
# CONFIGURATION (EDIT THIS ONCE)
# -------------------------------------------------------
API_KEY = "<<>>"  # visit https://db.agentanalytics.ai/signup
HOST = "https://waveflow-analytics.com"  # OR "http://localhost"
VECTOR_LAKE_PATH = "<<>>"  # folder for path-based ingestion
USER_ID = ""  # your email id used for registration
NAMESPACE = ""  # database created via UI


# -------------------------------------------------------
# INITIALIZE CLIENT
# -------------------------------------------------------
def get_client():
    """Build a client from the configuration constants above."""
    cfg = Config(
        api_key=API_KEY,
        host=HOST,
        vector_lake_path=VECTOR_LAKE_PATH
    )
    return VectorLakeClient(cfg)


client = get_client()


# -------------------------------------------------------
# READY-TO-USE ACTION FUNCTIONS
# -------------------------------------------------------

def run_health():
    """Health check"""
    print("\n--- HEALTH CHECK ---")
    res = client.health_check(USER_ID, NAMESPACE)
    print(res)


def run_add_direct():
    """Add docs using files_name + files_data"""
    print("\n--- ADD DOCUMENTS (Direct Payload Mode) ---")
    res = client.add_documents(
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        files_name=["test1.txt", "test2.txt"],
        files_data=["hello world", "this is test doc 2"]
    )
    print(res)


def run_add_path():
    """Add docs by reading actual files from disk"""
    print("\n--- ADD DOCUMENTS (Disk Path Mode) ---")
    res = client.add_documents(
        user_id=USER_ID,
        vector_lake_description=NAMESPACE
        # files=[""]  # must exist inside VECTOR_LAKE_PATH
    )
    print(res)


def run_refresh_direct():
    """Refresh docs using direct data (no disk read)"""
    print("\n--- REFRESH DOCUMENTS (Direct Data Mode) ---")
    res = client.refresh_documents(
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        files_name=["test1.txt"],
        files_data=["UPDATED CONTENT FOR TEST1"]
    )
    print(res)


def run_refresh_path():
    """Refresh docs by reading actual files"""
    print("\n--- REFRESH DOCUMENTS (Path Mode) ---")
    res = client.refresh_documents(
        user_id=USER_ID,
        vector_lake_description=NAMESPACE
        # files=["file1.pdf"]  # must exist
    )
    print(res)


def run_chat_static(query):
    """Chat with stored index"""
    print("\n--- CHAT (STATIC MODE) ---")
    res = client.chat_with_docs(
        query=query,
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        pattern="static"
    )
    print(res)


def run_chat_dynamic(query):
    """Chat using temporary files (dynamic mode)"""
    print("\n--- CHAT (DYNAMIC MODE) ---")
    # chat_with_docs takes file names via `files` and reads them from
    # VECTOR_LAKE_PATH; it has no files_name/files_data parameters.
    res = client.chat_with_docs(
        query=query,
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        pattern="dynamic",
        files=["dyn1.txt"]  # must exist inside VECTOR_LAKE_PATH
    )
    print(res)


def run_match_static(query):
    """Top matching docs (static mode)"""
    print("\n--- TOP MATCHING DOCS (STATIC) ---")
    res = client.get_matching_docs(
        query=query,
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        pattern="static",
        top_docs=5,
        threshold=0.1
    )
    print(res)


def run_match_dynamic(query):
    """Top matching docs (dynamic mode)"""
    print("\n--- TOP MATCHING DOCS (DYNAMIC) ---")
    # get_matching_docs takes file names via `files` and reads them from
    # VECTOR_LAKE_PATH; it has no files_name/files_data parameters.
    res = client.get_matching_docs(
        query=query,
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        pattern="dynamic",
        files=["temp.txt"]  # must exist inside VECTOR_LAKE_PATH
    )
    print(res)


def run_match_with_data(query):
    """Top matching docs including chunk data"""
    print("\n--- TOP MATCHING DOCS (WITH DATA) ---")
    res = client.get_matching_docs(
        query=query,
        user_id=USER_ID,
        vector_lake_description=NAMESPACE,
        pattern="static",
        top_docs=5,
        with_data=True
    )
    print(res)


def run_namespace_details():
    """Get namespace information"""
    print("\n--- GET NAMESPACE DETAILS ---")
    res = client.get_namespace_details(USER_ID, vector_lake_description=NAMESPACE)
    print(res)


def run_docs_info():
    """List all stored docs + info"""
    print("\n--- GET DOCS INFORMATION ---")
    res = client.get_docs_information(USER_ID, NAMESPACE)
    print(res)


# -------------------------------------------------------
# MAIN SELECTOR – RUN ANY FUNCTION YOU WANT
# -------------------------------------------------------
if __name__ == "__main__":
    query = "<<>>"
    # --- UNCOMMENT ANY ONE OF THESE TO RUN THAT OPERATION ---
    # run_health()
    # run_add_direct()
    # run_add_path()
    # run_refresh_direct()
    # run_refresh_path()
    # run_chat_static(query)
    # run_chat_dynamic(query)
    # run_match_static(query)
    # run_match_dynamic(query)
    # run_match_with_data(query)
    run_namespace_details()
    # run_docs_info()
195
-
@@ -1,9 +0,0 @@
1
- LICENSE
2
- pyproject.toml
3
- readme.md
4
- starter.py
5
- waveflowdb_client.egg-info/PKG-INFO
6
- waveflowdb_client.egg-info/SOURCES.txt
7
- waveflowdb_client.egg-info/dependency_links.txt
8
- waveflowdb_client.egg-info/requires.txt
9
- waveflowdb_client.egg-info/top_level.txt