streamlit-octostar-utils 0.4.2.dev8__tar.gz → 0.4.2.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/contents.py +199 -190
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/nifi.py +37 -14
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/LICENSE +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/README.md +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -15,13 +15,12 @@ Storage Backends:
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
from abc import ABC, abstractmethod
|
|
18
|
-
from typing import Optional, Dict, Any,
|
|
18
|
+
from typing import Optional, Dict, Any, List
|
|
19
19
|
from enum import Enum
|
|
20
20
|
import base64
|
|
21
21
|
import httpx
|
|
22
22
|
import logging
|
|
23
23
|
from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
|
|
24
|
-
from copy import deepcopy
|
|
25
24
|
|
|
26
25
|
_logger = logging.getLogger(__name__)
|
|
27
26
|
|
|
@@ -415,107 +414,107 @@ class MemoryContents(Contents):
|
|
|
415
414
|
)
|
|
416
415
|
|
|
417
416
|
|
|
418
|
-
class
|
|
417
|
+
class _PresignedUrlRemoteContents(Contents):
|
|
419
418
|
"""
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
-
|
|
424
|
-
-
|
|
425
|
-
-
|
|
426
|
-
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
-
|
|
431
|
-
-
|
|
419
|
+
Abstract base for remote storage backends accessed via presigned URLs.
|
|
420
|
+
|
|
421
|
+
Provides all FileIO operations using presigned URLs:
|
|
422
|
+
- Reads via HTTP GET with Range headers (no need to download entire file)
|
|
423
|
+
- Writes via presigned POST (no entity upsert, only raw bytes to S3)
|
|
424
|
+
- Automatic presigned URL refresh on 403 (expired)
|
|
425
|
+
- Lazy full-load into memory buffer only when writes require it
|
|
426
|
+
- Cache invalidation on flush (next read re-fetches from remote)
|
|
427
|
+
|
|
428
|
+
Subclasses only need to implement:
|
|
429
|
+
- _refresh_download_url(): get a fresh presigned download URL
|
|
430
|
+
- _get_upload_info(): get presigned upload URL + form fields
|
|
431
|
+
- delete(): delete from the storage backend
|
|
432
|
+
- to_locator() / _from_locator(): serialization
|
|
432
433
|
"""
|
|
433
434
|
|
|
434
|
-
DEFAULT_CHUNK_SIZE = 8192
|
|
435
435
|
DEFAULT_URL_TIMEOUT = 120
|
|
436
|
+
MAX_RETRIES = 3
|
|
436
437
|
|
|
437
438
|
def __init__(
|
|
438
439
|
self,
|
|
439
440
|
entity_type: Optional[str] = None,
|
|
440
441
|
filetype: Optional[str] = None,
|
|
441
|
-
*,
|
|
442
|
-
workspace_id: str,
|
|
443
|
-
entity_id: str,
|
|
444
|
-
client,
|
|
445
|
-
initial_data: Optional[bytes] = None,
|
|
446
|
-
chunk_size: int = DEFAULT_CHUNK_SIZE,
|
|
447
442
|
**kwargs
|
|
448
443
|
):
|
|
449
444
|
super().__init__(entity_type, filetype, **kwargs)
|
|
450
|
-
self._workspace_id = workspace_id
|
|
451
|
-
self._entity_id = entity_id
|
|
452
|
-
self._client = client
|
|
453
|
-
self._chunk_size = chunk_size
|
|
454
|
-
|
|
455
445
|
self._buffer: Optional[BytesIO] = None
|
|
456
446
|
self._fully_loaded = False
|
|
457
447
|
self._modified = False
|
|
458
448
|
self._position = 0
|
|
459
449
|
self._size: Optional[int] = None
|
|
460
|
-
self.
|
|
450
|
+
self._download_url: Optional[str] = None
|
|
461
451
|
self._http_client: Optional[httpx.Client] = None
|
|
462
|
-
|
|
463
|
-
if initial_data is not None:
|
|
464
|
-
self._buffer = BytesIO(initial_data)
|
|
465
|
-
self._fully_loaded = True
|
|
466
|
-
self._size = len(initial_data)
|
|
467
452
|
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
453
|
+
# ==================== Abstract URL methods ====================
|
|
454
|
+
|
|
455
|
+
@abstractmethod
|
|
456
|
+
def _refresh_download_url(self) -> str:
|
|
457
|
+
"""Fetch a fresh presigned download URL from the API."""
|
|
458
|
+
pass
|
|
459
|
+
|
|
460
|
+
@abstractmethod
|
|
461
|
+
def _get_upload_info(self) -> Dict[str, Any]:
|
|
462
|
+
"""
|
|
463
|
+
Get presigned upload URL and form fields from the API.
|
|
471
464
|
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
465
|
+
Returns:
|
|
466
|
+
{"url": "<presigned upload URL>", "fields": {<form fields>}}
|
|
467
|
+
"""
|
|
468
|
+
pass
|
|
469
|
+
|
|
470
|
+
# ==================== HTTP infrastructure ====================
|
|
477
471
|
|
|
478
472
|
def _ensure_http_client(self):
|
|
479
|
-
"""Ensure
|
|
473
|
+
"""Ensure the shared httpx.Client is initialized."""
|
|
480
474
|
if not self._http_client:
|
|
481
475
|
self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)
|
|
482
476
|
|
|
477
|
+
def _ensure_download_url(self):
|
|
478
|
+
"""Ensure a download URL is cached, fetching one if needed."""
|
|
479
|
+
if not self._download_url:
|
|
480
|
+
self._download_url = self._refresh_download_url()
|
|
481
|
+
|
|
482
|
+
# ==================== Remote read operations ====================
|
|
483
|
+
|
|
483
484
|
def _fetch_size(self) -> int:
|
|
484
485
|
"""
|
|
485
|
-
Fetch the size of the remote
|
|
486
|
+
Fetch the size of the remote content using a HEAD request.
|
|
486
487
|
|
|
487
488
|
Handles presigned URL expiration with automatic refresh and retry.
|
|
488
489
|
"""
|
|
489
490
|
if self._size is not None:
|
|
490
491
|
return self._size
|
|
491
492
|
|
|
492
|
-
|
|
493
|
-
self._presigned_url = self._get_presigned_url()
|
|
494
|
-
|
|
493
|
+
self._ensure_download_url()
|
|
495
494
|
self._ensure_http_client()
|
|
496
|
-
max_retries = 3
|
|
497
495
|
|
|
498
|
-
for attempt in range(
|
|
499
|
-
response = self._http_client.head(self.
|
|
496
|
+
for attempt in range(self.MAX_RETRIES):
|
|
497
|
+
response = self._http_client.head(self._download_url)
|
|
500
498
|
if response.status_code == 200:
|
|
501
499
|
self._size = int(response.headers.get('content-length', 0))
|
|
502
500
|
return self._size
|
|
503
501
|
elif response.status_code == 403:
|
|
504
502
|
_logger.debug(
|
|
505
|
-
f"Presigned URL expired while fetching size, refreshing...
|
|
503
|
+
f"Presigned URL expired while fetching size, refreshing... "
|
|
504
|
+
f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
|
|
506
505
|
)
|
|
507
|
-
self.
|
|
506
|
+
self._download_url = self._refresh_download_url()
|
|
508
507
|
continue
|
|
509
508
|
else:
|
|
510
509
|
response.raise_for_status()
|
|
511
510
|
|
|
512
511
|
raise ConnectionError(
|
|
513
|
-
f"Failed to fetch file size after {
|
|
512
|
+
f"Failed to fetch file size after {self.MAX_RETRIES} attempts (URL kept expiring)"
|
|
514
513
|
)
|
|
515
514
|
|
|
516
515
|
def _read_range(self, start: int, end: int) -> bytes:
|
|
517
516
|
"""
|
|
518
|
-
Read a specific byte range using HTTP Range request.
|
|
517
|
+
Read a specific byte range using an HTTP Range request.
|
|
519
518
|
|
|
520
519
|
Handles presigned URL expiration with automatic refresh and retry.
|
|
521
520
|
|
|
@@ -526,16 +525,13 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
526
525
|
Returns:
|
|
527
526
|
Bytes from the specified range
|
|
528
527
|
"""
|
|
529
|
-
|
|
530
|
-
self._presigned_url = self._get_presigned_url()
|
|
531
|
-
|
|
528
|
+
self._ensure_download_url()
|
|
532
529
|
self._ensure_http_client()
|
|
533
530
|
|
|
534
531
|
headers = {"Range": f"bytes={start}-{end}"}
|
|
535
|
-
max_retries = 3
|
|
536
532
|
|
|
537
|
-
for attempt in range(
|
|
538
|
-
response = self._http_client.get(self.
|
|
533
|
+
for attempt in range(self.MAX_RETRIES):
|
|
534
|
+
response = self._http_client.get(self._download_url, headers=headers)
|
|
539
535
|
|
|
540
536
|
if response.status_code == 206:
|
|
541
537
|
return response.content
|
|
@@ -543,38 +539,85 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
543
539
|
return b""
|
|
544
540
|
elif response.status_code == 403:
|
|
545
541
|
_logger.debug(
|
|
546
|
-
f"Presigned URL expired at byte {start}, refreshing...
|
|
542
|
+
f"Presigned URL expired at byte {start}, refreshing... "
|
|
543
|
+
f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
|
|
547
544
|
)
|
|
548
|
-
self.
|
|
545
|
+
self._download_url = self._refresh_download_url()
|
|
549
546
|
continue
|
|
550
547
|
else:
|
|
551
548
|
response.raise_for_status()
|
|
552
549
|
return response.content
|
|
553
550
|
|
|
554
551
|
raise ConnectionError(
|
|
555
|
-
f"Failed to read range {start}-{end} after {
|
|
552
|
+
f"Failed to read range {start}-{end} after {self.MAX_RETRIES} attempts "
|
|
553
|
+
f"(URL kept expiring)"
|
|
556
554
|
)
|
|
557
555
|
|
|
558
556
|
def _load_full(self):
|
|
559
|
-
"""
|
|
557
|
+
"""
|
|
558
|
+
Load the entire remote content into memory buffer via presigned URL.
|
|
559
|
+
|
|
560
|
+
Handles presigned URL expiration with automatic refresh and retry.
|
|
561
|
+
After loading, the buffer position is restored to self._position.
|
|
562
|
+
"""
|
|
560
563
|
if self._fully_loaded:
|
|
561
564
|
return
|
|
562
565
|
|
|
563
|
-
|
|
566
|
+
self._ensure_download_url()
|
|
567
|
+
self._ensure_http_client()
|
|
564
568
|
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
569
|
+
for attempt in range(self.MAX_RETRIES):
|
|
570
|
+
response = self._http_client.get(self._download_url)
|
|
571
|
+
if response.status_code == 200:
|
|
572
|
+
data = response.content
|
|
573
|
+
self._buffer = BytesIO(data)
|
|
574
|
+
self._fully_loaded = True
|
|
575
|
+
self._size = len(data)
|
|
576
|
+
self._buffer.seek(self._position, SEEK_SET)
|
|
577
|
+
return
|
|
578
|
+
elif response.status_code == 403:
|
|
579
|
+
_logger.debug(
|
|
580
|
+
f"Presigned URL expired during full load, refreshing... "
|
|
581
|
+
f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
|
|
582
|
+
)
|
|
583
|
+
self._download_url = self._refresh_download_url()
|
|
584
|
+
continue
|
|
585
|
+
else:
|
|
586
|
+
response.raise_for_status()
|
|
587
|
+
|
|
588
|
+
raise ConnectionError(
|
|
589
|
+
f"Failed to load full content after {self.MAX_RETRIES} attempts "
|
|
590
|
+
f"(URL kept expiring)"
|
|
571
591
|
)
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
592
|
+
|
|
593
|
+
# ==================== Remote write operations ====================
|
|
594
|
+
|
|
595
|
+
def _upload_data(self, data: bytes):
|
|
596
|
+
"""
|
|
597
|
+
Upload data to remote storage via presigned POST URL.
|
|
598
|
+
|
|
599
|
+
Uses the presigned URL and form fields from _get_upload_info().
|
|
600
|
+
This only touches S3/MinIO — no entity record upsert.
|
|
601
|
+
"""
|
|
602
|
+
upload_info = self._get_upload_info()
|
|
603
|
+
upload_url = upload_info["url"]
|
|
604
|
+
upload_fields = upload_info.get("fields", {})
|
|
605
|
+
|
|
606
|
+
self._ensure_http_client()
|
|
607
|
+
file_obj = BytesIO(data)
|
|
608
|
+
|
|
609
|
+
response = self._http_client.post(
|
|
610
|
+
upload_url,
|
|
611
|
+
data=upload_fields,
|
|
612
|
+
files={"file": file_obj},
|
|
613
|
+
)
|
|
614
|
+
if response.status_code not in (200, 204):
|
|
615
|
+
response.raise_for_status()
|
|
616
|
+
|
|
617
|
+
# ==================== FileIO Interface ====================
|
|
576
618
|
|
|
577
619
|
def read(self, size: int = -1) -> bytes:
|
|
620
|
+
# If fully loaded into buffer, read from buffer
|
|
578
621
|
if self._fully_loaded:
|
|
579
622
|
if not self._buffer:
|
|
580
623
|
self._load_full()
|
|
@@ -582,7 +625,6 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
582
625
|
|
|
583
626
|
# Otherwise, use HTTP Range requests for efficient streaming
|
|
584
627
|
if size == -1:
|
|
585
|
-
# Read all from current position
|
|
586
628
|
self._fetch_size()
|
|
587
629
|
if self._position >= self._size:
|
|
588
630
|
return b""
|
|
@@ -591,33 +633,28 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
591
633
|
if size <= 0:
|
|
592
634
|
return b""
|
|
593
635
|
|
|
594
|
-
# Fetch the size if we don't know it
|
|
595
636
|
self._fetch_size()
|
|
596
637
|
|
|
597
|
-
#
|
|
638
|
+
# Clamp to available bytes
|
|
598
639
|
if self._position + size > self._size:
|
|
599
640
|
size = self._size - self._position
|
|
600
641
|
|
|
601
642
|
if size <= 0:
|
|
602
643
|
return b""
|
|
603
644
|
|
|
604
|
-
# Read using HTTP Range request
|
|
605
645
|
end_byte = self._position + size - 1
|
|
606
646
|
data = self._read_range(self._position, end_byte)
|
|
607
647
|
self._position += len(data)
|
|
608
|
-
|
|
609
648
|
return data
|
|
610
649
|
|
|
611
650
|
def write(self, b: bytes) -> int:
|
|
612
651
|
if not self._buffer:
|
|
613
652
|
self._load_full()
|
|
614
|
-
|
|
615
653
|
n = self._buffer.write(b)
|
|
616
654
|
self._modified = True
|
|
617
655
|
return n
|
|
618
656
|
|
|
619
657
|
def seek(self, offset: int, whence: int = SEEK_SET) -> int:
|
|
620
|
-
# Calculate new position
|
|
621
658
|
if whence == SEEK_SET:
|
|
622
659
|
new_pos = offset
|
|
623
660
|
elif whence == SEEK_CUR:
|
|
@@ -634,11 +671,9 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
634
671
|
if new_pos < 0:
|
|
635
672
|
raise ValueError("Negative seek position")
|
|
636
673
|
|
|
637
|
-
# If fully loaded, use buffer seek
|
|
638
674
|
if self._buffer and self._fully_loaded:
|
|
639
675
|
return self._buffer.seek(new_pos, SEEK_SET)
|
|
640
676
|
|
|
641
|
-
# Otherwise, just update position (HTTP Range will handle it)
|
|
642
677
|
self._position = new_pos
|
|
643
678
|
return self._position
|
|
644
679
|
|
|
@@ -648,32 +683,30 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
648
683
|
return self._position
|
|
649
684
|
|
|
650
685
|
def flush(self):
|
|
651
|
-
"""Flush
|
|
686
|
+
"""Flush: upload buffer to remote storage via presigned URL, then invalidate cache."""
|
|
652
687
|
if self._buffer:
|
|
653
688
|
self._buffer.flush()
|
|
654
689
|
|
|
655
690
|
if not self._modified or not self._buffer:
|
|
656
691
|
return
|
|
657
692
|
|
|
658
|
-
|
|
659
|
-
raise ValueError("entity_type and filetype required to flush to workspace")
|
|
660
|
-
|
|
661
|
-
from octostar.utils.workspace import write_attachment
|
|
662
|
-
|
|
693
|
+
# Read all data from buffer
|
|
663
694
|
current_pos = self._buffer.tell()
|
|
664
695
|
self._buffer.seek(0, SEEK_SET)
|
|
665
696
|
data = self._buffer.read()
|
|
666
697
|
self._buffer.seek(current_pos, SEEK_SET)
|
|
667
698
|
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
filetype=self._filetype,
|
|
673
|
-
file=data,
|
|
674
|
-
client=self._client
|
|
675
|
-
)
|
|
699
|
+
# Upload directly to S3 via presigned URL
|
|
700
|
+
self._upload_data(data)
|
|
701
|
+
|
|
702
|
+
# Invalidate cache so next read re-fetches from remote
|
|
676
703
|
self._modified = False
|
|
704
|
+
self._buffer.close()
|
|
705
|
+
self._buffer = None
|
|
706
|
+
self._fully_loaded = False
|
|
707
|
+
self._position = 0
|
|
708
|
+
self._size = None
|
|
709
|
+
self._download_url = None
|
|
677
710
|
|
|
678
711
|
def close(self):
|
|
679
712
|
if not self._closed:
|
|
@@ -686,15 +719,6 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
686
719
|
self._http_client = None
|
|
687
720
|
super().close()
|
|
688
721
|
|
|
689
|
-
def delete(self):
|
|
690
|
-
"""Delete the entity from the workspace using delete_entity()."""
|
|
691
|
-
from octostar.utils.workspace import delete_entity
|
|
692
|
-
|
|
693
|
-
delete_entity.sync(
|
|
694
|
-
os_entity_uid=self._entity_id,
|
|
695
|
-
client=self._client
|
|
696
|
-
)
|
|
697
|
-
|
|
698
722
|
def truncate(self, size: Optional[int] = None) -> int:
|
|
699
723
|
if not self._buffer:
|
|
700
724
|
self._load_full()
|
|
@@ -705,6 +729,64 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
705
729
|
if not self._buffer or not self._fully_loaded:
|
|
706
730
|
self._load_full()
|
|
707
731
|
return self._buffer.getvalue()
|
|
732
|
+
|
|
733
|
+
|
|
734
|
+
class WorkspaceAttachmentContents(_PresignedUrlRemoteContents):
|
|
735
|
+
"""
|
|
736
|
+
Contents implementation for Octostar workspace attachments.
|
|
737
|
+
|
|
738
|
+
All I/O goes through presigned S3 URLs:
|
|
739
|
+
- Reads use get_attachment_url() for presigned download URLs
|
|
740
|
+
- Writes use request_attachment_url() for presigned upload URLs
|
|
741
|
+
- No entity upsert on flush — only raw bytes are written to S3
|
|
742
|
+
|
|
743
|
+
Presigned URL Handling:
|
|
744
|
+
- Download URLs are obtained via get_attachment_url() and cached
|
|
745
|
+
- On 403 (Forbidden) responses, URLs are refreshed and requests retried
|
|
746
|
+
"""
|
|
747
|
+
|
|
748
|
+
def __init__(
|
|
749
|
+
self,
|
|
750
|
+
entity_type: Optional[str] = None,
|
|
751
|
+
filetype: Optional[str] = None,
|
|
752
|
+
*,
|
|
753
|
+
workspace_id: str,
|
|
754
|
+
entity_id: str,
|
|
755
|
+
client,
|
|
756
|
+
**kwargs
|
|
757
|
+
):
|
|
758
|
+
super().__init__(entity_type, filetype, **kwargs)
|
|
759
|
+
self._workspace_id = workspace_id
|
|
760
|
+
self._entity_id = entity_id
|
|
761
|
+
self._client = client
|
|
762
|
+
|
|
763
|
+
def _refresh_download_url(self) -> str:
|
|
764
|
+
from octostar.utils.workspace import get_attachment_url
|
|
765
|
+
|
|
766
|
+
return get_attachment_url.sync(
|
|
767
|
+
os_workspace=self._workspace_id,
|
|
768
|
+
os_entity_uid=self._entity_id,
|
|
769
|
+
client=self._client
|
|
770
|
+
)
|
|
771
|
+
|
|
772
|
+
def _get_upload_info(self) -> Dict[str, Any]:
|
|
773
|
+
from octostar.utils.workspace import request_attachment_url
|
|
774
|
+
|
|
775
|
+
return request_attachment_url.sync(
|
|
776
|
+
os_workspace=self._workspace_id,
|
|
777
|
+
os_entity_uid=self._entity_id,
|
|
778
|
+
content_type=self._filetype,
|
|
779
|
+
client=self._client
|
|
780
|
+
)
|
|
781
|
+
|
|
782
|
+
def delete(self):
|
|
783
|
+
"""Delete the entity from the workspace using delete_entity()."""
|
|
784
|
+
from octostar.utils.workspace import delete_entity
|
|
785
|
+
|
|
786
|
+
delete_entity.sync(
|
|
787
|
+
os_entity_uid=self._entity_id,
|
|
788
|
+
client=self._client
|
|
789
|
+
)
|
|
708
790
|
|
|
709
791
|
def to_locator(self) -> Dict[str, Any]:
|
|
710
792
|
"""
|
|
@@ -712,7 +794,7 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
712
794
|
|
|
713
795
|
Returns:
|
|
714
796
|
{"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
|
|
715
|
-
"entity_type": "...", "filetype": "..."}
|
|
797
|
+
"entity_type": "...", "filetype": "...", "item_name": "..."}
|
|
716
798
|
"""
|
|
717
799
|
if self._workspace_id and self._entity_id:
|
|
718
800
|
pointer = f"{self._workspace_id}/{self._entity_id}"
|
|
@@ -760,12 +842,13 @@ class WorkspaceAttachmentContents(Contents):
|
|
|
760
842
|
)
|
|
761
843
|
|
|
762
844
|
|
|
763
|
-
class TemporaryAttachmentContents(
|
|
845
|
+
class TemporaryAttachmentContents(_PresignedUrlRemoteContents):
|
|
764
846
|
"""
|
|
765
847
|
Contents implementation for Octostar temporary blob storage.
|
|
766
848
|
|
|
767
|
-
|
|
768
|
-
|
|
849
|
+
All I/O goes through presigned S3 URLs:
|
|
850
|
+
- Reads use get_temporary_blob_url() for presigned download URLs
|
|
851
|
+
- Writes use request_temporary_blob_url() for presigned upload URLs
|
|
769
852
|
|
|
770
853
|
Temporary blobs are keyed by filename (not workspace/entity), and are not
|
|
771
854
|
associated with any workspace entity. Use WorkspaceAttachmentContents for that.
|
|
@@ -778,90 +861,27 @@ class TemporaryAttachmentContents(Contents):
|
|
|
778
861
|
*,
|
|
779
862
|
filename: str,
|
|
780
863
|
client,
|
|
781
|
-
initial_data: Optional[bytes] = None,
|
|
782
864
|
**kwargs
|
|
783
865
|
):
|
|
784
866
|
super().__init__(entity_type, filetype, **kwargs)
|
|
785
867
|
self._filename = filename
|
|
786
868
|
self._client = client
|
|
787
|
-
|
|
788
|
-
self._buffer: Optional[BytesIO] = None
|
|
789
|
-
self._fully_loaded = False
|
|
790
|
-
self._modified = False
|
|
791
|
-
|
|
792
|
-
if initial_data is not None:
|
|
793
|
-
self._buffer = BytesIO(initial_data)
|
|
794
|
-
self._fully_loaded = True
|
|
795
869
|
|
|
796
|
-
def
|
|
797
|
-
|
|
798
|
-
if self._fully_loaded:
|
|
799
|
-
return
|
|
800
|
-
|
|
801
|
-
from octostar.utils.workspace import read_temporary_blob
|
|
870
|
+
def _refresh_download_url(self) -> str:
|
|
871
|
+
from octostar.utils.workspace import get_temporary_blob_url
|
|
802
872
|
|
|
803
|
-
|
|
873
|
+
return get_temporary_blob_url.sync(
|
|
804
874
|
filename=self._filename,
|
|
805
|
-
decode=False,
|
|
806
875
|
client=self._client
|
|
807
876
|
)
|
|
808
|
-
self._buffer = BytesIO(data or b"")
|
|
809
|
-
self._fully_loaded = True
|
|
810
|
-
|
|
811
|
-
def read(self, size: int = -1) -> bytes:
|
|
812
|
-
if not self._buffer:
|
|
813
|
-
self._load_full()
|
|
814
|
-
return self._buffer.read(size)
|
|
815
|
-
|
|
816
|
-
def write(self, b: bytes) -> int:
|
|
817
|
-
if not self._buffer:
|
|
818
|
-
self._load_full()
|
|
819
|
-
n = self._buffer.write(b)
|
|
820
|
-
self._modified = True
|
|
821
|
-
return n
|
|
822
877
|
|
|
823
|
-
def
|
|
824
|
-
|
|
825
|
-
self._load_full()
|
|
826
|
-
return self._buffer.seek(offset, whence)
|
|
827
|
-
|
|
828
|
-
def tell(self) -> int:
|
|
829
|
-
if not self._buffer:
|
|
830
|
-
self._load_full()
|
|
831
|
-
return self._buffer.tell()
|
|
832
|
-
|
|
833
|
-
def flush(self):
|
|
834
|
-
"""Flush the internal buffer and write to temp bucket if modified."""
|
|
835
|
-
if self._buffer:
|
|
836
|
-
self._buffer.flush()
|
|
837
|
-
|
|
838
|
-
if not self._modified or not self._buffer:
|
|
839
|
-
return
|
|
840
|
-
|
|
841
|
-
from octostar.utils.workspace import write_temporary_blob
|
|
842
|
-
|
|
843
|
-
current_pos = self._buffer.tell()
|
|
844
|
-
self._buffer.seek(0, SEEK_SET)
|
|
845
|
-
data = self._buffer.read()
|
|
846
|
-
self._buffer.seek(current_pos, SEEK_SET)
|
|
878
|
+
def _get_upload_info(self) -> Dict[str, Any]:
|
|
879
|
+
from octostar.utils.workspace import request_temporary_blob_url
|
|
847
880
|
|
|
848
|
-
|
|
881
|
+
return request_temporary_blob_url.sync(
|
|
849
882
|
filename=self._filename,
|
|
850
|
-
file=data,
|
|
851
883
|
client=self._client
|
|
852
884
|
)
|
|
853
|
-
self._modified = False
|
|
854
|
-
self._buffer.close()
|
|
855
|
-
self._buffer = None
|
|
856
|
-
self._fully_loaded = False
|
|
857
|
-
|
|
858
|
-
def close(self):
|
|
859
|
-
if not self._closed:
|
|
860
|
-
if self._modified:
|
|
861
|
-
self.flush()
|
|
862
|
-
if self._buffer:
|
|
863
|
-
self._buffer.close()
|
|
864
|
-
super().close()
|
|
865
885
|
|
|
866
886
|
def delete(self):
|
|
867
887
|
"""Delete the blob from the temporary bucket."""
|
|
@@ -872,17 +892,6 @@ class TemporaryAttachmentContents(Contents):
|
|
|
872
892
|
client=self._client
|
|
873
893
|
)
|
|
874
894
|
|
|
875
|
-
def truncate(self, size: Optional[int] = None) -> int:
|
|
876
|
-
if not self._buffer:
|
|
877
|
-
self._load_full()
|
|
878
|
-
self._modified = True
|
|
879
|
-
return self._buffer.truncate(size)
|
|
880
|
-
|
|
881
|
-
def getvalue(self) -> bytes:
|
|
882
|
-
if not self._buffer or not self._fully_loaded:
|
|
883
|
-
self._load_full()
|
|
884
|
-
return self._buffer.getvalue()
|
|
885
|
-
|
|
886
895
|
def to_locator(self) -> Dict[str, Any]:
|
|
887
896
|
"""
|
|
888
897
|
Serialize to locator with filename.
|
|
@@ -18,7 +18,7 @@ from octostar.utils.workspace import upsert_entities
|
|
|
18
18
|
from octostar.utils.ontology import fetch_ontology_data
|
|
19
19
|
from octostar.utils.workspace.permissions import get_permissions, PermissionLevel
|
|
20
20
|
from octostar.utils.pipeline import update_processing_status
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
from octostar.client import make_client
|
|
23
23
|
|
|
24
24
|
from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
|
|
@@ -464,26 +464,49 @@ class NifiContextManager(object):
|
|
|
464
464
|
if not file.contents:
|
|
465
465
|
continue
|
|
466
466
|
old_contents = file._contents
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
entity_id=file.record['os_entity_uid'],
|
|
467
|
+
record = file.record
|
|
468
|
+
ws = file.write_os_workspace
|
|
469
|
+
entity_uid = record["os_entity_uid"]
|
|
470
|
+
entity_type = record["os_concept"]
|
|
471
|
+
filetype = record.get("os_item_content_type")
|
|
472
|
+
item_name = record.get("os_item_name")
|
|
473
|
+
data = old_contents.getvalue()
|
|
474
|
+
target = WorkspaceAttachmentContents(
|
|
475
|
+
workspace_id=ws,
|
|
476
|
+
entity_id=entity_uid,
|
|
478
477
|
client=self.client,
|
|
479
|
-
entity_type=
|
|
480
|
-
filetype=
|
|
478
|
+
entity_type=entity_type,
|
|
479
|
+
filetype=filetype,
|
|
481
480
|
)
|
|
481
|
+
target.write(data)
|
|
482
|
+
target.flush()
|
|
483
|
+
file._contents = target
|
|
482
484
|
if isinstance(old_contents, TemporaryAttachmentContents):
|
|
483
485
|
try:
|
|
484
486
|
old_contents.delete()
|
|
485
487
|
except Exception:
|
|
486
488
|
pass
|
|
489
|
+
fields = {"os_has_attachment": True}
|
|
490
|
+
if filetype:
|
|
491
|
+
fields["os_item_content_type"] = filetype
|
|
492
|
+
if item_name:
|
|
493
|
+
fields["os_item_name"] = item_name
|
|
494
|
+
new_entities = upsert_entities.sync(
|
|
495
|
+
ws,
|
|
496
|
+
[{
|
|
497
|
+
"entity_type": entity_type,
|
|
498
|
+
"os_entity_uid": entity_uid,
|
|
499
|
+
"fields": fields,
|
|
500
|
+
}],
|
|
501
|
+
client=self.client,
|
|
502
|
+
)
|
|
503
|
+
new_entity = {e["os_entity_uid"]: e for e in new_entities}.get(entity_uid, {})
|
|
504
|
+
file.record = {**record, **new_entity}
|
|
505
|
+
file.record["entity_id"] = file.record["os_entity_uid"]
|
|
506
|
+
file.record["entity_type"] = file.record["os_concept"]
|
|
507
|
+
file.record["entity_label"] = file.label
|
|
508
|
+
file.request["entity_timestamp"] = file.record["os_last_updated_at"]
|
|
509
|
+
file.request["is_temporary"] = False
|
|
487
510
|
|
|
488
511
|
def _sync_upsert_entities(self, entities_to_upsert):
|
|
489
512
|
if not entities_to_upsert:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|