streamlit-octostar-utils 0.4.2.dev8__tar.gz → 0.4.2.dev10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/contents.py +199 -190
  4. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/nifi.py +37 -14
  5. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/LICENSE +0 -0
  6. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/README.md +0 -0
  7. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/__init__.py +0 -0
  8. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  9. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  10. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  11. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  12. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  13. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  14. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  15. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  16. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  17. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  18. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  19. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  20. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  21. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  22. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/__init__.py +0 -0
  23. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/dict.py +0 -0
  24. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/filetypes.py +0 -0
  25. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  26. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  27. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/timestamp.py +0 -0
  28. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  29. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  30. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/language.py +0 -0
  31. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  38. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.4.2.dev8 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.4.2.dev8
3
+ Version: 0.4.2.dev10
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.4.2-dev.8"
8
+ version = "0.4.2-dev.10"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -15,13 +15,12 @@ Storage Backends:
15
15
  """
16
16
 
17
17
  from abc import ABC, abstractmethod
18
- from typing import Optional, Dict, Any, Union, BinaryIO, List
18
+ from typing import Optional, Dict, Any, List
19
19
  from enum import Enum
20
20
  import base64
21
21
  import httpx
22
22
  import logging
23
23
  from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
24
- from copy import deepcopy
25
24
 
26
25
  _logger = logging.getLogger(__name__)
27
26
 
@@ -415,107 +414,107 @@ class MemoryContents(Contents):
415
414
  )
416
415
 
417
416
 
418
- class WorkspaceAttachmentContents(Contents):
417
+ class _PresignedUrlRemoteContents(Contents):
419
418
  """
420
- Contents implementation for Octostar workspace attachments.
421
-
422
- Uses octostar-api utilities (read_attachment, get_attachment_url) to:
423
- - Lazy load from workspace storage via read_attachment()
424
- - Stream efficiently with HTTP Range requests (no need to download entire file)
425
- - Support true seeking with configurable chunk sizes
426
- - Buffer modifications in memory (write back handled at NiFi entity level)
427
-
428
- Presigned URL Handling:
429
- - URLs are obtained via get_attachment_url() and cached
430
- - On 403 (Forbidden) responses, URLs are refreshed and requests retried
431
- - Follows the same retry pattern as octostar-api read_file utilities
419
+ Abstract base for remote storage backends accessed via presigned URLs.
420
+
421
+ Provides all FileIO operations using presigned URLs:
422
+ - Reads via HTTP GET with Range headers (no need to download entire file)
423
+ - Writes via presigned POST (no entity upsert, only raw bytes to S3)
424
+ - Automatic presigned URL refresh on 403 (expired)
425
+ - Lazy full-load into memory buffer only when writes require it
426
+ - Cache invalidation on flush (next read re-fetches from remote)
427
+
428
+ Subclasses only need to implement:
429
+ - _refresh_download_url(): get a fresh presigned download URL
430
+ - _get_upload_info(): get presigned upload URL + form fields
431
+ - delete(): delete from the storage backend
432
+ - to_locator() / _from_locator(): serialization
432
433
  """
433
434
 
434
- DEFAULT_CHUNK_SIZE = 8192
435
435
  DEFAULT_URL_TIMEOUT = 120
436
+ MAX_RETRIES = 3
436
437
 
437
438
  def __init__(
438
439
  self,
439
440
  entity_type: Optional[str] = None,
440
441
  filetype: Optional[str] = None,
441
- *,
442
- workspace_id: str,
443
- entity_id: str,
444
- client,
445
- initial_data: Optional[bytes] = None,
446
- chunk_size: int = DEFAULT_CHUNK_SIZE,
447
442
  **kwargs
448
443
  ):
449
444
  super().__init__(entity_type, filetype, **kwargs)
450
- self._workspace_id = workspace_id
451
- self._entity_id = entity_id
452
- self._client = client
453
- self._chunk_size = chunk_size
454
-
455
445
  self._buffer: Optional[BytesIO] = None
456
446
  self._fully_loaded = False
457
447
  self._modified = False
458
448
  self._position = 0
459
449
  self._size: Optional[int] = None
460
- self._presigned_url: Optional[str] = None
450
+ self._download_url: Optional[str] = None
461
451
  self._http_client: Optional[httpx.Client] = None
462
-
463
- if initial_data is not None:
464
- self._buffer = BytesIO(initial_data)
465
- self._fully_loaded = True
466
- self._size = len(initial_data)
467
452
 
468
- def _get_presigned_url(self) -> str:
469
- """Get or refresh the presigned URL for the attachment."""
470
- from octostar.utils.workspace import get_attachment_url
453
+ # ==================== Abstract URL methods ====================
454
+
455
+ @abstractmethod
456
+ def _refresh_download_url(self) -> str:
457
+ """Fetch a fresh presigned download URL from the API."""
458
+ pass
459
+
460
+ @abstractmethod
461
+ def _get_upload_info(self) -> Dict[str, Any]:
462
+ """
463
+ Get presigned upload URL and form fields from the API.
471
464
 
472
- return get_attachment_url.sync(
473
- os_workspace=self._workspace_id,
474
- os_entity_uid=self._entity_id,
475
- client=self._client
476
- )
465
+ Returns:
466
+ {"url": "<presigned upload URL>", "fields": {<form fields>}}
467
+ """
468
+ pass
469
+
470
+ # ==================== HTTP infrastructure ====================
477
471
 
478
472
  def _ensure_http_client(self):
479
- """Ensure HTTP client is initialized."""
473
+ """Ensure the shared httpx.Client is initialized."""
480
474
  if not self._http_client:
481
475
  self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)
482
476
 
477
+ def _ensure_download_url(self):
478
+ """Ensure a download URL is cached, fetching one if needed."""
479
+ if not self._download_url:
480
+ self._download_url = self._refresh_download_url()
481
+
482
+ # ==================== Remote read operations ====================
483
+
483
484
  def _fetch_size(self) -> int:
484
485
  """
485
- Fetch the size of the remote attachment using HEAD request.
486
+ Fetch the size of the remote content using a HEAD request.
486
487
 
487
488
  Handles presigned URL expiration with automatic refresh and retry.
488
489
  """
489
490
  if self._size is not None:
490
491
  return self._size
491
492
 
492
- if not self._presigned_url:
493
- self._presigned_url = self._get_presigned_url()
494
-
493
+ self._ensure_download_url()
495
494
  self._ensure_http_client()
496
- max_retries = 3
497
495
 
498
- for attempt in range(max_retries):
499
- response = self._http_client.head(self._presigned_url)
496
+ for attempt in range(self.MAX_RETRIES):
497
+ response = self._http_client.head(self._download_url)
500
498
  if response.status_code == 200:
501
499
  self._size = int(response.headers.get('content-length', 0))
502
500
  return self._size
503
501
  elif response.status_code == 403:
504
502
  _logger.debug(
505
- f"Presigned URL expired while fetching size, refreshing... (attempt {attempt + 1}/{max_retries})"
503
+ f"Presigned URL expired while fetching size, refreshing... "
504
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
506
505
  )
507
- self._presigned_url = self._get_presigned_url()
506
+ self._download_url = self._refresh_download_url()
508
507
  continue
509
508
  else:
510
509
  response.raise_for_status()
511
510
 
512
511
  raise ConnectionError(
513
- f"Failed to fetch file size after {max_retries} attempts (URL kept expiring)"
512
+ f"Failed to fetch file size after {self.MAX_RETRIES} attempts (URL kept expiring)"
514
513
  )
515
514
 
516
515
  def _read_range(self, start: int, end: int) -> bytes:
517
516
  """
518
- Read a specific byte range using HTTP Range request.
517
+ Read a specific byte range using an HTTP Range request.
519
518
 
520
519
  Handles presigned URL expiration with automatic refresh and retry.
521
520
 
@@ -526,16 +525,13 @@ class WorkspaceAttachmentContents(Contents):
526
525
  Returns:
527
526
  Bytes from the specified range
528
527
  """
529
- if not self._presigned_url:
530
- self._presigned_url = self._get_presigned_url()
531
-
528
+ self._ensure_download_url()
532
529
  self._ensure_http_client()
533
530
 
534
531
  headers = {"Range": f"bytes={start}-{end}"}
535
- max_retries = 3
536
532
 
537
- for attempt in range(max_retries):
538
- response = self._http_client.get(self._presigned_url, headers=headers)
533
+ for attempt in range(self.MAX_RETRIES):
534
+ response = self._http_client.get(self._download_url, headers=headers)
539
535
 
540
536
  if response.status_code == 206:
541
537
  return response.content
@@ -543,38 +539,85 @@ class WorkspaceAttachmentContents(Contents):
543
539
  return b""
544
540
  elif response.status_code == 403:
545
541
  _logger.debug(
546
- f"Presigned URL expired at byte {start}, refreshing... (attempt {attempt + 1}/{max_retries})"
542
+ f"Presigned URL expired at byte {start}, refreshing... "
543
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
547
544
  )
548
- self._presigned_url = self._get_presigned_url()
545
+ self._download_url = self._refresh_download_url()
549
546
  continue
550
547
  else:
551
548
  response.raise_for_status()
552
549
  return response.content
553
550
 
554
551
  raise ConnectionError(
555
- f"Failed to read range {start}-{end} after {max_retries} attempts (URL kept expiring)"
552
+ f"Failed to read range {start}-{end} after {self.MAX_RETRIES} attempts "
553
+ f"(URL kept expiring)"
556
554
  )
557
555
 
558
556
  def _load_full(self):
559
- """Load the entire attachment into memory buffer using read_attachment()."""
557
+ """
558
+ Load the entire remote content into memory buffer via presigned URL.
559
+
560
+ Handles presigned URL expiration with automatic refresh and retry.
561
+ After loading, the buffer position is restored to self._position.
562
+ """
560
563
  if self._fully_loaded:
561
564
  return
562
565
 
563
- from octostar.utils.workspace import read_attachment
566
+ self._ensure_download_url()
567
+ self._ensure_http_client()
564
568
 
565
- data = read_attachment.sync(
566
- os_workspace=self._workspace_id,
567
- os_entity_uid=self._entity_id,
568
- decode=False,
569
- stream=False,
570
- client=self._client
569
+ for attempt in range(self.MAX_RETRIES):
570
+ response = self._http_client.get(self._download_url)
571
+ if response.status_code == 200:
572
+ data = response.content
573
+ self._buffer = BytesIO(data)
574
+ self._fully_loaded = True
575
+ self._size = len(data)
576
+ self._buffer.seek(self._position, SEEK_SET)
577
+ return
578
+ elif response.status_code == 403:
579
+ _logger.debug(
580
+ f"Presigned URL expired during full load, refreshing... "
581
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
582
+ )
583
+ self._download_url = self._refresh_download_url()
584
+ continue
585
+ else:
586
+ response.raise_for_status()
587
+
588
+ raise ConnectionError(
589
+ f"Failed to load full content after {self.MAX_RETRIES} attempts "
590
+ f"(URL kept expiring)"
571
591
  )
572
- self._buffer = BytesIO(data or b"")
573
- self._fully_loaded = True
574
- self._size = len(data) if data else 0
575
- self._position = 0
592
+
593
+ # ==================== Remote write operations ====================
594
+
595
+ def _upload_data(self, data: bytes):
596
+ """
597
+ Upload data to remote storage via presigned POST URL.
598
+
599
+ Uses the presigned URL and form fields from _get_upload_info().
600
+ This only touches S3/MinIO — no entity record upsert.
601
+ """
602
+ upload_info = self._get_upload_info()
603
+ upload_url = upload_info["url"]
604
+ upload_fields = upload_info.get("fields", {})
605
+
606
+ self._ensure_http_client()
607
+ file_obj = BytesIO(data)
608
+
609
+ response = self._http_client.post(
610
+ upload_url,
611
+ data=upload_fields,
612
+ files={"file": file_obj},
613
+ )
614
+ if response.status_code not in (200, 204):
615
+ response.raise_for_status()
616
+
617
+ # ==================== FileIO Interface ====================
576
618
 
577
619
  def read(self, size: int = -1) -> bytes:
620
+ # If fully loaded into buffer, read from buffer
578
621
  if self._fully_loaded:
579
622
  if not self._buffer:
580
623
  self._load_full()
@@ -582,7 +625,6 @@ class WorkspaceAttachmentContents(Contents):
582
625
 
583
626
  # Otherwise, use HTTP Range requests for efficient streaming
584
627
  if size == -1:
585
- # Read all from current position
586
628
  self._fetch_size()
587
629
  if self._position >= self._size:
588
630
  return b""
@@ -591,33 +633,28 @@ class WorkspaceAttachmentContents(Contents):
591
633
  if size <= 0:
592
634
  return b""
593
635
 
594
- # Fetch the size if we don't know it
595
636
  self._fetch_size()
596
637
 
597
- # Adjust size if it goes beyond the end
638
+ # Clamp to available bytes
598
639
  if self._position + size > self._size:
599
640
  size = self._size - self._position
600
641
 
601
642
  if size <= 0:
602
643
  return b""
603
644
 
604
- # Read using HTTP Range request
605
645
  end_byte = self._position + size - 1
606
646
  data = self._read_range(self._position, end_byte)
607
647
  self._position += len(data)
608
-
609
648
  return data
610
649
 
611
650
  def write(self, b: bytes) -> int:
612
651
  if not self._buffer:
613
652
  self._load_full()
614
-
615
653
  n = self._buffer.write(b)
616
654
  self._modified = True
617
655
  return n
618
656
 
619
657
  def seek(self, offset: int, whence: int = SEEK_SET) -> int:
620
- # Calculate new position
621
658
  if whence == SEEK_SET:
622
659
  new_pos = offset
623
660
  elif whence == SEEK_CUR:
@@ -634,11 +671,9 @@ class WorkspaceAttachmentContents(Contents):
634
671
  if new_pos < 0:
635
672
  raise ValueError("Negative seek position")
636
673
 
637
- # If fully loaded, use buffer seek
638
674
  if self._buffer and self._fully_loaded:
639
675
  return self._buffer.seek(new_pos, SEEK_SET)
640
676
 
641
- # Otherwise, just update position (HTTP Range will handle it)
642
677
  self._position = new_pos
643
678
  return self._position
644
679
 
@@ -648,32 +683,30 @@ class WorkspaceAttachmentContents(Contents):
648
683
  return self._position
649
684
 
650
685
  def flush(self):
651
- """Flush the internal buffer and write to workspace if modified."""
686
+ """Flush: upload buffer to remote storage via presigned URL, then invalidate cache."""
652
687
  if self._buffer:
653
688
  self._buffer.flush()
654
689
 
655
690
  if not self._modified or not self._buffer:
656
691
  return
657
692
 
658
- if not self._entity_type or not self._filetype:
659
- raise ValueError("entity_type and filetype required to flush to workspace")
660
-
661
- from octostar.utils.workspace import write_attachment
662
-
693
+ # Read all data from buffer
663
694
  current_pos = self._buffer.tell()
664
695
  self._buffer.seek(0, SEEK_SET)
665
696
  data = self._buffer.read()
666
697
  self._buffer.seek(current_pos, SEEK_SET)
667
698
 
668
- write_attachment.sync(
669
- os_workspace=self._workspace_id,
670
- os_entity_uid=self._entity_id,
671
- entity_type=self._entity_type,
672
- filetype=self._filetype,
673
- file=data,
674
- client=self._client
675
- )
699
+ # Upload directly to S3 via presigned URL
700
+ self._upload_data(data)
701
+
702
+ # Invalidate cache so next read re-fetches from remote
676
703
  self._modified = False
704
+ self._buffer.close()
705
+ self._buffer = None
706
+ self._fully_loaded = False
707
+ self._position = 0
708
+ self._size = None
709
+ self._download_url = None
677
710
 
678
711
  def close(self):
679
712
  if not self._closed:
@@ -686,15 +719,6 @@ class WorkspaceAttachmentContents(Contents):
686
719
  self._http_client = None
687
720
  super().close()
688
721
 
689
- def delete(self):
690
- """Delete the entity from the workspace using delete_entity()."""
691
- from octostar.utils.workspace import delete_entity
692
-
693
- delete_entity.sync(
694
- os_entity_uid=self._entity_id,
695
- client=self._client
696
- )
697
-
698
722
  def truncate(self, size: Optional[int] = None) -> int:
699
723
  if not self._buffer:
700
724
  self._load_full()
@@ -705,6 +729,64 @@ class WorkspaceAttachmentContents(Contents):
705
729
  if not self._buffer or not self._fully_loaded:
706
730
  self._load_full()
707
731
  return self._buffer.getvalue()
732
+
733
+
734
+ class WorkspaceAttachmentContents(_PresignedUrlRemoteContents):
735
+ """
736
+ Contents implementation for Octostar workspace attachments.
737
+
738
+ All I/O goes through presigned S3 URLs:
739
+ - Reads use get_attachment_url() for presigned download URLs
740
+ - Writes use request_attachment_url() for presigned upload URLs
741
+ - No entity upsert on flush — only raw bytes are written to S3
742
+
743
+ Presigned URL Handling:
744
+ - Download URLs are obtained via get_attachment_url() and cached
745
+ - On 403 (Forbidden) responses, URLs are refreshed and requests retried
746
+ """
747
+
748
+ def __init__(
749
+ self,
750
+ entity_type: Optional[str] = None,
751
+ filetype: Optional[str] = None,
752
+ *,
753
+ workspace_id: str,
754
+ entity_id: str,
755
+ client,
756
+ **kwargs
757
+ ):
758
+ super().__init__(entity_type, filetype, **kwargs)
759
+ self._workspace_id = workspace_id
760
+ self._entity_id = entity_id
761
+ self._client = client
762
+
763
+ def _refresh_download_url(self) -> str:
764
+ from octostar.utils.workspace import get_attachment_url
765
+
766
+ return get_attachment_url.sync(
767
+ os_workspace=self._workspace_id,
768
+ os_entity_uid=self._entity_id,
769
+ client=self._client
770
+ )
771
+
772
+ def _get_upload_info(self) -> Dict[str, Any]:
773
+ from octostar.utils.workspace import request_attachment_url
774
+
775
+ return request_attachment_url.sync(
776
+ os_workspace=self._workspace_id,
777
+ os_entity_uid=self._entity_id,
778
+ content_type=self._filetype,
779
+ client=self._client
780
+ )
781
+
782
+ def delete(self):
783
+ """Delete the entity from the workspace using delete_entity()."""
784
+ from octostar.utils.workspace import delete_entity
785
+
786
+ delete_entity.sync(
787
+ os_entity_uid=self._entity_id,
788
+ client=self._client
789
+ )
708
790
 
709
791
  def to_locator(self) -> Dict[str, Any]:
710
792
  """
@@ -712,7 +794,7 @@ class WorkspaceAttachmentContents(Contents):
712
794
 
713
795
  Returns:
714
796
  {"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
715
- "entity_type": "...", "filetype": "..."}
797
+ "entity_type": "...", "filetype": "...", "item_name": "..."}
716
798
  """
717
799
  if self._workspace_id and self._entity_id:
718
800
  pointer = f"{self._workspace_id}/{self._entity_id}"
@@ -760,12 +842,13 @@ class WorkspaceAttachmentContents(Contents):
760
842
  )
761
843
 
762
844
 
763
- class TemporaryAttachmentContents(Contents):
845
+ class TemporaryAttachmentContents(_PresignedUrlRemoteContents):
764
846
  """
765
847
  Contents implementation for Octostar temporary blob storage.
766
848
 
767
- Uses octostar-api utilities (read_temporary_blob, write_temporary_blob,
768
- delete_temporary_blob) to store files in the user's temporary S3 bucket.
849
+ All I/O goes through presigned S3 URLs:
850
+ - Reads use get_temporary_blob_url() for presigned download URLs
851
+ - Writes use request_temporary_blob_url() for presigned upload URLs
769
852
 
770
853
  Temporary blobs are keyed by filename (not workspace/entity), and are not
771
854
  associated with any workspace entity. Use WorkspaceAttachmentContents for that.
@@ -778,90 +861,27 @@ class TemporaryAttachmentContents(Contents):
778
861
  *,
779
862
  filename: str,
780
863
  client,
781
- initial_data: Optional[bytes] = None,
782
864
  **kwargs
783
865
  ):
784
866
  super().__init__(entity_type, filetype, **kwargs)
785
867
  self._filename = filename
786
868
  self._client = client
787
-
788
- self._buffer: Optional[BytesIO] = None
789
- self._fully_loaded = False
790
- self._modified = False
791
-
792
- if initial_data is not None:
793
- self._buffer = BytesIO(initial_data)
794
- self._fully_loaded = True
795
869
 
796
- def _load_full(self):
797
- """Load the entire blob into memory using read_temporary_blob()."""
798
- if self._fully_loaded:
799
- return
800
-
801
- from octostar.utils.workspace import read_temporary_blob
870
+ def _refresh_download_url(self) -> str:
871
+ from octostar.utils.workspace import get_temporary_blob_url
802
872
 
803
- data = read_temporary_blob.sync(
873
+ return get_temporary_blob_url.sync(
804
874
  filename=self._filename,
805
- decode=False,
806
875
  client=self._client
807
876
  )
808
- self._buffer = BytesIO(data or b"")
809
- self._fully_loaded = True
810
-
811
- def read(self, size: int = -1) -> bytes:
812
- if not self._buffer:
813
- self._load_full()
814
- return self._buffer.read(size)
815
-
816
- def write(self, b: bytes) -> int:
817
- if not self._buffer:
818
- self._load_full()
819
- n = self._buffer.write(b)
820
- self._modified = True
821
- return n
822
877
 
823
- def seek(self, offset: int, whence: int = SEEK_SET) -> int:
824
- if not self._buffer:
825
- self._load_full()
826
- return self._buffer.seek(offset, whence)
827
-
828
- def tell(self) -> int:
829
- if not self._buffer:
830
- self._load_full()
831
- return self._buffer.tell()
832
-
833
- def flush(self):
834
- """Flush the internal buffer and write to temp bucket if modified."""
835
- if self._buffer:
836
- self._buffer.flush()
837
-
838
- if not self._modified or not self._buffer:
839
- return
840
-
841
- from octostar.utils.workspace import write_temporary_blob
842
-
843
- current_pos = self._buffer.tell()
844
- self._buffer.seek(0, SEEK_SET)
845
- data = self._buffer.read()
846
- self._buffer.seek(current_pos, SEEK_SET)
878
+ def _get_upload_info(self) -> Dict[str, Any]:
879
+ from octostar.utils.workspace import request_temporary_blob_url
847
880
 
848
- write_temporary_blob.sync(
881
+ return request_temporary_blob_url.sync(
849
882
  filename=self._filename,
850
- file=data,
851
883
  client=self._client
852
884
  )
853
- self._modified = False
854
- self._buffer.close()
855
- self._buffer = None
856
- self._fully_loaded = False
857
-
858
- def close(self):
859
- if not self._closed:
860
- if self._modified:
861
- self.flush()
862
- if self._buffer:
863
- self._buffer.close()
864
- super().close()
865
885
 
866
886
  def delete(self):
867
887
  """Delete the blob from the temporary bucket."""
@@ -872,17 +892,6 @@ class TemporaryAttachmentContents(Contents):
872
892
  client=self._client
873
893
  )
874
894
 
875
- def truncate(self, size: Optional[int] = None) -> int:
876
- if not self._buffer:
877
- self._load_full()
878
- self._modified = True
879
- return self._buffer.truncate(size)
880
-
881
- def getvalue(self) -> bytes:
882
- if not self._buffer or not self._fully_loaded:
883
- self._load_full()
884
- return self._buffer.getvalue()
885
-
886
895
  def to_locator(self) -> Dict[str, Any]:
887
896
  """
888
897
  Serialize to locator with filename.
@@ -18,7 +18,7 @@ from octostar.utils.workspace import upsert_entities
18
18
  from octostar.utils.ontology import fetch_ontology_data
19
19
  from octostar.utils.workspace.permissions import get_permissions, PermissionLevel
20
20
  from octostar.utils.pipeline import update_processing_status
21
- from octostar.utils.workspace import write_attachment
21
+
22
22
  from octostar.client import make_client
23
23
 
24
24
  from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
@@ -464,26 +464,49 @@ class NifiContextManager(object):
464
464
  if not file.contents:
465
465
  continue
466
466
  old_contents = file._contents
467
- write_attachment.sync(
468
- os_workspace=file.write_os_workspace,
469
- os_entity_uid=file.record["os_entity_uid"],
470
- entity_type=file.record["os_concept"],
471
- filetype=file.record["os_item_content_type"],
472
- file=file.contents,
473
- client=self.client,
474
- )
475
- file._contents = WorkspaceAttachmentContents(
476
- workspace_id=file.record['os_workspace'],
477
- entity_id=file.record['os_entity_uid'],
467
+ record = file.record
468
+ ws = file.write_os_workspace
469
+ entity_uid = record["os_entity_uid"]
470
+ entity_type = record["os_concept"]
471
+ filetype = record.get("os_item_content_type")
472
+ item_name = record.get("os_item_name")
473
+ data = old_contents.getvalue()
474
+ target = WorkspaceAttachmentContents(
475
+ workspace_id=ws,
476
+ entity_id=entity_uid,
478
477
  client=self.client,
479
- entity_type=file.record["os_concept"],
480
- filetype=file.record["os_item_content_type"]
478
+ entity_type=entity_type,
479
+ filetype=filetype,
481
480
  )
481
+ target.write(data)
482
+ target.flush()
483
+ file._contents = target
482
484
  if isinstance(old_contents, TemporaryAttachmentContents):
483
485
  try:
484
486
  old_contents.delete()
485
487
  except Exception:
486
488
  pass
489
+ fields = {"os_has_attachment": True}
490
+ if filetype:
491
+ fields["os_item_content_type"] = filetype
492
+ if item_name:
493
+ fields["os_item_name"] = item_name
494
+ new_entities = upsert_entities.sync(
495
+ ws,
496
+ [{
497
+ "entity_type": entity_type,
498
+ "os_entity_uid": entity_uid,
499
+ "fields": fields,
500
+ }],
501
+ client=self.client,
502
+ )
503
+ new_entity = {e["os_entity_uid"]: e for e in new_entities}.get(entity_uid, {})
504
+ file.record = {**record, **new_entity}
505
+ file.record["entity_id"] = file.record["os_entity_uid"]
506
+ file.record["entity_type"] = file.record["os_concept"]
507
+ file.record["entity_label"] = file.label
508
+ file.request["entity_timestamp"] = file.record["os_last_updated_at"]
509
+ file.request["is_temporary"] = False
487
510
 
488
511
  def _sync_upsert_entities(self, entities_to_upsert):
489
512
  if not entities_to_upsert: