streamlit-octostar-utils 0.4.2.dev9__tar.gz → 0.4.2.dev10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/contents.py +195 -191
  4. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/nifi.py +37 -14
  5. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/LICENSE +0 -0
  6. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/README.md +0 -0
  7. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/__init__.py +0 -0
  8. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  9. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  10. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  11. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  12. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  13. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  14. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  15. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  16. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  17. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  18. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  19. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  20. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  21. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  22. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/__init__.py +0 -0
  23. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/dict.py +0 -0
  24. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/filetypes.py +0 -0
  25. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  26. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  27. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/core/timestamp.py +0 -0
  28. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  29. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  30. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/language.py +0 -0
  31. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  38. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.4.2.dev9 → streamlit_octostar_utils-0.4.2.dev10}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.4.2.dev9
3
+ Version: 0.4.2.dev10
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.4.2-dev.9"
8
+ version = "0.4.2-dev.10"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -15,13 +15,12 @@ Storage Backends:
15
15
  """
16
16
 
17
17
  from abc import ABC, abstractmethod
18
- from typing import Optional, Dict, Any, Union, BinaryIO, List
18
+ from typing import Optional, Dict, Any, List
19
19
  from enum import Enum
20
20
  import base64
21
21
  import httpx
22
22
  import logging
23
23
  from io import BytesIO, SEEK_SET, SEEK_CUR, SEEK_END
24
- from copy import deepcopy
25
24
 
26
25
  _logger = logging.getLogger(__name__)
27
26
 
@@ -415,107 +414,107 @@ class MemoryContents(Contents):
415
414
  )
416
415
 
417
416
 
418
- class WorkspaceAttachmentContents(Contents):
417
+ class _PresignedUrlRemoteContents(Contents):
419
418
  """
420
- Contents implementation for Octostar workspace attachments.
421
-
422
- Uses octostar-api utilities (read_attachment, get_attachment_url) to:
423
- - Lazy load from workspace storage via read_attachment()
424
- - Stream efficiently with HTTP Range requests (no need to download entire file)
425
- - Support true seeking with configurable chunk sizes
426
- - Buffer modifications in memory (write back handled at NiFi entity level)
427
-
428
- Presigned URL Handling:
429
- - URLs are obtained via get_attachment_url() and cached
430
- - On 403 (Forbidden) responses, URLs are refreshed and requests retried
431
- - Follows the same retry pattern as octostar-api read_file utilities
419
+ Abstract base for remote storage backends accessed via presigned URLs.
420
+
421
+ Provides all FileIO operations using presigned URLs:
422
+ - Reads via HTTP GET with Range headers (no need to download entire file)
423
+ - Writes via presigned POST (no entity upsert, only raw bytes to S3)
424
+ - Automatic presigned URL refresh on 403 (expired)
425
+ - Lazy full-load into memory buffer only when writes require it
426
+ - Cache invalidation on flush (next read re-fetches from remote)
427
+
428
+ Subclasses only need to implement:
429
+ - _refresh_download_url(): get a fresh presigned download URL
430
+ - _get_upload_info(): get presigned upload URL + form fields
431
+ - delete(): delete from the storage backend
432
+ - to_locator() / _from_locator(): serialization
432
433
  """
433
434
 
434
- DEFAULT_CHUNK_SIZE = 8192
435
435
  DEFAULT_URL_TIMEOUT = 120
436
+ MAX_RETRIES = 3
436
437
 
437
438
  def __init__(
438
439
  self,
439
440
  entity_type: Optional[str] = None,
440
441
  filetype: Optional[str] = None,
441
- *,
442
- workspace_id: str,
443
- entity_id: str,
444
- client,
445
- initial_data: Optional[bytes] = None,
446
- chunk_size: int = DEFAULT_CHUNK_SIZE,
447
442
  **kwargs
448
443
  ):
449
444
  super().__init__(entity_type, filetype, **kwargs)
450
- self._workspace_id = workspace_id
451
- self._entity_id = entity_id
452
- self._client = client
453
- self._chunk_size = chunk_size
454
-
455
445
  self._buffer: Optional[BytesIO] = None
456
446
  self._fully_loaded = False
457
447
  self._modified = False
458
448
  self._position = 0
459
449
  self._size: Optional[int] = None
460
- self._presigned_url: Optional[str] = None
450
+ self._download_url: Optional[str] = None
461
451
  self._http_client: Optional[httpx.Client] = None
462
-
463
- if initial_data is not None:
464
- self._buffer = BytesIO(initial_data)
465
- self._fully_loaded = True
466
- self._size = len(initial_data)
467
452
 
468
- def _get_presigned_url(self) -> str:
469
- """Get or refresh the presigned URL for the attachment."""
470
- from octostar.utils.workspace import get_attachment_url
453
+ # ==================== Abstract URL methods ====================
454
+
455
+ @abstractmethod
456
+ def _refresh_download_url(self) -> str:
457
+ """Fetch a fresh presigned download URL from the API."""
458
+ pass
459
+
460
+ @abstractmethod
461
+ def _get_upload_info(self) -> Dict[str, Any]:
462
+ """
463
+ Get presigned upload URL and form fields from the API.
471
464
 
472
- return get_attachment_url.sync(
473
- os_workspace=self._workspace_id,
474
- os_entity_uid=self._entity_id,
475
- client=self._client
476
- )
465
+ Returns:
466
+ {"url": "<presigned upload URL>", "fields": {<form fields>}}
467
+ """
468
+ pass
469
+
470
+ # ==================== HTTP infrastructure ====================
477
471
 
478
472
  def _ensure_http_client(self):
479
- """Ensure HTTP client is initialized."""
473
+ """Ensure the shared httpx.Client is initialized."""
480
474
  if not self._http_client:
481
475
  self._http_client = httpx.Client(timeout=self.DEFAULT_URL_TIMEOUT)
482
476
 
477
+ def _ensure_download_url(self):
478
+ """Ensure a download URL is cached, fetching one if needed."""
479
+ if not self._download_url:
480
+ self._download_url = self._refresh_download_url()
481
+
482
+ # ==================== Remote read operations ====================
483
+
483
484
  def _fetch_size(self) -> int:
484
485
  """
485
- Fetch the size of the remote attachment using HEAD request.
486
+ Fetch the size of the remote content using a HEAD request.
486
487
 
487
488
  Handles presigned URL expiration with automatic refresh and retry.
488
489
  """
489
490
  if self._size is not None:
490
491
  return self._size
491
492
 
492
- if not self._presigned_url:
493
- self._presigned_url = self._get_presigned_url()
494
-
493
+ self._ensure_download_url()
495
494
  self._ensure_http_client()
496
- max_retries = 3
497
495
 
498
- for attempt in range(max_retries):
499
- response = self._http_client.head(self._presigned_url)
496
+ for attempt in range(self.MAX_RETRIES):
497
+ response = self._http_client.head(self._download_url)
500
498
  if response.status_code == 200:
501
499
  self._size = int(response.headers.get('content-length', 0))
502
500
  return self._size
503
501
  elif response.status_code == 403:
504
502
  _logger.debug(
505
- f"Presigned URL expired while fetching size, refreshing... (attempt {attempt + 1}/{max_retries})"
503
+ f"Presigned URL expired while fetching size, refreshing... "
504
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
506
505
  )
507
- self._presigned_url = self._get_presigned_url()
506
+ self._download_url = self._refresh_download_url()
508
507
  continue
509
508
  else:
510
509
  response.raise_for_status()
511
510
 
512
511
  raise ConnectionError(
513
- f"Failed to fetch file size after {max_retries} attempts (URL kept expiring)"
512
+ f"Failed to fetch file size after {self.MAX_RETRIES} attempts (URL kept expiring)"
514
513
  )
515
514
 
516
515
  def _read_range(self, start: int, end: int) -> bytes:
517
516
  """
518
- Read a specific byte range using HTTP Range request.
517
+ Read a specific byte range using an HTTP Range request.
519
518
 
520
519
  Handles presigned URL expiration with automatic refresh and retry.
521
520
 
@@ -526,16 +525,13 @@ class WorkspaceAttachmentContents(Contents):
526
525
  Returns:
527
526
  Bytes from the specified range
528
527
  """
529
- if not self._presigned_url:
530
- self._presigned_url = self._get_presigned_url()
531
-
528
+ self._ensure_download_url()
532
529
  self._ensure_http_client()
533
530
 
534
531
  headers = {"Range": f"bytes={start}-{end}"}
535
- max_retries = 3
536
532
 
537
- for attempt in range(max_retries):
538
- response = self._http_client.get(self._presigned_url, headers=headers)
533
+ for attempt in range(self.MAX_RETRIES):
534
+ response = self._http_client.get(self._download_url, headers=headers)
539
535
 
540
536
  if response.status_code == 206:
541
537
  return response.content
@@ -543,38 +539,85 @@ class WorkspaceAttachmentContents(Contents):
543
539
  return b""
544
540
  elif response.status_code == 403:
545
541
  _logger.debug(
546
- f"Presigned URL expired at byte {start}, refreshing... (attempt {attempt + 1}/{max_retries})"
542
+ f"Presigned URL expired at byte {start}, refreshing... "
543
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
547
544
  )
548
- self._presigned_url = self._get_presigned_url()
545
+ self._download_url = self._refresh_download_url()
549
546
  continue
550
547
  else:
551
548
  response.raise_for_status()
552
549
  return response.content
553
550
 
554
551
  raise ConnectionError(
555
- f"Failed to read range {start}-{end} after {max_retries} attempts (URL kept expiring)"
552
+ f"Failed to read range {start}-{end} after {self.MAX_RETRIES} attempts "
553
+ f"(URL kept expiring)"
556
554
  )
557
555
 
558
556
  def _load_full(self):
559
- """Load the entire attachment into memory buffer using read_attachment()."""
557
+ """
558
+ Load the entire remote content into memory buffer via presigned URL.
559
+
560
+ Handles presigned URL expiration with automatic refresh and retry.
561
+ After loading, the buffer position is restored to self._position.
562
+ """
560
563
  if self._fully_loaded:
561
564
  return
562
565
 
563
- from octostar.utils.workspace import read_attachment
566
+ self._ensure_download_url()
567
+ self._ensure_http_client()
564
568
 
565
- data = read_attachment.sync(
566
- os_workspace=self._workspace_id,
567
- os_entity_uid=self._entity_id,
568
- decode=False,
569
- stream=False,
570
- client=self._client
569
+ for attempt in range(self.MAX_RETRIES):
570
+ response = self._http_client.get(self._download_url)
571
+ if response.status_code == 200:
572
+ data = response.content
573
+ self._buffer = BytesIO(data)
574
+ self._fully_loaded = True
575
+ self._size = len(data)
576
+ self._buffer.seek(self._position, SEEK_SET)
577
+ return
578
+ elif response.status_code == 403:
579
+ _logger.debug(
580
+ f"Presigned URL expired during full load, refreshing... "
581
+ f"(attempt {attempt + 1}/{self.MAX_RETRIES})"
582
+ )
583
+ self._download_url = self._refresh_download_url()
584
+ continue
585
+ else:
586
+ response.raise_for_status()
587
+
588
+ raise ConnectionError(
589
+ f"Failed to load full content after {self.MAX_RETRIES} attempts "
590
+ f"(URL kept expiring)"
571
591
  )
572
- self._buffer = BytesIO(data or b"")
573
- self._fully_loaded = True
574
- self._size = len(data) if data else 0
575
- self._position = 0
592
+
593
+ # ==================== Remote write operations ====================
594
+
595
+ def _upload_data(self, data: bytes):
596
+ """
597
+ Upload data to remote storage via presigned POST URL.
598
+
599
+ Uses the presigned URL and form fields from _get_upload_info().
600
+ This only touches S3/MinIO — no entity record upsert.
601
+ """
602
+ upload_info = self._get_upload_info()
603
+ upload_url = upload_info["url"]
604
+ upload_fields = upload_info.get("fields", {})
605
+
606
+ self._ensure_http_client()
607
+ file_obj = BytesIO(data)
608
+
609
+ response = self._http_client.post(
610
+ upload_url,
611
+ data=upload_fields,
612
+ files={"file": file_obj},
613
+ )
614
+ if response.status_code not in (200, 204):
615
+ response.raise_for_status()
616
+
617
+ # ==================== FileIO Interface ====================
576
618
 
577
619
  def read(self, size: int = -1) -> bytes:
620
+ # If fully loaded into buffer, read from buffer
578
621
  if self._fully_loaded:
579
622
  if not self._buffer:
580
623
  self._load_full()
@@ -582,7 +625,6 @@ class WorkspaceAttachmentContents(Contents):
582
625
 
583
626
  # Otherwise, use HTTP Range requests for efficient streaming
584
627
  if size == -1:
585
- # Read all from current position
586
628
  self._fetch_size()
587
629
  if self._position >= self._size:
588
630
  return b""
@@ -591,33 +633,28 @@ class WorkspaceAttachmentContents(Contents):
591
633
  if size <= 0:
592
634
  return b""
593
635
 
594
- # Fetch the size if we don't know it
595
636
  self._fetch_size()
596
637
 
597
- # Adjust size if it goes beyond the end
638
+ # Clamp to available bytes
598
639
  if self._position + size > self._size:
599
640
  size = self._size - self._position
600
641
 
601
642
  if size <= 0:
602
643
  return b""
603
644
 
604
- # Read using HTTP Range request
605
645
  end_byte = self._position + size - 1
606
646
  data = self._read_range(self._position, end_byte)
607
647
  self._position += len(data)
608
-
609
648
  return data
610
649
 
611
650
  def write(self, b: bytes) -> int:
612
651
  if not self._buffer:
613
652
  self._load_full()
614
-
615
653
  n = self._buffer.write(b)
616
654
  self._modified = True
617
655
  return n
618
656
 
619
657
  def seek(self, offset: int, whence: int = SEEK_SET) -> int:
620
- # Calculate new position
621
658
  if whence == SEEK_SET:
622
659
  new_pos = offset
623
660
  elif whence == SEEK_CUR:
@@ -634,11 +671,9 @@ class WorkspaceAttachmentContents(Contents):
634
671
  if new_pos < 0:
635
672
  raise ValueError("Negative seek position")
636
673
 
637
- # If fully loaded, use buffer seek
638
674
  if self._buffer and self._fully_loaded:
639
675
  return self._buffer.seek(new_pos, SEEK_SET)
640
676
 
641
- # Otherwise, just update position (HTTP Range will handle it)
642
677
  self._position = new_pos
643
678
  return self._position
644
679
 
@@ -648,37 +683,30 @@ class WorkspaceAttachmentContents(Contents):
648
683
  return self._position
649
684
 
650
685
  def flush(self):
651
- """Flush the internal buffer and write to workspace if modified."""
686
+ """Flush: upload buffer to remote storage via presigned URL, then invalidate cache."""
652
687
  if self._buffer:
653
688
  self._buffer.flush()
654
689
 
655
690
  if not self._modified or not self._buffer:
656
691
  return
657
692
 
658
- if not self._entity_type or not self._filetype:
659
- raise ValueError("entity_type and filetype required to flush to workspace")
660
-
661
- from octostar.utils.workspace import write_attachment
662
-
693
+ # Read all data from buffer
663
694
  current_pos = self._buffer.tell()
664
695
  self._buffer.seek(0, SEEK_SET)
665
696
  data = self._buffer.read()
666
697
  self._buffer.seek(current_pos, SEEK_SET)
667
698
 
668
- write_attachment.sync(
669
- os_workspace=self._workspace_id,
670
- os_entity_uid=self._entity_id,
671
- entity_type=self._entity_type,
672
- filetype=self._filetype,
673
- file=data,
674
- client=self._client
675
- )
699
+ # Upload directly to S3 via presigned URL
700
+ self._upload_data(data)
701
+
702
+ # Invalidate cache so next read re-fetches from remote
676
703
  self._modified = False
677
704
  self._buffer.close()
678
705
  self._buffer = None
679
706
  self._fully_loaded = False
680
707
  self._position = 0
681
- self._presigned_url = None # force refresh on next remote read
708
+ self._size = None
709
+ self._download_url = None
682
710
 
683
711
  def close(self):
684
712
  if not self._closed:
@@ -691,15 +719,6 @@ class WorkspaceAttachmentContents(Contents):
691
719
  self._http_client = None
692
720
  super().close()
693
721
 
694
- def delete(self):
695
- """Delete the entity from the workspace using delete_entity()."""
696
- from octostar.utils.workspace import delete_entity
697
-
698
- delete_entity.sync(
699
- os_entity_uid=self._entity_id,
700
- client=self._client
701
- )
702
-
703
722
  def truncate(self, size: Optional[int] = None) -> int:
704
723
  if not self._buffer:
705
724
  self._load_full()
@@ -710,6 +729,64 @@ class WorkspaceAttachmentContents(Contents):
710
729
  if not self._buffer or not self._fully_loaded:
711
730
  self._load_full()
712
731
  return self._buffer.getvalue()
732
+
733
+
734
+ class WorkspaceAttachmentContents(_PresignedUrlRemoteContents):
735
+ """
736
+ Contents implementation for Octostar workspace attachments.
737
+
738
+ All I/O goes through presigned S3 URLs:
739
+ - Reads use get_attachment_url() for presigned download URLs
740
+ - Writes use request_attachment_url() for presigned upload URLs
741
+ - No entity upsert on flush — only raw bytes are written to S3
742
+
743
+ Presigned URL Handling:
744
+ - Download URLs are obtained via get_attachment_url() and cached
745
+ - On 403 (Forbidden) responses, URLs are refreshed and requests retried
746
+ """
747
+
748
+ def __init__(
749
+ self,
750
+ entity_type: Optional[str] = None,
751
+ filetype: Optional[str] = None,
752
+ *,
753
+ workspace_id: str,
754
+ entity_id: str,
755
+ client,
756
+ **kwargs
757
+ ):
758
+ super().__init__(entity_type, filetype, **kwargs)
759
+ self._workspace_id = workspace_id
760
+ self._entity_id = entity_id
761
+ self._client = client
762
+
763
+ def _refresh_download_url(self) -> str:
764
+ from octostar.utils.workspace import get_attachment_url
765
+
766
+ return get_attachment_url.sync(
767
+ os_workspace=self._workspace_id,
768
+ os_entity_uid=self._entity_id,
769
+ client=self._client
770
+ )
771
+
772
+ def _get_upload_info(self) -> Dict[str, Any]:
773
+ from octostar.utils.workspace import request_attachment_url
774
+
775
+ return request_attachment_url.sync(
776
+ os_workspace=self._workspace_id,
777
+ os_entity_uid=self._entity_id,
778
+ content_type=self._filetype,
779
+ client=self._client
780
+ )
781
+
782
+ def delete(self):
783
+ """Delete the entity from the workspace using delete_entity()."""
784
+ from octostar.utils.workspace import delete_entity
785
+
786
+ delete_entity.sync(
787
+ os_entity_uid=self._entity_id,
788
+ client=self._client
789
+ )
713
790
 
714
791
  def to_locator(self) -> Dict[str, Any]:
715
792
  """
@@ -717,7 +794,7 @@ class WorkspaceAttachmentContents(Contents):
717
794
 
718
795
  Returns:
719
796
  {"location": "workspace_attachment", "pointer": "workspace_id/entity_id",
720
- "entity_type": "...", "filetype": "..."}
797
+ "entity_type": "...", "filetype": "...", "item_name": "..."}
721
798
  """
722
799
  if self._workspace_id and self._entity_id:
723
800
  pointer = f"{self._workspace_id}/{self._entity_id}"
@@ -765,12 +842,13 @@ class WorkspaceAttachmentContents(Contents):
765
842
  )
766
843
 
767
844
 
768
- class TemporaryAttachmentContents(Contents):
845
+ class TemporaryAttachmentContents(_PresignedUrlRemoteContents):
769
846
  """
770
847
  Contents implementation for Octostar temporary blob storage.
771
848
 
772
- Uses octostar-api utilities (read_temporary_blob, write_temporary_blob,
773
- delete_temporary_blob) to store files in the user's temporary S3 bucket.
849
+ All I/O goes through presigned S3 URLs:
850
+ - Reads use get_temporary_blob_url() for presigned download URLs
851
+ - Writes use request_temporary_blob_url() for presigned upload URLs
774
852
 
775
853
  Temporary blobs are keyed by filename (not workspace/entity), and are not
776
854
  associated with any workspace entity. Use WorkspaceAttachmentContents for that.
@@ -783,90 +861,27 @@ class TemporaryAttachmentContents(Contents):
783
861
  *,
784
862
  filename: str,
785
863
  client,
786
- initial_data: Optional[bytes] = None,
787
864
  **kwargs
788
865
  ):
789
866
  super().__init__(entity_type, filetype, **kwargs)
790
867
  self._filename = filename
791
868
  self._client = client
792
-
793
- self._buffer: Optional[BytesIO] = None
794
- self._fully_loaded = False
795
- self._modified = False
796
-
797
- if initial_data is not None:
798
- self._buffer = BytesIO(initial_data)
799
- self._fully_loaded = True
800
869
 
801
- def _load_full(self):
802
- """Load the entire blob into memory using read_temporary_blob()."""
803
- if self._fully_loaded:
804
- return
870
+ def _refresh_download_url(self) -> str:
871
+ from octostar.utils.workspace import get_temporary_blob_url
805
872
 
806
- from octostar.utils.workspace import read_temporary_blob
807
-
808
- data = read_temporary_blob.sync(
873
+ return get_temporary_blob_url.sync(
809
874
  filename=self._filename,
810
- decode=False,
811
875
  client=self._client
812
876
  )
813
- self._buffer = BytesIO(data or b"")
814
- self._fully_loaded = True
815
-
816
- def read(self, size: int = -1) -> bytes:
817
- if not self._buffer:
818
- self._load_full()
819
- return self._buffer.read(size)
820
-
821
- def write(self, b: bytes) -> int:
822
- if not self._buffer:
823
- self._load_full()
824
- n = self._buffer.write(b)
825
- self._modified = True
826
- return n
827
877
 
828
- def seek(self, offset: int, whence: int = SEEK_SET) -> int:
829
- if not self._buffer:
830
- self._load_full()
831
- return self._buffer.seek(offset, whence)
832
-
833
- def tell(self) -> int:
834
- if not self._buffer:
835
- self._load_full()
836
- return self._buffer.tell()
837
-
838
- def flush(self):
839
- """Flush the internal buffer and write to temp bucket if modified."""
840
- if self._buffer:
841
- self._buffer.flush()
842
-
843
- if not self._modified or not self._buffer:
844
- return
845
-
846
- from octostar.utils.workspace import write_temporary_blob
878
+ def _get_upload_info(self) -> Dict[str, Any]:
879
+ from octostar.utils.workspace import request_temporary_blob_url
847
880
 
848
- current_pos = self._buffer.tell()
849
- self._buffer.seek(0, SEEK_SET)
850
- data = self._buffer.read()
851
- self._buffer.seek(current_pos, SEEK_SET)
852
-
853
- write_temporary_blob.sync(
881
+ return request_temporary_blob_url.sync(
854
882
  filename=self._filename,
855
- file=data,
856
883
  client=self._client
857
884
  )
858
- self._modified = False
859
- self._buffer.close()
860
- self._buffer = None
861
- self._fully_loaded = False
862
-
863
- def close(self):
864
- if not self._closed:
865
- if self._modified:
866
- self.flush()
867
- if self._buffer:
868
- self._buffer.close()
869
- super().close()
870
885
 
871
886
  def delete(self):
872
887
  """Delete the blob from the temporary bucket."""
@@ -877,17 +892,6 @@ class TemporaryAttachmentContents(Contents):
877
892
  client=self._client
878
893
  )
879
894
 
880
- def truncate(self, size: Optional[int] = None) -> int:
881
- if not self._buffer:
882
- self._load_full()
883
- self._modified = True
884
- return self._buffer.truncate(size)
885
-
886
- def getvalue(self) -> bytes:
887
- if not self._buffer or not self._fully_loaded:
888
- self._load_full()
889
- return self._buffer.getvalue()
890
-
891
895
  def to_locator(self) -> Dict[str, Any]:
892
896
  """
893
897
  Serialize to locator with filename.
@@ -18,7 +18,7 @@ from octostar.utils.workspace import upsert_entities
18
18
  from octostar.utils.ontology import fetch_ontology_data
19
19
  from octostar.utils.workspace.permissions import get_permissions, PermissionLevel
20
20
  from octostar.utils.pipeline import update_processing_status
21
- from octostar.utils.workspace import write_attachment
21
+
22
22
  from octostar.client import make_client
23
23
 
24
24
  from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
@@ -464,26 +464,49 @@ class NifiContextManager(object):
464
464
  if not file.contents:
465
465
  continue
466
466
  old_contents = file._contents
467
- write_attachment.sync(
468
- os_workspace=file.write_os_workspace,
469
- os_entity_uid=file.record["os_entity_uid"],
470
- entity_type=file.record["os_concept"],
471
- filetype=file.record["os_item_content_type"],
472
- file=file.contents,
473
- client=self.client,
474
- )
475
- file._contents = WorkspaceAttachmentContents(
476
- workspace_id=file.record['os_workspace'],
477
- entity_id=file.record['os_entity_uid'],
467
+ record = file.record
468
+ ws = file.write_os_workspace
469
+ entity_uid = record["os_entity_uid"]
470
+ entity_type = record["os_concept"]
471
+ filetype = record.get("os_item_content_type")
472
+ item_name = record.get("os_item_name")
473
+ data = old_contents.getvalue()
474
+ target = WorkspaceAttachmentContents(
475
+ workspace_id=ws,
476
+ entity_id=entity_uid,
478
477
  client=self.client,
479
- entity_type=file.record["os_concept"],
480
- filetype=file.record["os_item_content_type"]
478
+ entity_type=entity_type,
479
+ filetype=filetype,
481
480
  )
481
+ target.write(data)
482
+ target.flush()
483
+ file._contents = target
482
484
  if isinstance(old_contents, TemporaryAttachmentContents):
483
485
  try:
484
486
  old_contents.delete()
485
487
  except Exception:
486
488
  pass
489
+ fields = {"os_has_attachment": True}
490
+ if filetype:
491
+ fields["os_item_content_type"] = filetype
492
+ if item_name:
493
+ fields["os_item_name"] = item_name
494
+ new_entities = upsert_entities.sync(
495
+ ws,
496
+ [{
497
+ "entity_type": entity_type,
498
+ "os_entity_uid": entity_uid,
499
+ "fields": fields,
500
+ }],
501
+ client=self.client,
502
+ )
503
+ new_entity = {e["os_entity_uid"]: e for e in new_entities}.get(entity_uid, {})
504
+ file.record = {**record, **new_entity}
505
+ file.record["entity_id"] = file.record["os_entity_uid"]
506
+ file.record["entity_type"] = file.record["os_concept"]
507
+ file.record["entity_label"] = file.label
508
+ file.request["entity_timestamp"] = file.record["os_last_updated_at"]
509
+ file.request["is_temporary"] = False
487
510
 
488
511
  def _sync_upsert_entities(self, entities_to_upsert):
489
512
  if not entities_to_upsert: