streamlit-octostar-utils 0.4.2.dev24__tar.gz → 0.5.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/celery.py +217 -52
  4. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +1 -97
  5. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +5 -3
  6. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/inheritance.py +5 -5
  7. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/LICENSE +0 -0
  8. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/README.md +0 -0
  9. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/__init__.py +0 -0
  10. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  11. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
  12. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  13. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  14. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  15. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  16. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  17. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  18. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  19. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  20. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  21. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  22. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  23. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
  24. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
  25. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
  26. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  27. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  28. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/timestamp.py +0 -0
  29. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  30. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  31. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
  32. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
  33. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  34. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
  35. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
  36. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  37. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  38. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.4.2.dev24
3
+ Version: 0.5.0.dev1
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.4.2-dev.24"
8
+ version = "0.5.0-dev.1"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -9,7 +9,6 @@ import subprocess
9
9
  from fastapi import Query
10
10
  import time
11
11
  import os
12
- import pickle
13
12
  import atexit
14
13
  import redis
15
14
  import uuid
@@ -68,7 +67,31 @@ class CeleryQueueConfig:
68
67
  self.options = options
69
68
 
70
69
 
70
+ class TaskResult:
71
+ """Wrapper for task results that include binary parts alongside JSON data.
72
+ Tasks returning binary data (e.g. images) should return a TaskResult
73
+ so that serialized_io writes them as multipart parts instead of attempting
74
+ JSON serialization on bytes."""
75
+
76
+ def __init__(self, data, part=None):
77
+ self.data = data
78
+ self.part = part
79
+
80
+
71
81
  class CelerySerialized:
82
+ """Serializes task data to a boundary-delimited multipart file.
83
+
84
+ Format: metadata JSON part followed by optional binary/streamed parts,
85
+ separated by boundary markers (the task_id). Replaces pickle entirely.
86
+ """
87
+
88
+ BOUNDARY_PREFIX = b"--"
89
+ BOUNDARY_SUFFIX = b"\r\n"
90
+ BOUNDARY_END = b"--\r\n"
91
+ CONTENT_TYPE_JSON = b"Content-Type: application/json\r\n"
92
+ CONTENT_TYPE_BYTES = b"Content-Type: application/octet-stream\r\n"
93
+ HEADER_END = b"\r\n"
94
+
72
95
  def __init__(self, folder, redis_client, data=None):
73
96
  self.folder = folder
74
97
  self.data = data
@@ -77,18 +100,70 @@ class CelerySerialized:
77
100
  def set_task_id(self, task_id):
78
101
  self.task_id = task_id
79
102
 
80
- def dump(self):
103
+ def _boundary(self):
104
+ return self.task_id.encode()
105
+
106
+ def _write_boundary(self, f):
107
+ f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_SUFFIX)
108
+
109
+ def _write_end_boundary(self, f):
110
+ f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_END)
111
+
112
+ def dump(self, parts=None, part_is_list=False):
113
+ """Write metadata + optional parts in multipart format.
114
+ parts: optional list of bytes objects to write as additional parts.
115
+ part_is_list: whether the original part was a list (preserves type on read).
116
+ """
81
117
  assert self.task_id
118
+ if isinstance(self.data, dict):
119
+ metadata = self.data
120
+ else:
121
+ metadata = {"data": self.data}
122
+ metadata["part_count"] = len(parts) if parts else 0
123
+ metadata["part_is_list"] = part_is_list
82
124
  with RedisFileLock(self.redis_client, os.path.join(self.folder, self.task_id)):
83
- with open(os.path.join(self.folder, self.task_id), "wb") as target_file:
84
- pickle.dump(self.data, file=target_file, protocol=pickle.HIGHEST_PROTOCOL)
125
+ with open(os.path.join(self.folder, self.task_id), "wb") as f:
126
+ self._write_boundary(f)
127
+ f.write(self.CONTENT_TYPE_JSON)
128
+ f.write(self.HEADER_END)
129
+ f.write(json.dumps(metadata).encode())
130
+ f.write(b"\r\n")
131
+ if parts:
132
+ for part in parts:
133
+ self._write_boundary(f)
134
+ f.write(self.CONTENT_TYPE_BYTES)
135
+ f.write(self.HEADER_END)
136
+ f.write(part)
137
+ f.write(b"\r\n")
138
+ self._write_end_boundary(f)
85
139
 
86
140
  def load(self):
141
+ """Read multipart file. Returns (metadata_dict, list_of_bytes_parts)."""
87
142
  assert self.task_id
143
+ boundary = self.BOUNDARY_PREFIX + self._boundary()
144
+ end_boundary = self.BOUNDARY_PREFIX + self._boundary() + b"--"
88
145
  with RedisFileLock(self.redis_client, os.path.join(self.folder, self.task_id)):
89
- with open(os.path.join(self.folder, self.task_id), "rb") as source_file:
90
- data = pickle.load(source_file)
91
- return data
146
+ with open(os.path.join(self.folder, self.task_id), "rb") as f:
147
+ raw = f.read()
148
+ sections = raw.split(boundary)
149
+ metadata = None
150
+ parts = []
151
+ for section in sections:
152
+ section = section.strip(b"\r\n")
153
+ if not section or section == b"--":
154
+ continue
155
+ header_end = section.find(b"\r\n\r\n")
156
+ if header_end == -1:
157
+ continue
158
+ header = section[:header_end]
159
+ body = section[header_end + 4:]
160
+ if body.endswith(b"\r\n"):
161
+ body = body[:-2]
162
+ if b"application/json" in header:
163
+ metadata = json.loads(body)
164
+ else:
165
+ parts.append(body)
166
+ return metadata or {}, parts
92
167
 
93
168
 
94
169
  class CeleryExecutor(object):
@@ -129,7 +204,10 @@ class CeleryExecutor(object):
129
204
  self.get_thread_pool = None
130
205
  self.set_thread_pool = None
131
206
  self.io_thread_pool = None
132
- self.queue_threadlocks = {k: threading.Lock() for k in self.queue_config.keys()}
207
+ self.queue_semaphores = {
208
+ k: threading.Semaphore(v.max_tasks_in_queue) if v.max_tasks_in_queue else None
209
+ for k, v in self.queue_config.items()
210
+ }
133
211
 
134
212
  # Folder setup
135
213
  self.base_folder = Path(base_folder).resolve()
@@ -520,9 +598,16 @@ class CeleryExecutor(object):
520
598
  task_id = task.request.id
521
599
  serialized_data = CelerySerialized(folder=self.in_folder, redis_client=self.redis_client)
522
600
  serialized_data.set_task_id(task_id)
523
- data = serialized_data.load()
601
+ metadata, parts = serialized_data.load()
524
602
  del serialized_data
525
- args, kwargs = data.get("args", []), data.get("kwargs", {})
603
+ args, kwargs = metadata.get("args", []), metadata.get("kwargs", {})
604
+
605
+ part_count = metadata.get("part_count", 0)
606
+ if part_count > 0:
607
+ if metadata.get("part_is_list", part_count > 1):
608
+ args = [parts] + args
609
+ else:
610
+ args = [parts[0]] + args
526
611
 
527
612
  if self.app.conf.task_always_eager:
528
613
  queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
@@ -533,9 +618,25 @@ class CeleryExecutor(object):
533
618
  queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
534
619
  task.request.resources = (self.resource_registry or {}).get(queue, {})
535
620
  out_data = task_fn(task, *args, **kwargs)
536
- serialized_data = CelerySerialized(folder=self.out_folder, data=out_data, redis_client=self.redis_client)
621
+ if isinstance(out_data, TaskResult):
622
+ out_part_is_list = isinstance(out_data.part, list)
623
+ if out_data.part is None:
624
+ out_parts = None
625
+ elif out_part_is_list:
626
+ out_parts = out_data.part
627
+ else:
628
+ out_parts = [out_data.part]
629
+ serialized_data = CelerySerialized(
630
+ folder=self.out_folder, data=out_data.data, redis_client=self.redis_client
631
+ )
632
+ else:
633
+ out_parts = None
634
+ out_part_is_list = False
635
+ serialized_data = CelerySerialized(
636
+ folder=self.out_folder, data=out_data, redis_client=self.redis_client
637
+ )
537
638
  serialized_data.set_task_id(task_id)
538
- serialized_data.dump()
639
+ serialized_data.dump(parts=out_parts, part_is_list=out_part_is_list)
539
640
  del serialized_data
540
641
  if os.path.isfile(os.path.join(self.in_folder, task_id)):
541
642
  with RedisFileLock(self.redis_client, os.path.join(self.in_folder, task_id)):
@@ -576,36 +677,23 @@ class CeleryExecutor(object):
576
677
 
577
678
  return decorator
578
679
 
579
- async def send_task(self, task_fn, args=[], kwargs={}, **options) -> str:
680
+ async def send_task(self, task_fn, args=None, kwargs=None, part=None, **options) -> str:
681
+ args = args if args is not None else []
682
+ kwargs = kwargs if kwargs is not None else {}
580
683
  if self.app.conf.task_always_eager and "dev_preload" not in self.app.conf:
581
684
  self.preload_on_worker_init()
582
685
  self.app.conf.dev_preload = True
583
686
 
584
- def _reserve_queue_slot(queue_name):
687
+ def _check_queue_llen(queue_name):
585
688
  if self._queue_stalled.get(queue_name, False):
586
689
  raise CeleryExecutor.QueueStalledException(
587
690
  f"Queue '{queue_name}' is stalled. Service temporarily unavailable."
588
691
  )
589
- limit = self.queue_config[queue_name].max_tasks_in_queue
590
- if limit:
591
- reservation_key = f"queue:reserved:{queue_name}"
592
- with self.queue_threadlocks[queue_name]:
593
- queue_count = self.redis_client.llen(queue_name)
594
- reserved_count = int(self.redis_client.get(reservation_key) or 0)
595
- total_count = queue_count + reserved_count
596
- if total_count >= limit:
597
- raise CeleryExecutor.QueueFullException(
598
- f"Queue '{queue_name}' has reached its limit of {limit} tasks!"
599
- )
600
- self.redis_client.incr(reservation_key)
601
- return True
602
- return False
603
-
604
- def _release_queue_slot(queue_name):
605
- limit = self.queue_config[queue_name].max_tasks_in_queue
606
- if limit:
607
- reservation_key = f"queue:reserved:{queue_name}"
608
- self.redis_client.decr(reservation_key)
692
+ if self.redis_client.llen(queue_name) >= self.queue_config[queue_name].max_tasks_in_queue:
693
+ raise CeleryExecutor.QueueFullException(
694
+ f"Queue '{queue_name}' has reached its limit of "
695
+ f"{self.queue_config[queue_name].max_tasks_in_queue} tasks!"
696
+ )
609
697
 
610
698
  def _write_task_data(in_folder, task_args, task_kwargs, task_id):
611
699
  serialized_data = CelerySerialized(
@@ -629,19 +717,37 @@ class CeleryExecutor(object):
629
717
  queue_name = self.app.conf.task_default_routing_key
630
718
  queue_name = getattr(task_fn, "queue", queue_name)
631
719
  queue_name = options.get("queue", queue_name)
632
- reserved = False
720
+
721
+ sem = self.queue_semaphores.get(queue_name)
722
+ acquired = False
723
+ if sem is not None:
724
+ if not sem.acquire(blocking=False):
725
+ raise CeleryExecutor.QueueFullException(
726
+ f"Queue '{queue_name}' has reached its limit of "
727
+ f"{self.queue_config[queue_name].max_tasks_in_queue} tasks!"
728
+ )
729
+ acquired = True
730
+
633
731
  try:
634
- reserved = await asyncio.get_running_loop().run_in_executor(
635
- self.set_thread_pool, _reserve_queue_slot, queue_name
636
- )
637
- await asyncio.get_running_loop().run_in_executor(
638
- self.io_thread_pool,
639
- _write_task_data,
640
- self.in_folder,
641
- args,
642
- kwargs,
643
- task_id,
644
- )
732
+ if acquired:
733
+ await asyncio.get_running_loop().run_in_executor(
734
+ self.set_thread_pool, _check_queue_llen, queue_name
735
+ )
736
+
737
+ if part is not None:
738
+ await self._write_task_data_with_part(
739
+ task_id, args, kwargs, part
740
+ )
741
+ else:
742
+ await asyncio.get_running_loop().run_in_executor(
743
+ self.io_thread_pool,
744
+ _write_task_data,
745
+ self.in_folder,
746
+ args,
747
+ kwargs,
748
+ task_id,
749
+ )
750
+
645
751
  await asyncio.get_running_loop().run_in_executor(
646
752
  self.set_thread_pool, _send_task, task_fn, task_id, options
647
753
  )
@@ -652,11 +758,61 @@ class CeleryExecutor(object):
652
758
  logger.info(f"Cancelling task {task_id} due to disconnect!")
653
759
  await self.terminate_task(task_id)
654
760
  raise
761
+ except Exception:
762
+ try:
763
+ await self.terminate_task(task_id)
764
+ except Exception:
765
+ pass
766
+ raise
655
767
  finally:
656
- if reserved:
657
- await asyncio.get_running_loop().run_in_executor(self.set_thread_pool, _release_queue_slot, queue_name)
768
+ if acquired:
769
+ sem.release()
658
770
  return task_id
659
771
 
772
+ async def _write_task_data_with_part(self, task_id, args, kwargs, part):
773
+ """Write task data with a streamed part to the multipart file.
774
+ The part becomes the first arg on the worker side.
775
+ """
776
+ boundary = CelerySerialized.BOUNDARY_PREFIX + task_id.encode()
777
+ boundary_line = boundary + CelerySerialized.BOUNDARY_SUFFIX
778
+ end_boundary_line = boundary + CelerySerialized.BOUNDARY_END
779
+
780
+ is_list = isinstance(part, list)
781
+ items = part if is_list else [part]
782
+ part_count = len(items)
783
+
784
+ metadata = {"args": args, "kwargs": kwargs, "part_count": part_count, "part_is_list": is_list}
785
+ metadata_bytes = json.dumps(metadata).encode()
786
+
787
+ file_path = os.path.join(self.in_folder, task_id)
788
+ with open(file_path, "wb") as f:
789
+ f.write(boundary_line)
790
+ f.write(CelerySerialized.CONTENT_TYPE_JSON)
791
+ f.write(CelerySerialized.HEADER_END)
792
+ f.write(metadata_bytes)
793
+ f.write(b"\r\n")
794
+
795
+ for item in items:
796
+ f.write(boundary_line)
797
+ f.write(CelerySerialized.CONTENT_TYPE_BYTES)
798
+ f.write(CelerySerialized.HEADER_END)
799
+ if hasattr(item, "__aiter__"):
800
+ async for chunk in item:
801
+ f.write(chunk if isinstance(chunk, bytes) else chunk.encode())
802
+ elif hasattr(item, "read"):
803
+ while True:
804
+ chunk = await item.read(65536)
805
+ if not chunk:
806
+ break
807
+ f.write(chunk if isinstance(chunk, bytes) else chunk.encode())
808
+ elif isinstance(item, bytes):
809
+ f.write(item)
810
+ else:
811
+ raise TypeError(f"Unsupported part item type: {type(item)}")
812
+ f.write(b"\r\n")
813
+
814
+ f.write(end_boundary_line)
815
+
660
816
  async def terminate_task(self, task_id):
661
817
  def _terminate_task(celery_app, task_id):
662
818
  celery_app.control.revoke(task_id, terminate=True)
@@ -710,8 +866,15 @@ class CeleryExecutor(object):
710
866
  def _read_task_data(out_folder, task_id):
711
867
  serialized_data = CelerySerialized(folder=out_folder, redis_client=self.redis_client)
712
868
  serialized_data.set_task_id(task_id)
713
- result = serialized_data.load()
714
- return result
869
+ metadata, parts = serialized_data.load()
870
+ data = metadata.get("data", metadata)
871
+ part_count = metadata.get("part_count", 0)
872
+ if part_count > 0:
873
+ if metadata.get("part_is_list", part_count > 1):
874
+ return TaskResult(data=data, part=parts)
875
+ else:
876
+ return TaskResult(data=data, part=parts[0])
877
+ return data
715
878
 
716
879
  def _remove_task_data(celery_app, in_folder, out_folder, task_id):
717
880
  celery_app.AsyncResult(task_id).forget()
@@ -737,8 +900,10 @@ class CeleryExecutor(object):
737
900
  )
738
901
  return result
739
902
 
740
- async def send_and_wait_task(self, task_fn, args=[], kwargs={}, timeout=60, **options):
741
- task_id = await self.send_task(task_fn, args, kwargs, **options)
903
+ async def send_and_wait_task(self, task_fn, args=None, kwargs=None, part=None, timeout=60, **options):
904
+ args = args if args is not None else []
905
+ kwargs = kwargs if kwargs is not None else {}
906
+ task_id = await self.send_task(task_fn, args, kwargs, part=part, **options)
742
907
  ready = False
743
908
  state = None
744
909
  start_time = time.time()
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from fastapi import Request, Body
2
+ from fastapi import Request
3
3
  from fastapi.responses import JSONResponse, StreamingResponse
4
4
  from pydantic import BaseModel
5
5
  from typing import List, Optional, Literal, Any
@@ -17,8 +17,6 @@ import traceback
17
17
  from copy import copy
18
18
  import logging
19
19
 
20
- from octostar.client import make_client
21
-
22
20
  MAX_ERROR_MESSAGE_BYTES = 256
23
21
  MAX_ERROR_TRACEBACK_BYTES = 10240
24
22
 
@@ -99,100 +97,6 @@ class Route(ABC):
99
97
  return self
100
98
 
101
99
 
102
- class OctostarRoute(Route):
103
- def __init__(self, app, tasks_routes, celery_executor=None, router=None):
104
- self.app = app
105
- self._router = router
106
- self.routed_funcs = []
107
- self.tasks_routes = tasks_routes
108
- self.celery_executor = celery_executor
109
- self.endpoints = {}
110
- self.define_routes()
111
-
112
- def register_route(self, op, octostar_task):
113
- self.endpoints[op.strip("/")] = octostar_task
114
-
115
- def define_routes(self):
116
- if self.celery_executor:
117
-
118
- @Route.route(self, path="/task-state/{task_id}")
119
- async def get_task_status(task_id: str) -> JSONResponse:
120
- task_status = await self.tasks_routes.get_task(task_id, pop=False)
121
- task_status = task_status.model_dump(mode="json")["data"]["task_state"]
122
- return JSONResponse(task_status)
123
-
124
- @Route.route(self, path="/task-result/{task_id}")
125
- async def get_task_result(task_id: str) -> JSONResponse:
126
- return_data = await self.tasks_routes.get_task(task_id, pop=True)
127
- return_data = return_data.model_dump(mode="json")["data"]["data"]
128
- return JSONResponse(return_data)
129
-
130
- @Route.route(self, path="/{op}", methods=["POST"])
131
- async def send_task(
132
- op: str,
133
- os_context: dict = Body(...),
134
- jwt: str = Body(...),
135
- params: dict = Body(dict()),
136
- ) -> str:
137
- """
138
- Any request coming from Octostar (e.g. manifest) should enter from here.
139
- """
140
- path_params = []
141
- op = op.split("/")
142
- if len(op) > 1:
143
- path_params = op[1:]
144
- op = op[0]
145
- query_params = params
146
- client = make_client(jwt)
147
- if op not in self.endpoints.keys():
148
- raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
149
- task_id = await self.celery_executor.send_task(
150
- self.endpoints[op], args=[os_context, client, query_params]
151
- )
152
- return task_id
153
-
154
- else:
155
-
156
- @Route.route(self, path="/{op}", methods=["POST"])
157
- async def call_task(
158
- op: str,
159
- os_context: dict = Body(...),
160
- jwt: str = Body(...),
161
- params: dict = Body(dict()),
162
- ) -> str:
163
- """
164
- Any request coming from Octostar (e.g. manifest) should enter from here.
165
- """
166
- path_params = []
167
- op = op.split("/")
168
- if len(op) > 1:
169
- path_params = op[1:]
170
- op = op[0]
171
- query_params = params
172
- client = make_client(jwt)
173
- if op not in self.endpoints.keys():
174
- raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
175
- result = await self.endpoints[op](os_context, client, query_params)
176
- return result
177
-
178
- @staticmethod
179
- def octostar_task(celery_executor, *args, **opts):
180
- def decorator(func):
181
- if celery_executor:
182
- serialized_func = celery_executor.serialized_io(func)
183
- task_func = celery_executor.app.task(*args, **opts)(serialized_func)
184
- else:
185
-
186
- @wraps(func)
187
- def octostar_func(*args, **kwargs):
188
- return func(None, *args, **kwargs)
189
-
190
- task_func = octostar_func
191
- return task_func
192
-
193
- return decorator
194
-
195
-
196
100
  class CommonModels(object):
197
101
  class OKResponseModel(BaseModel):
198
102
  message: str = "OK"
@@ -1216,18 +1216,20 @@ class NifiRoute(Route):
1216
1216
  op = op[0]
1217
1217
  query_params = request.query_params
1218
1218
  processor_suffix = query_params["processor_suffix"]
1219
- body = await request.json()
1220
1219
  processor_name = "processor." + self.processor_name + "." + op + "." + processor_suffix
1221
1220
  if op not in self.endpoints.keys():
1222
1221
  raise StarletteHTTPException(403, f"Route {op} is forbidden for NiFi.")
1223
- task_id = await self.celery_executor.send_task(self.endpoints[op], args=[body, processor_name])
1222
+ task_id = await self.celery_executor.send_task(
1223
+ self.endpoints[op], args=[processor_name], part=request.stream()
1224
+ )
1224
1225
  return task_id
1225
1226
 
1226
1227
  @staticmethod
1227
1228
  def nifi_task(celery_executor, *args, **opts):
1228
1229
  def decorator(func):
1229
1230
  @wraps(func)
1230
- def nifi_func(task, body, processor_name, *args, **kwargs):
1231
+ def nifi_func(task, body_bytes, processor_name, *args, **kwargs):
1232
+ body = json.loads(body_bytes)
1231
1233
  with NifiContextManager(body) as nifi_context:
1232
1234
  entity_batches = nifi_context.receive_input(body, processor_name)
1233
1235
  entity_batches = func(
@@ -15,13 +15,13 @@ def get_label_keys(type, ontology):
15
15
  return list(label_keys.keys())
16
16
 
17
17
  def get_label(record, ontology):
18
- if record.get("os_materialized_label") not in (None, ""):
19
- return str(record.get("os_materialized_label"))
20
- if record.get("os_custom_label") not in (None, ""):
21
- return str(record.get("os_custom_label"))
18
+ if record.get("os_entity_label_materialized") not in (None, ""):
19
+ return str(record.get("os_entity_label_materialized")).strip()
20
+ if record.get("os_entity_label") not in (None, ""):
21
+ return str(record.get("os_entity_label")).strip()
22
22
  label_keys = get_label_keys(record["entity_type"], ontology)
23
23
  fields = [record.get(field) for field in label_keys]
24
24
  fields = [f for f in fields if f is not None]
25
25
  fields = [str(f) for f in fields if f]
26
26
  label = " ".join(fields)
27
- return label or None
27
+ return label.strip() or None