streamlit-octostar-utils 0.4.2.dev25__tar.gz → 0.5.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/celery.py +180 -24
  4. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +1 -97
  5. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +5 -3
  6. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/inheritance.py +5 -5
  7. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/LICENSE +0 -0
  8. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/README.md +0 -0
  9. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/__init__.py +0 -0
  10. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  11. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
  12. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  13. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  14. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  15. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  16. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  17. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  18. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  19. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  20. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  21. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  22. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  23. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
  24. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
  25. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
  26. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  27. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  28. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/timestamp.py +0 -0
  29. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  30. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  31. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
  32. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
  33. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  34. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
  35. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
  36. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  37. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  38. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.4.2.dev25
3
+ Version: 0.5.0.dev1
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.4.2-dev.25"
8
+ version = "0.5.0-dev.1"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -9,7 +9,6 @@ import subprocess
9
9
  from fastapi import Query
10
10
  import time
11
11
  import os
12
- import pickle
13
12
  import atexit
14
13
  import redis
15
14
  import uuid
@@ -68,7 +67,31 @@ class CeleryQueueConfig:
68
67
  self.options = options
69
68
 
70
69
 
70
class TaskResult:
    """Wrapper for task results that carry binary parts alongside JSON data.

    Tasks returning binary data (e.g. images) should return a TaskResult so
    that serialized_io writes the binary as multipart parts instead of
    attempting JSON serialization on bytes.

    Attributes:
        data: The JSON-serializable payload of the result.
        part: ``None`` (no binary payload), a single bytes object, or a list
            of bytes objects.
    """

    def __init__(self, data, part=None):
        self.data = data
        self.part = part

    def __repr__(self):
        # Summarize the binary payload instead of dumping it: parts can be
        # large (images, documents) and would flood logs and tracebacks.
        if self.part is None:
            part_desc = "None"
        elif isinstance(self.part, list):
            part_desc = f"<{len(self.part)} parts>"
        elif isinstance(self.part, (bytes, bytearray, memoryview)):
            part_desc = f"<{len(self.part)} bytes>"
        else:
            part_desc = f"<{type(self.part).__name__}>"
        return f"{type(self).__name__}(data={self.data!r}, part={part_desc})"
79
+
80
+
71
81
  class CelerySerialized:
82
+ """Serializes task data to a boundary-delimited multipart file.
83
+
84
+ Format: metadata JSON part followed by optional binary/streamed parts,
85
+ separated by boundary markers (the task_id). Replaces pickle entirely.
86
+ """
87
+
88
+ BOUNDARY_PREFIX = b"--"
89
+ BOUNDARY_SUFFIX = b"\r\n"
90
+ BOUNDARY_END = b"--\r\n"
91
+ CONTENT_TYPE_JSON = b"Content-Type: application/json\r\n"
92
+ CONTENT_TYPE_BYTES = b"Content-Type: application/octet-stream\r\n"
93
+ HEADER_END = b"\r\n"
94
+
72
95
  def __init__(self, folder, redis_client, data=None):
73
96
  self.folder = folder
74
97
  self.data = data
@@ -77,18 +100,70 @@ class CelerySerialized:
77
100
  def set_task_id(self, task_id):
78
101
  self.task_id = task_id
79
102
 
80
- def dump(self):
103
+ def _boundary(self):
104
+ return self.task_id.encode()
105
+
106
+ def _write_boundary(self, f):
107
+ f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_SUFFIX)
108
+
109
+ def _write_end_boundary(self, f):
110
+ f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_END)
111
+
112
def dump(self, parts=None, part_is_list=False):
    """Write metadata plus optional binary parts in multipart format.

    The file is written to ``<folder>/<task_id>`` while holding the
    RedisFileLock for that path.

    Args:
        parts: Optional list of bytes objects to write as additional parts.
        part_is_list: Whether the original part was a list (recorded in the
            metadata so the reader can restore the original type).

    Raises:
        TypeError: If the metadata is not JSON-serializable (raised by
            ``json.dumps`` before the target file is touched).
    """
    assert self.task_id
    # Work on a copy: the bookkeeping keys below must not leak into the
    # dict object owned by the caller (e.g. a task's return value).
    if isinstance(self.data, dict):
        metadata = dict(self.data)
    else:
        metadata = {"data": self.data}
    parts = list(parts) if parts else []
    metadata["part_count"] = len(parts)
    metadata["part_is_list"] = part_is_list
    # Serialize before opening the file so that a non-serializable payload
    # fails without leaving a truncated file behind.
    metadata_bytes = json.dumps(metadata).encode()
    path = os.path.join(self.folder, self.task_id)
    with RedisFileLock(self.redis_client, path):
        with open(path, "wb") as f:
            self._write_boundary(f)
            f.write(self.CONTENT_TYPE_JSON)
            f.write(self.HEADER_END)
            f.write(metadata_bytes)
            f.write(b"\r\n")
            for part in parts:
                self._write_boundary(f)
                f.write(self.CONTENT_TYPE_BYTES)
                f.write(self.HEADER_END)
                f.write(part)
                f.write(b"\r\n")
            self._write_end_boundary(f)
85
139
 
86
140
def load(self):
    """Read the multipart file for the current task.

    The file is read from ``<folder>/<task_id>`` while holding the
    RedisFileLock for that path.

    Returns:
        A ``(metadata, parts)`` tuple: ``metadata`` is the dict decoded from
        the JSON part (``{}`` if there is none) and ``parts`` is the list of
        raw bytes bodies of every non-JSON part, in file order.
    """
    assert self.task_id
    boundary = self.BOUNDARY_PREFIX + self._boundary()
    path = os.path.join(self.folder, self.task_id)
    with RedisFileLock(self.redis_client, path):
        with open(path, "rb") as f:
            raw = f.read()
    metadata = None
    parts = []
    # Everything before the first boundary is preamble and is ignored.
    for section in raw.split(boundary)[1:]:
        if section in (b"--", b"--\r\n"):
            # Remainder of the closing "--<boundary>--" marker.
            continue
        # Remove exactly the CRLF that terminates the boundary line. A
        # blanket strip() here would also eat CR/LF bytes that belong to a
        # binary payload and would make empty parts unparseable.
        if section.startswith(b"\r\n"):
            section = section[2:]
        header, separator, body = section.partition(b"\r\n\r\n")
        if not separator:
            continue
        # Remove exactly the one CRLF the writer appends after each body.
        if body.endswith(b"\r\n"):
            body = body[:-2]
        if b"application/json" in header:
            metadata = json.loads(body)
        else:
            parts.append(body)
    return metadata or {}, parts
92
167
 
93
168
 
94
169
  class CeleryExecutor(object):
@@ -523,9 +598,16 @@ class CeleryExecutor(object):
523
598
  task_id = task.request.id
524
599
  serialized_data = CelerySerialized(folder=self.in_folder, redis_client=self.redis_client)
525
600
  serialized_data.set_task_id(task_id)
526
- data = serialized_data.load()
601
+ metadata, parts = serialized_data.load()
527
602
  del serialized_data
528
- args, kwargs = data.get("args", []), data.get("kwargs", {})
603
+ args, kwargs = metadata.get("args", []), metadata.get("kwargs", {})
604
+
605
+ part_count = metadata.get("part_count", 0)
606
+ if part_count > 0:
607
+ if metadata.get("part_is_list", part_count > 1):
608
+ args = [parts] + args
609
+ else:
610
+ args = [parts[0]] + args
529
611
 
530
612
  if self.app.conf.task_always_eager:
531
613
  queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
@@ -536,9 +618,25 @@ class CeleryExecutor(object):
536
618
  queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
537
619
  task.request.resources = (self.resource_registry or {}).get(queue, {})
538
620
  out_data = task_fn(task, *args, **kwargs)
539
- serialized_data = CelerySerialized(folder=self.out_folder, data=out_data, redis_client=self.redis_client)
621
+ if isinstance(out_data, TaskResult):
622
+ out_part_is_list = isinstance(out_data.part, list)
623
+ if out_data.part is None:
624
+ out_parts = None
625
+ elif out_part_is_list:
626
+ out_parts = out_data.part
627
+ else:
628
+ out_parts = [out_data.part]
629
+ serialized_data = CelerySerialized(
630
+ folder=self.out_folder, data=out_data.data, redis_client=self.redis_client
631
+ )
632
+ else:
633
+ out_parts = None
634
+ out_part_is_list = False
635
+ serialized_data = CelerySerialized(
636
+ folder=self.out_folder, data=out_data, redis_client=self.redis_client
637
+ )
540
638
  serialized_data.set_task_id(task_id)
541
- serialized_data.dump()
639
+ serialized_data.dump(parts=out_parts, part_is_list=out_part_is_list)
542
640
  del serialized_data
543
641
  if os.path.isfile(os.path.join(self.in_folder, task_id)):
544
642
  with RedisFileLock(self.redis_client, os.path.join(self.in_folder, task_id)):
@@ -579,7 +677,7 @@ class CeleryExecutor(object):
579
677
 
580
678
  return decorator
581
679
 
582
- async def send_task(self, task_fn, args=None, kwargs=None, **options) -> str:
680
+ async def send_task(self, task_fn, args=None, kwargs=None, part=None, **options) -> str:
583
681
  args = args if args is not None else []
584
682
  kwargs = kwargs if kwargs is not None else {}
585
683
  if self.app.conf.task_always_eager and "dev_preload" not in self.app.conf:
@@ -635,14 +733,21 @@ class CeleryExecutor(object):
635
733
  await asyncio.get_running_loop().run_in_executor(
636
734
  self.set_thread_pool, _check_queue_llen, queue_name
637
735
  )
638
- await asyncio.get_running_loop().run_in_executor(
639
- self.io_thread_pool,
640
- _write_task_data,
641
- self.in_folder,
642
- args,
643
- kwargs,
644
- task_id,
645
- )
736
+
737
+ if part is not None:
738
+ await self._write_task_data_with_part(
739
+ task_id, args, kwargs, part
740
+ )
741
+ else:
742
+ await asyncio.get_running_loop().run_in_executor(
743
+ self.io_thread_pool,
744
+ _write_task_data,
745
+ self.in_folder,
746
+ args,
747
+ kwargs,
748
+ task_id,
749
+ )
750
+
646
751
  await asyncio.get_running_loop().run_in_executor(
647
752
  self.set_thread_pool, _send_task, task_fn, task_id, options
648
753
  )
@@ -664,6 +769,50 @@ class CeleryExecutor(object):
664
769
  sem.release()
665
770
  return task_id
666
771
 
772
async def _write_task_data_with_part(self, task_id, args, kwargs, part):
    """Write task data with a streamed part to the multipart input file.

    The part becomes the first arg on the worker side.

    Args:
        task_id: Task identifier; used both as file name and as boundary.
        args: JSON-serializable positional arguments for the task.
        kwargs: JSON-serializable keyword arguments for the task.
        part: A single item or a list of items. Each item may be an async
            iterable of chunks, an object with a (sync or async) ``read``
            method, or a bytes-like object.

    Raises:
        TypeError: If an item is of an unsupported type.
    """
    # Local import keeps this block self-contained with respect to the
    # file's top-level import list.
    import inspect

    boundary = CelerySerialized.BOUNDARY_PREFIX + task_id.encode()
    boundary_line = boundary + CelerySerialized.BOUNDARY_SUFFIX
    end_boundary_line = boundary + CelerySerialized.BOUNDARY_END

    is_list = isinstance(part, list)
    items = part if is_list else [part]

    metadata = {
        "args": args,
        "kwargs": kwargs,
        "part_count": len(items),
        "part_is_list": is_list,
    }
    metadata_bytes = json.dumps(metadata).encode()

    def _as_bytes(chunk):
        # Text chunks are encoded; bytes-like chunks are written as-is.
        return chunk.encode() if isinstance(chunk, str) else chunk

    file_path = os.path.join(self.in_folder, task_id)
    try:
        with open(file_path, "wb") as f:
            f.write(boundary_line)
            f.write(CelerySerialized.CONTENT_TYPE_JSON)
            f.write(CelerySerialized.HEADER_END)
            f.write(metadata_bytes)
            f.write(b"\r\n")

            for item in items:
                f.write(boundary_line)
                f.write(CelerySerialized.CONTENT_TYPE_BYTES)
                f.write(CelerySerialized.HEADER_END)
                if hasattr(item, "__aiter__"):
                    async for chunk in item:
                        f.write(_as_bytes(chunk))
                elif hasattr(item, "read"):
                    while True:
                        chunk = item.read(65536)
                        # Support both async readers (e.g. upload objects)
                        # and ordinary synchronous file objects.
                        if inspect.isawaitable(chunk):
                            chunk = await chunk
                        if not chunk:
                            break
                        f.write(_as_bytes(chunk))
                elif isinstance(item, (bytes, bytearray, memoryview)):
                    f.write(item)
                else:
                    raise TypeError(f"Unsupported part item type: {type(item)}")
                f.write(b"\r\n")

            f.write(end_boundary_line)
    except BaseException:
        # The stream failed or the request was cancelled mid-write: remove
        # the truncated file so it is never mistaken for valid task input.
        try:
            os.remove(file_path)
        except OSError:
            pass
        raise
+
667
816
  async def terminate_task(self, task_id):
668
817
  def _terminate_task(celery_app, task_id):
669
818
  celery_app.control.revoke(task_id, terminate=True)
@@ -717,8 +866,15 @@ class CeleryExecutor(object):
717
866
def _read_task_data(out_folder, task_id):
    """Load a finished task's output from ``out_folder``.

    Returns:
        The task's data, or a ``TaskResult`` wrapping the data together
        with its binary part(s) when the output file contains parts.

    Raises:
        ValueError: If the file holds fewer or more parts than its metadata
            announces (truncated or corrupted output).
    """
    serialized_data = CelerySerialized(folder=out_folder, redis_client=self.redis_client)
    serialized_data.set_task_id(task_id)
    metadata, parts = serialized_data.load()
    part_count = metadata.get("part_count", 0)
    part_is_list = metadata.get("part_is_list", part_count > 1)
    if "data" in metadata:
        data = metadata["data"]
    else:
        # The task returned a dict that was stored as the metadata itself:
        # hand it back without the serializer's bookkeeping keys.
        data = {
            key: value
            for key, value in metadata.items()
            if key not in ("part_count", "part_is_list")
        }
    if part_count > 0:
        if len(parts) != part_count:
            raise ValueError(
                f"Task {task_id} output declares {part_count} part(s) but {len(parts)} were read"
            )
        if part_is_list:
            return TaskResult(data=data, part=parts)
        return TaskResult(data=data, part=parts[0])
    return data
722
878
 
723
879
  def _remove_task_data(celery_app, in_folder, out_folder, task_id):
724
880
  celery_app.AsyncResult(task_id).forget()
@@ -744,10 +900,10 @@ class CeleryExecutor(object):
744
900
  )
745
901
  return result
746
902
 
747
- async def send_and_wait_task(self, task_fn, args=None, kwargs=None, timeout=60, **options):
903
+ async def send_and_wait_task(self, task_fn, args=None, kwargs=None, part=None, timeout=60, **options):
748
904
  args = args if args is not None else []
749
905
  kwargs = kwargs if kwargs is not None else {}
750
- task_id = await self.send_task(task_fn, args, kwargs, **options)
906
+ task_id = await self.send_task(task_fn, args, kwargs, part=part, **options)
751
907
  ready = False
752
908
  state = None
753
909
  start_time = time.time()
@@ -1,5 +1,5 @@
1
1
  import asyncio
2
- from fastapi import Request, Body
2
+ from fastapi import Request
3
3
  from fastapi.responses import JSONResponse, StreamingResponse
4
4
  from pydantic import BaseModel
5
5
  from typing import List, Optional, Literal, Any
@@ -17,8 +17,6 @@ import traceback
17
17
  from copy import copy
18
18
  import logging
19
19
 
20
- from octostar.client import make_client
21
-
22
20
  MAX_ERROR_MESSAGE_BYTES = 256
23
21
  MAX_ERROR_TRACEBACK_BYTES = 10240
24
22
 
@@ -99,100 +97,6 @@ class Route(ABC):
99
97
  return self
100
98
 
101
99
 
102
- class OctostarRoute(Route):
103
- def __init__(self, app, tasks_routes, celery_executor=None, router=None):
104
- self.app = app
105
- self._router = router
106
- self.routed_funcs = []
107
- self.tasks_routes = tasks_routes
108
- self.celery_executor = celery_executor
109
- self.endpoints = {}
110
- self.define_routes()
111
-
112
- def register_route(self, op, octostar_task):
113
- self.endpoints[op.strip("/")] = octostar_task
114
-
115
- def define_routes(self):
116
- if self.celery_executor:
117
-
118
- @Route.route(self, path="/task-state/{task_id}")
119
- async def get_task_status(task_id: str) -> JSONResponse:
120
- task_status = await self.tasks_routes.get_task(task_id, pop=False)
121
- task_status = task_status.model_dump(mode="json")["data"]["task_state"]
122
- return JSONResponse(task_status)
123
-
124
- @Route.route(self, path="/task-result/{task_id}")
125
- async def get_task_result(task_id: str) -> JSONResponse:
126
- return_data = await self.tasks_routes.get_task(task_id, pop=True)
127
- return_data = return_data.model_dump(mode="json")["data"]["data"]
128
- return JSONResponse(return_data)
129
-
130
- @Route.route(self, path="/{op}", methods=["POST"])
131
- async def send_task(
132
- op: str,
133
- os_context: dict = Body(...),
134
- jwt: str = Body(...),
135
- params: dict = Body(dict()),
136
- ) -> str:
137
- """
138
- Any request coming from Octostar (e.g. manifest) should enter from here.
139
- """
140
- path_params = []
141
- op = op.split("/")
142
- if len(op) > 1:
143
- path_params = op[1:]
144
- op = op[0]
145
- query_params = params
146
- client = make_client(jwt)
147
- if op not in self.endpoints.keys():
148
- raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
149
- task_id = await self.celery_executor.send_task(
150
- self.endpoints[op], args=[os_context, client, query_params]
151
- )
152
- return task_id
153
-
154
- else:
155
-
156
- @Route.route(self, path="/{op}", methods=["POST"])
157
- async def call_task(
158
- op: str,
159
- os_context: dict = Body(...),
160
- jwt: str = Body(...),
161
- params: dict = Body(dict()),
162
- ) -> str:
163
- """
164
- Any request coming from Octostar (e.g. manifest) should enter from here.
165
- """
166
- path_params = []
167
- op = op.split("/")
168
- if len(op) > 1:
169
- path_params = op[1:]
170
- op = op[0]
171
- query_params = params
172
- client = make_client(jwt)
173
- if op not in self.endpoints.keys():
174
- raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
175
- result = await self.endpoints[op](os_context, client, query_params)
176
- return result
177
-
178
- @staticmethod
179
- def octostar_task(celery_executor, *args, **opts):
180
- def decorator(func):
181
- if celery_executor:
182
- serialized_func = celery_executor.serialized_io(func)
183
- task_func = celery_executor.app.task(*args, **opts)(serialized_func)
184
- else:
185
-
186
- @wraps(func)
187
- def octostar_func(*args, **kwargs):
188
- return func(None, *args, **kwargs)
189
-
190
- task_func = octostar_func
191
- return task_func
192
-
193
- return decorator
194
-
195
-
196
100
  class CommonModels(object):
197
101
  class OKResponseModel(BaseModel):
198
102
  message: str = "OK"
@@ -1216,18 +1216,20 @@ class NifiRoute(Route):
1216
1216
  op = op[0]
1217
1217
  query_params = request.query_params
1218
1218
  processor_suffix = query_params["processor_suffix"]
1219
- body = await request.json()
1220
1219
  processor_name = "processor." + self.processor_name + "." + op + "." + processor_suffix
1221
1220
  if op not in self.endpoints.keys():
1222
1221
  raise StarletteHTTPException(403, f"Route {op} is forbidden for NiFi.")
1223
- task_id = await self.celery_executor.send_task(self.endpoints[op], args=[body, processor_name])
1222
+ task_id = await self.celery_executor.send_task(
1223
+ self.endpoints[op], args=[processor_name], part=request.stream()
1224
+ )
1224
1225
  return task_id
1225
1226
 
1226
1227
  @staticmethod
1227
1228
  def nifi_task(celery_executor, *args, **opts):
1228
1229
  def decorator(func):
1229
1230
  @wraps(func)
1230
- def nifi_func(task, body, processor_name, *args, **kwargs):
1231
+ def nifi_func(task, body_bytes, processor_name, *args, **kwargs):
1232
+ body = json.loads(body_bytes)
1231
1233
  with NifiContextManager(body) as nifi_context:
1232
1234
  entity_batches = nifi_context.receive_input(body, processor_name)
1233
1235
  entity_batches = func(
@@ -15,13 +15,13 @@ def get_label_keys(type, ontology):
15
15
  return list(label_keys.keys())
16
16
 
17
17
def get_label(record, ontology):
    """Return a display label for ``record``, or ``None`` if none can be built.

    Label sources are tried in order:
      1. ``os_entity_label_materialized``
      2. ``os_entity_label``
      3. the record's values for the label keys that ``get_label_keys``
         returns for ``record["entity_type"]``, joined with spaces.

    A source whose value is ``None``, empty, or whitespace-only is skipped,
    so the function never returns an empty string.
    """
    for key in ("os_entity_label_materialized", "os_entity_label"):
        value = record.get(key)
        if value is None:
            continue
        text = str(value).strip()
        # A whitespace-only label is treated as absent, so the next source
        # is tried instead of returning "".
        if text:
            return text
    label_keys = get_label_keys(record["entity_type"], ontology)
    values = (record.get(field) for field in label_keys)
    label = " ".join(str(value) for value in values if value)
    return label.strip() or None