streamlit-octostar-utils 0.4.2.dev25__tar.gz → 0.5.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/celery.py +180 -24
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +1 -97
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +5 -3
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/inheritance.py +5 -5
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/LICENSE +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/README.md +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev25 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -9,7 +9,6 @@ import subprocess
|
|
|
9
9
|
from fastapi import Query
|
|
10
10
|
import time
|
|
11
11
|
import os
|
|
12
|
-
import pickle
|
|
13
12
|
import atexit
|
|
14
13
|
import redis
|
|
15
14
|
import uuid
|
|
@@ -68,7 +67,31 @@ class CeleryQueueConfig:
|
|
|
68
67
|
self.options = options
|
|
69
68
|
|
|
70
69
|
|
|
70
|
+
class TaskResult:
|
|
71
|
+
"""Wrapper for task results that include binary parts alongside JSON data.
|
|
72
|
+
Tasks returning binary data (e.g. images) should return a TaskResult
|
|
73
|
+
so that serialized_io writes them as multipart parts instead of attempting
|
|
74
|
+
JSON serialization on bytes."""
|
|
75
|
+
|
|
76
|
+
def __init__(self, data, part=None):
|
|
77
|
+
self.data = data
|
|
78
|
+
self.part = part
|
|
79
|
+
|
|
80
|
+
|
|
71
81
|
class CelerySerialized:
|
|
82
|
+
"""Serializes task data to a boundary-delimited multipart file.
|
|
83
|
+
|
|
84
|
+
Format: metadata JSON part followed by optional binary/streamed parts,
|
|
85
|
+
separated by boundary markers (the task_id). Replaces pickle entirely.
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
BOUNDARY_PREFIX = b"--"
|
|
89
|
+
BOUNDARY_SUFFIX = b"\r\n"
|
|
90
|
+
BOUNDARY_END = b"--\r\n"
|
|
91
|
+
CONTENT_TYPE_JSON = b"Content-Type: application/json\r\n"
|
|
92
|
+
CONTENT_TYPE_BYTES = b"Content-Type: application/octet-stream\r\n"
|
|
93
|
+
HEADER_END = b"\r\n"
|
|
94
|
+
|
|
72
95
|
def __init__(self, folder, redis_client, data=None):
|
|
73
96
|
self.folder = folder
|
|
74
97
|
self.data = data
|
|
@@ -77,18 +100,70 @@ class CelerySerialized:
|
|
|
77
100
|
def set_task_id(self, task_id):
|
|
78
101
|
self.task_id = task_id
|
|
79
102
|
|
|
80
|
-
def
|
|
103
|
+
def _boundary(self):
|
|
104
|
+
return self.task_id.encode()
|
|
105
|
+
|
|
106
|
+
def _write_boundary(self, f):
|
|
107
|
+
f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_SUFFIX)
|
|
108
|
+
|
|
109
|
+
def _write_end_boundary(self, f):
|
|
110
|
+
f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_END)
|
|
111
|
+
|
|
112
|
+
def dump(self, parts=None, part_is_list=False):
|
|
113
|
+
"""Write metadata + optional parts in multipart format.
|
|
114
|
+
parts: optional list of bytes objects to write as additional parts.
|
|
115
|
+
part_is_list: whether the original part was a list (preserves type on read).
|
|
116
|
+
"""
|
|
81
117
|
assert self.task_id
|
|
118
|
+
if isinstance(self.data, dict):
|
|
119
|
+
metadata = self.data
|
|
120
|
+
else:
|
|
121
|
+
metadata = {"data": self.data}
|
|
122
|
+
metadata["part_count"] = len(parts) if parts else 0
|
|
123
|
+
metadata["part_is_list"] = part_is_list
|
|
82
124
|
with RedisFileLock(self.redis_client, os.path.join(self.folder, self.task_id)):
|
|
83
|
-
with open(os.path.join(self.folder, self.task_id), "wb") as
|
|
84
|
-
|
|
125
|
+
with open(os.path.join(self.folder, self.task_id), "wb") as f:
|
|
126
|
+
self._write_boundary(f)
|
|
127
|
+
f.write(self.CONTENT_TYPE_JSON)
|
|
128
|
+
f.write(self.HEADER_END)
|
|
129
|
+
f.write(json.dumps(metadata).encode())
|
|
130
|
+
f.write(b"\r\n")
|
|
131
|
+
if parts:
|
|
132
|
+
for part in parts:
|
|
133
|
+
self._write_boundary(f)
|
|
134
|
+
f.write(self.CONTENT_TYPE_BYTES)
|
|
135
|
+
f.write(self.HEADER_END)
|
|
136
|
+
f.write(part)
|
|
137
|
+
f.write(b"\r\n")
|
|
138
|
+
self._write_end_boundary(f)
|
|
85
139
|
|
|
86
140
|
def load(self):
|
|
141
|
+
"""Read multipart file. Returns (metadata_dict, list_of_bytes_parts)."""
|
|
87
142
|
assert self.task_id
|
|
143
|
+
boundary = self.BOUNDARY_PREFIX + self._boundary()
|
|
144
|
+
end_boundary = self.BOUNDARY_PREFIX + self._boundary() + b"--"
|
|
88
145
|
with RedisFileLock(self.redis_client, os.path.join(self.folder, self.task_id)):
|
|
89
|
-
with open(os.path.join(self.folder, self.task_id), "rb") as
|
|
90
|
-
|
|
91
|
-
|
|
146
|
+
with open(os.path.join(self.folder, self.task_id), "rb") as f:
|
|
147
|
+
raw = f.read()
|
|
148
|
+
sections = raw.split(boundary)
|
|
149
|
+
metadata = None
|
|
150
|
+
parts = []
|
|
151
|
+
for section in sections:
|
|
152
|
+
section = section.strip(b"\r\n")
|
|
153
|
+
if not section or section == b"--":
|
|
154
|
+
continue
|
|
155
|
+
header_end = section.find(b"\r\n\r\n")
|
|
156
|
+
if header_end == -1:
|
|
157
|
+
continue
|
|
158
|
+
header = section[:header_end]
|
|
159
|
+
body = section[header_end + 4:]
|
|
160
|
+
if body.endswith(b"\r\n"):
|
|
161
|
+
body = body[:-2]
|
|
162
|
+
if b"application/json" in header:
|
|
163
|
+
metadata = json.loads(body)
|
|
164
|
+
else:
|
|
165
|
+
parts.append(body)
|
|
166
|
+
return metadata or {}, parts
|
|
92
167
|
|
|
93
168
|
|
|
94
169
|
class CeleryExecutor(object):
|
|
@@ -523,9 +598,16 @@ class CeleryExecutor(object):
|
|
|
523
598
|
task_id = task.request.id
|
|
524
599
|
serialized_data = CelerySerialized(folder=self.in_folder, redis_client=self.redis_client)
|
|
525
600
|
serialized_data.set_task_id(task_id)
|
|
526
|
-
|
|
601
|
+
metadata, parts = serialized_data.load()
|
|
527
602
|
del serialized_data
|
|
528
|
-
args, kwargs =
|
|
603
|
+
args, kwargs = metadata.get("args", []), metadata.get("kwargs", {})
|
|
604
|
+
|
|
605
|
+
part_count = metadata.get("part_count", 0)
|
|
606
|
+
if part_count > 0:
|
|
607
|
+
if metadata.get("part_is_list", part_count > 1):
|
|
608
|
+
args = [parts] + args
|
|
609
|
+
else:
|
|
610
|
+
args = [parts[0]] + args
|
|
529
611
|
|
|
530
612
|
if self.app.conf.task_always_eager:
|
|
531
613
|
queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
|
|
@@ -536,9 +618,25 @@ class CeleryExecutor(object):
|
|
|
536
618
|
queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
|
|
537
619
|
task.request.resources = (self.resource_registry or {}).get(queue, {})
|
|
538
620
|
out_data = task_fn(task, *args, **kwargs)
|
|
539
|
-
|
|
621
|
+
if isinstance(out_data, TaskResult):
|
|
622
|
+
out_part_is_list = isinstance(out_data.part, list)
|
|
623
|
+
if out_data.part is None:
|
|
624
|
+
out_parts = None
|
|
625
|
+
elif out_part_is_list:
|
|
626
|
+
out_parts = out_data.part
|
|
627
|
+
else:
|
|
628
|
+
out_parts = [out_data.part]
|
|
629
|
+
serialized_data = CelerySerialized(
|
|
630
|
+
folder=self.out_folder, data=out_data.data, redis_client=self.redis_client
|
|
631
|
+
)
|
|
632
|
+
else:
|
|
633
|
+
out_parts = None
|
|
634
|
+
out_part_is_list = False
|
|
635
|
+
serialized_data = CelerySerialized(
|
|
636
|
+
folder=self.out_folder, data=out_data, redis_client=self.redis_client
|
|
637
|
+
)
|
|
540
638
|
serialized_data.set_task_id(task_id)
|
|
541
|
-
serialized_data.dump()
|
|
639
|
+
serialized_data.dump(parts=out_parts, part_is_list=out_part_is_list)
|
|
542
640
|
del serialized_data
|
|
543
641
|
if os.path.isfile(os.path.join(self.in_folder, task_id)):
|
|
544
642
|
with RedisFileLock(self.redis_client, os.path.join(self.in_folder, task_id)):
|
|
@@ -579,7 +677,7 @@ class CeleryExecutor(object):
|
|
|
579
677
|
|
|
580
678
|
return decorator
|
|
581
679
|
|
|
582
|
-
async def send_task(self, task_fn, args=None, kwargs=None, **options) -> str:
|
|
680
|
+
async def send_task(self, task_fn, args=None, kwargs=None, part=None, **options) -> str:
|
|
583
681
|
args = args if args is not None else []
|
|
584
682
|
kwargs = kwargs if kwargs is not None else {}
|
|
585
683
|
if self.app.conf.task_always_eager and "dev_preload" not in self.app.conf:
|
|
@@ -635,14 +733,21 @@ class CeleryExecutor(object):
|
|
|
635
733
|
await asyncio.get_running_loop().run_in_executor(
|
|
636
734
|
self.set_thread_pool, _check_queue_llen, queue_name
|
|
637
735
|
)
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
736
|
+
|
|
737
|
+
if part is not None:
|
|
738
|
+
await self._write_task_data_with_part(
|
|
739
|
+
task_id, args, kwargs, part
|
|
740
|
+
)
|
|
741
|
+
else:
|
|
742
|
+
await asyncio.get_running_loop().run_in_executor(
|
|
743
|
+
self.io_thread_pool,
|
|
744
|
+
_write_task_data,
|
|
745
|
+
self.in_folder,
|
|
746
|
+
args,
|
|
747
|
+
kwargs,
|
|
748
|
+
task_id,
|
|
749
|
+
)
|
|
750
|
+
|
|
646
751
|
await asyncio.get_running_loop().run_in_executor(
|
|
647
752
|
self.set_thread_pool, _send_task, task_fn, task_id, options
|
|
648
753
|
)
|
|
@@ -664,6 +769,50 @@ class CeleryExecutor(object):
|
|
|
664
769
|
sem.release()
|
|
665
770
|
return task_id
|
|
666
771
|
|
|
772
|
+
async def _write_task_data_with_part(self, task_id, args, kwargs, part):
|
|
773
|
+
"""Write task data with a streamed part to the multipart file.
|
|
774
|
+
The part becomes the first arg on the worker side.
|
|
775
|
+
"""
|
|
776
|
+
boundary = CelerySerialized.BOUNDARY_PREFIX + task_id.encode()
|
|
777
|
+
boundary_line = boundary + CelerySerialized.BOUNDARY_SUFFIX
|
|
778
|
+
end_boundary_line = boundary + CelerySerialized.BOUNDARY_END
|
|
779
|
+
|
|
780
|
+
is_list = isinstance(part, list)
|
|
781
|
+
items = part if is_list else [part]
|
|
782
|
+
part_count = len(items)
|
|
783
|
+
|
|
784
|
+
metadata = {"args": args, "kwargs": kwargs, "part_count": part_count, "part_is_list": is_list}
|
|
785
|
+
metadata_bytes = json.dumps(metadata).encode()
|
|
786
|
+
|
|
787
|
+
file_path = os.path.join(self.in_folder, task_id)
|
|
788
|
+
with open(file_path, "wb") as f:
|
|
789
|
+
f.write(boundary_line)
|
|
790
|
+
f.write(CelerySerialized.CONTENT_TYPE_JSON)
|
|
791
|
+
f.write(CelerySerialized.HEADER_END)
|
|
792
|
+
f.write(metadata_bytes)
|
|
793
|
+
f.write(b"\r\n")
|
|
794
|
+
|
|
795
|
+
for item in items:
|
|
796
|
+
f.write(boundary_line)
|
|
797
|
+
f.write(CelerySerialized.CONTENT_TYPE_BYTES)
|
|
798
|
+
f.write(CelerySerialized.HEADER_END)
|
|
799
|
+
if hasattr(item, "__aiter__"):
|
|
800
|
+
async for chunk in item:
|
|
801
|
+
f.write(chunk if isinstance(chunk, bytes) else chunk.encode())
|
|
802
|
+
elif hasattr(item, "read"):
|
|
803
|
+
while True:
|
|
804
|
+
chunk = await item.read(65536)
|
|
805
|
+
if not chunk:
|
|
806
|
+
break
|
|
807
|
+
f.write(chunk if isinstance(chunk, bytes) else chunk.encode())
|
|
808
|
+
elif isinstance(item, bytes):
|
|
809
|
+
f.write(item)
|
|
810
|
+
else:
|
|
811
|
+
raise TypeError(f"Unsupported part item type: {type(item)}")
|
|
812
|
+
f.write(b"\r\n")
|
|
813
|
+
|
|
814
|
+
f.write(end_boundary_line)
|
|
815
|
+
|
|
667
816
|
async def terminate_task(self, task_id):
|
|
668
817
|
def _terminate_task(celery_app, task_id):
|
|
669
818
|
celery_app.control.revoke(task_id, terminate=True)
|
|
@@ -717,8 +866,15 @@ class CeleryExecutor(object):
|
|
|
717
866
|
def _read_task_data(out_folder, task_id):
|
|
718
867
|
serialized_data = CelerySerialized(folder=out_folder, redis_client=self.redis_client)
|
|
719
868
|
serialized_data.set_task_id(task_id)
|
|
720
|
-
|
|
721
|
-
|
|
869
|
+
metadata, parts = serialized_data.load()
|
|
870
|
+
data = metadata.get("data", metadata)
|
|
871
|
+
part_count = metadata.get("part_count", 0)
|
|
872
|
+
if part_count > 0:
|
|
873
|
+
if metadata.get("part_is_list", part_count > 1):
|
|
874
|
+
return TaskResult(data=data, part=parts)
|
|
875
|
+
else:
|
|
876
|
+
return TaskResult(data=data, part=parts[0])
|
|
877
|
+
return data
|
|
722
878
|
|
|
723
879
|
def _remove_task_data(celery_app, in_folder, out_folder, task_id):
|
|
724
880
|
celery_app.AsyncResult(task_id).forget()
|
|
@@ -744,10 +900,10 @@ class CeleryExecutor(object):
|
|
|
744
900
|
)
|
|
745
901
|
return result
|
|
746
902
|
|
|
747
|
-
async def send_and_wait_task(self, task_fn, args=None, kwargs=None, timeout=60, **options):
|
|
903
|
+
async def send_and_wait_task(self, task_fn, args=None, kwargs=None, part=None, timeout=60, **options):
|
|
748
904
|
args = args if args is not None else []
|
|
749
905
|
kwargs = kwargs if kwargs is not None else {}
|
|
750
|
-
task_id = await self.send_task(task_fn, args, kwargs, **options)
|
|
906
|
+
task_id = await self.send_task(task_fn, args, kwargs, part=part, **options)
|
|
751
907
|
ready = False
|
|
752
908
|
state = None
|
|
753
909
|
start_time = time.time()
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from fastapi import Request
|
|
2
|
+
from fastapi import Request
|
|
3
3
|
from fastapi.responses import JSONResponse, StreamingResponse
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
from typing import List, Optional, Literal, Any
|
|
@@ -17,8 +17,6 @@ import traceback
|
|
|
17
17
|
from copy import copy
|
|
18
18
|
import logging
|
|
19
19
|
|
|
20
|
-
from octostar.client import make_client
|
|
21
|
-
|
|
22
20
|
MAX_ERROR_MESSAGE_BYTES = 256
|
|
23
21
|
MAX_ERROR_TRACEBACK_BYTES = 10240
|
|
24
22
|
|
|
@@ -99,100 +97,6 @@ class Route(ABC):
|
|
|
99
97
|
return self
|
|
100
98
|
|
|
101
99
|
|
|
102
|
-
class OctostarRoute(Route):
|
|
103
|
-
def __init__(self, app, tasks_routes, celery_executor=None, router=None):
|
|
104
|
-
self.app = app
|
|
105
|
-
self._router = router
|
|
106
|
-
self.routed_funcs = []
|
|
107
|
-
self.tasks_routes = tasks_routes
|
|
108
|
-
self.celery_executor = celery_executor
|
|
109
|
-
self.endpoints = {}
|
|
110
|
-
self.define_routes()
|
|
111
|
-
|
|
112
|
-
def register_route(self, op, octostar_task):
|
|
113
|
-
self.endpoints[op.strip("/")] = octostar_task
|
|
114
|
-
|
|
115
|
-
def define_routes(self):
|
|
116
|
-
if self.celery_executor:
|
|
117
|
-
|
|
118
|
-
@Route.route(self, path="/task-state/{task_id}")
|
|
119
|
-
async def get_task_status(task_id: str) -> JSONResponse:
|
|
120
|
-
task_status = await self.tasks_routes.get_task(task_id, pop=False)
|
|
121
|
-
task_status = task_status.model_dump(mode="json")["data"]["task_state"]
|
|
122
|
-
return JSONResponse(task_status)
|
|
123
|
-
|
|
124
|
-
@Route.route(self, path="/task-result/{task_id}")
|
|
125
|
-
async def get_task_result(task_id: str) -> JSONResponse:
|
|
126
|
-
return_data = await self.tasks_routes.get_task(task_id, pop=True)
|
|
127
|
-
return_data = return_data.model_dump(mode="json")["data"]["data"]
|
|
128
|
-
return JSONResponse(return_data)
|
|
129
|
-
|
|
130
|
-
@Route.route(self, path="/{op}", methods=["POST"])
|
|
131
|
-
async def send_task(
|
|
132
|
-
op: str,
|
|
133
|
-
os_context: dict = Body(...),
|
|
134
|
-
jwt: str = Body(...),
|
|
135
|
-
params: dict = Body(dict()),
|
|
136
|
-
) -> str:
|
|
137
|
-
"""
|
|
138
|
-
Any request coming from Octostar (e.g. manifest) should enter from here.
|
|
139
|
-
"""
|
|
140
|
-
path_params = []
|
|
141
|
-
op = op.split("/")
|
|
142
|
-
if len(op) > 1:
|
|
143
|
-
path_params = op[1:]
|
|
144
|
-
op = op[0]
|
|
145
|
-
query_params = params
|
|
146
|
-
client = make_client(jwt)
|
|
147
|
-
if op not in self.endpoints.keys():
|
|
148
|
-
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
|
149
|
-
task_id = await self.celery_executor.send_task(
|
|
150
|
-
self.endpoints[op], args=[os_context, client, query_params]
|
|
151
|
-
)
|
|
152
|
-
return task_id
|
|
153
|
-
|
|
154
|
-
else:
|
|
155
|
-
|
|
156
|
-
@Route.route(self, path="/{op}", methods=["POST"])
|
|
157
|
-
async def call_task(
|
|
158
|
-
op: str,
|
|
159
|
-
os_context: dict = Body(...),
|
|
160
|
-
jwt: str = Body(...),
|
|
161
|
-
params: dict = Body(dict()),
|
|
162
|
-
) -> str:
|
|
163
|
-
"""
|
|
164
|
-
Any request coming from Octostar (e.g. manifest) should enter from here.
|
|
165
|
-
"""
|
|
166
|
-
path_params = []
|
|
167
|
-
op = op.split("/")
|
|
168
|
-
if len(op) > 1:
|
|
169
|
-
path_params = op[1:]
|
|
170
|
-
op = op[0]
|
|
171
|
-
query_params = params
|
|
172
|
-
client = make_client(jwt)
|
|
173
|
-
if op not in self.endpoints.keys():
|
|
174
|
-
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
|
175
|
-
result = await self.endpoints[op](os_context, client, query_params)
|
|
176
|
-
return result
|
|
177
|
-
|
|
178
|
-
@staticmethod
|
|
179
|
-
def octostar_task(celery_executor, *args, **opts):
|
|
180
|
-
def decorator(func):
|
|
181
|
-
if celery_executor:
|
|
182
|
-
serialized_func = celery_executor.serialized_io(func)
|
|
183
|
-
task_func = celery_executor.app.task(*args, **opts)(serialized_func)
|
|
184
|
-
else:
|
|
185
|
-
|
|
186
|
-
@wraps(func)
|
|
187
|
-
def octostar_func(*args, **kwargs):
|
|
188
|
-
return func(None, *args, **kwargs)
|
|
189
|
-
|
|
190
|
-
task_func = octostar_func
|
|
191
|
-
return task_func
|
|
192
|
-
|
|
193
|
-
return decorator
|
|
194
|
-
|
|
195
|
-
|
|
196
100
|
class CommonModels(object):
|
|
197
101
|
class OKResponseModel(BaseModel):
|
|
198
102
|
message: str = "OK"
|
|
@@ -1216,18 +1216,20 @@ class NifiRoute(Route):
|
|
|
1216
1216
|
op = op[0]
|
|
1217
1217
|
query_params = request.query_params
|
|
1218
1218
|
processor_suffix = query_params["processor_suffix"]
|
|
1219
|
-
body = await request.json()
|
|
1220
1219
|
processor_name = "processor." + self.processor_name + "." + op + "." + processor_suffix
|
|
1221
1220
|
if op not in self.endpoints.keys():
|
|
1222
1221
|
raise StarletteHTTPException(403, f"Route {op} is forbidden for NiFi.")
|
|
1223
|
-
task_id = await self.celery_executor.send_task(
|
|
1222
|
+
task_id = await self.celery_executor.send_task(
|
|
1223
|
+
self.endpoints[op], args=[processor_name], part=request.stream()
|
|
1224
|
+
)
|
|
1224
1225
|
return task_id
|
|
1225
1226
|
|
|
1226
1227
|
@staticmethod
|
|
1227
1228
|
def nifi_task(celery_executor, *args, **opts):
|
|
1228
1229
|
def decorator(func):
|
|
1229
1230
|
@wraps(func)
|
|
1230
|
-
def nifi_func(task,
|
|
1231
|
+
def nifi_func(task, body_bytes, processor_name, *args, **kwargs):
|
|
1232
|
+
body = json.loads(body_bytes)
|
|
1231
1233
|
with NifiContextManager(body) as nifi_context:
|
|
1232
1234
|
entity_batches = nifi_context.receive_input(body, processor_name)
|
|
1233
1235
|
entity_batches = func(
|
|
@@ -15,13 +15,13 @@ def get_label_keys(type, ontology):
|
|
|
15
15
|
return list(label_keys.keys())
|
|
16
16
|
|
|
17
17
|
def get_label(record, ontology):
|
|
18
|
-
if record.get("
|
|
19
|
-
return str(record.get("
|
|
20
|
-
if record.get("
|
|
21
|
-
return str(record.get("
|
|
18
|
+
if record.get("os_entity_label_materialized") not in (None, ""):
|
|
19
|
+
return str(record.get("os_entity_label_materialized")).strip()
|
|
20
|
+
if record.get("os_entity_label") not in (None, ""):
|
|
21
|
+
return str(record.get("os_entity_label")).strip()
|
|
22
22
|
label_keys = get_label_keys(record["entity_type"], ontology)
|
|
23
23
|
fields = [record.get(field) for field in label_keys]
|
|
24
24
|
fields = [f for f in fields if f is not None]
|
|
25
25
|
fields = [str(f) for f in fields if f]
|
|
26
26
|
label = " ".join(fields)
|
|
27
|
-
return label or None
|
|
27
|
+
return label.strip() or None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|