streamlit-octostar-utils 0.4.2.dev24__tar.gz → 0.5.0.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/celery.py +217 -52
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +1 -97
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +5 -3
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/inheritance.py +5 -5
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/LICENSE +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/README.md +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.4.2.dev24 → streamlit_octostar_utils-0.5.0.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -9,7 +9,6 @@ import subprocess
|
|
|
9
9
|
from fastapi import Query
|
|
10
10
|
import time
|
|
11
11
|
import os
|
|
12
|
-
import pickle
|
|
13
12
|
import atexit
|
|
14
13
|
import redis
|
|
15
14
|
import uuid
|
|
@@ -68,7 +67,31 @@ class CeleryQueueConfig:
|
|
|
68
67
|
self.options = options
|
|
69
68
|
|
|
70
69
|
|
|
70
|
+
class TaskResult:
    """Pair a task's JSON-serializable result with optional binary payload(s).

    A task that produces binary data (for example an image) returns a
    TaskResult so that the binary portion can be written as separate
    multipart part(s) rather than being pushed through JSON serialization.

    data: the JSON-serializable portion of the result.
    part: the binary portion, or None when there is none.
    """

    def __init__(self, data, part=None):
        self.data, self.part = data, part
|
|
79
|
+
|
|
80
|
+
|
|
71
81
|
class CelerySerialized:
|
|
82
|
+
"""Serializes task data to a boundary-delimited multipart file.
|
|
83
|
+
|
|
84
|
+
Format: metadata JSON part followed by optional binary/streamed parts,
|
|
85
|
+
separated by boundary markers (the task_id). Replaces pickle entirely.
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
BOUNDARY_PREFIX = b"--"
|
|
89
|
+
BOUNDARY_SUFFIX = b"\r\n"
|
|
90
|
+
BOUNDARY_END = b"--\r\n"
|
|
91
|
+
CONTENT_TYPE_JSON = b"Content-Type: application/json\r\n"
|
|
92
|
+
CONTENT_TYPE_BYTES = b"Content-Type: application/octet-stream\r\n"
|
|
93
|
+
HEADER_END = b"\r\n"
|
|
94
|
+
|
|
72
95
|
def __init__(self, folder, redis_client, data=None):
|
|
73
96
|
self.folder = folder
|
|
74
97
|
self.data = data
|
|
@@ -77,18 +100,70 @@ class CelerySerialized:
|
|
|
77
100
|
def set_task_id(self, task_id):
    """Remember *task_id* on this instance.

    The other methods of this class use it as the file name inside
    ``self.folder`` and as the multipart boundary token.
    """
    setattr(self, "task_id", task_id)
|
|
79
102
|
|
|
80
|
-
def
|
|
103
|
+
def _boundary(self):
|
|
104
|
+
return self.task_id.encode()
|
|
105
|
+
|
|
106
|
+
def _write_boundary(self, f):
|
|
107
|
+
f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_SUFFIX)
|
|
108
|
+
|
|
109
|
+
def _write_end_boundary(self, f):
|
|
110
|
+
f.write(self.BOUNDARY_PREFIX + self._boundary() + self.BOUNDARY_END)
|
|
111
|
+
|
|
112
|
+
def dump(self, parts=None, part_is_list=False):
    """Write the metadata part plus optional binary parts to the task file.

    The file is ``<folder>/<task_id>`` and is written while holding a
    ``RedisFileLock`` on that same path. Layout: one ``application/json``
    part holding the metadata, then one ``application/octet-stream`` part
    per entry of *parts*, then the closing delimiter.

    parts: optional list of bytes objects to write as additional parts.
    part_is_list: stored in the metadata under ``part_is_list`` so a reader
        can tell whether the original part was a list.

    Raises whatever ``json.dumps`` raises when the metadata is not JSON
    serializable; this happens before the file is opened, so no partial
    file is produced in that case.
    """
    assert self.task_id
    if isinstance(self.data, dict):
        # Shallow copy so the bookkeeping keys added below do not leak into
        # the dict object the caller handed to this instance.
        metadata = dict(self.data)
    else:
        metadata = {"data": self.data}
    metadata["part_count"] = len(parts) if parts else 0
    metadata["part_is_list"] = part_is_list
    # Serialize up front: opening with "wb" truncates the file, so a
    # serialization error must surface before the file is touched.
    metadata_bytes = json.dumps(metadata).encode()
    path = os.path.join(self.folder, self.task_id)
    with RedisFileLock(self.redis_client, path):
        with open(path, "wb") as f:
            self._write_boundary(f)
            f.write(self.CONTENT_TYPE_JSON)
            f.write(self.HEADER_END)
            f.write(metadata_bytes)
            f.write(b"\r\n")
            for part in parts or ():
                self._write_boundary(f)
                f.write(self.CONTENT_TYPE_BYTES)
                f.write(self.HEADER_END)
                f.write(part)
                f.write(b"\r\n")
            self._write_end_boundary(f)
|
|
85
139
|
|
|
86
140
|
def load(self):
    """Read the multipart task file. Returns (metadata_dict, list_of_bytes_parts).

    The file ``<folder>/<task_id>`` is read in full while holding a
    ``RedisFileLock`` on that path, then split on the boundary delimiter.
    The part whose header mentions ``application/json`` is decoded as the
    metadata; every other part is returned verbatim as bytes, in file
    order. When no JSON part is found the metadata is an empty dict.

    Only the single CRLF that the writer emits after a delimiter and after
    a body is removed, so payloads that themselves begin or end with CR/LF
    bytes, and empty payloads, round-trip unchanged.

    NOTE(review): a payload that contains the delimiter bytes
    (``--`` + task id) would still be split incorrectly — confirm this
    cannot occur for the data being stored.
    """
    assert self.task_id
    crlf = b"\r\n"
    boundary = self.BOUNDARY_PREFIX + self._boundary()
    path = os.path.join(self.folder, self.task_id)
    with RedisFileLock(self.redis_client, path):
        with open(path, "rb") as f:
            raw = f.read()

    metadata = None
    parts = []
    for section in raw.split(boundary):
        # Skip the empty chunk before the first delimiter and the tail of
        # the closing delimiter ("--" followed by CRLF).
        if not section or section in (b"--", b"--" + crlf):
            continue
        # Drop exactly the CRLF that terminates the delimiter line.
        if section.startswith(crlf):
            section = section[len(crlf):]
        header_end = section.find(crlf + crlf)
        if header_end == -1:
            continue
        header = section[:header_end]
        body = section[header_end + 2 * len(crlf):]
        # Drop exactly the CRLF the writer appends after each body.
        if body.endswith(crlf):
            body = body[:-len(crlf)]
        if b"application/json" in header:
            metadata = json.loads(body)
        else:
            parts.append(body)
    return metadata or {}, parts
|
|
92
167
|
|
|
93
168
|
|
|
94
169
|
class CeleryExecutor(object):
|
|
@@ -129,7 +204,10 @@ class CeleryExecutor(object):
|
|
|
129
204
|
self.get_thread_pool = None
|
|
130
205
|
self.set_thread_pool = None
|
|
131
206
|
self.io_thread_pool = None
|
|
132
|
-
self.
|
|
207
|
+
self.queue_semaphores = {
|
|
208
|
+
k: threading.Semaphore(v.max_tasks_in_queue) if v.max_tasks_in_queue else None
|
|
209
|
+
for k, v in self.queue_config.items()
|
|
210
|
+
}
|
|
133
211
|
|
|
134
212
|
# Folder setup
|
|
135
213
|
self.base_folder = Path(base_folder).resolve()
|
|
@@ -520,9 +598,16 @@ class CeleryExecutor(object):
|
|
|
520
598
|
task_id = task.request.id
|
|
521
599
|
serialized_data = CelerySerialized(folder=self.in_folder, redis_client=self.redis_client)
|
|
522
600
|
serialized_data.set_task_id(task_id)
|
|
523
|
-
|
|
601
|
+
metadata, parts = serialized_data.load()
|
|
524
602
|
del serialized_data
|
|
525
|
-
args, kwargs =
|
|
603
|
+
args, kwargs = metadata.get("args", []), metadata.get("kwargs", {})
|
|
604
|
+
|
|
605
|
+
part_count = metadata.get("part_count", 0)
|
|
606
|
+
if part_count > 0:
|
|
607
|
+
if metadata.get("part_is_list", part_count > 1):
|
|
608
|
+
args = [parts] + args
|
|
609
|
+
else:
|
|
610
|
+
args = [parts[0]] + args
|
|
526
611
|
|
|
527
612
|
if self.app.conf.task_always_eager:
|
|
528
613
|
queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
|
|
@@ -533,9 +618,25 @@ class CeleryExecutor(object):
|
|
|
533
618
|
queue = task.request.delivery_info.get("routing_key", self.app.conf.task_default_routing_key)
|
|
534
619
|
task.request.resources = (self.resource_registry or {}).get(queue, {})
|
|
535
620
|
out_data = task_fn(task, *args, **kwargs)
|
|
536
|
-
|
|
621
|
+
if isinstance(out_data, TaskResult):
|
|
622
|
+
out_part_is_list = isinstance(out_data.part, list)
|
|
623
|
+
if out_data.part is None:
|
|
624
|
+
out_parts = None
|
|
625
|
+
elif out_part_is_list:
|
|
626
|
+
out_parts = out_data.part
|
|
627
|
+
else:
|
|
628
|
+
out_parts = [out_data.part]
|
|
629
|
+
serialized_data = CelerySerialized(
|
|
630
|
+
folder=self.out_folder, data=out_data.data, redis_client=self.redis_client
|
|
631
|
+
)
|
|
632
|
+
else:
|
|
633
|
+
out_parts = None
|
|
634
|
+
out_part_is_list = False
|
|
635
|
+
serialized_data = CelerySerialized(
|
|
636
|
+
folder=self.out_folder, data=out_data, redis_client=self.redis_client
|
|
637
|
+
)
|
|
537
638
|
serialized_data.set_task_id(task_id)
|
|
538
|
-
serialized_data.dump()
|
|
639
|
+
serialized_data.dump(parts=out_parts, part_is_list=out_part_is_list)
|
|
539
640
|
del serialized_data
|
|
540
641
|
if os.path.isfile(os.path.join(self.in_folder, task_id)):
|
|
541
642
|
with RedisFileLock(self.redis_client, os.path.join(self.in_folder, task_id)):
|
|
@@ -576,36 +677,23 @@ class CeleryExecutor(object):
|
|
|
576
677
|
|
|
577
678
|
return decorator
|
|
578
679
|
|
|
579
|
-
async def send_task(self, task_fn, args=
|
|
680
|
+
async def send_task(self, task_fn, args=None, kwargs=None, part=None, **options) -> str:
|
|
681
|
+
args = args if args is not None else []
|
|
682
|
+
kwargs = kwargs if kwargs is not None else {}
|
|
580
683
|
if self.app.conf.task_always_eager and "dev_preload" not in self.app.conf:
|
|
581
684
|
self.preload_on_worker_init()
|
|
582
685
|
self.app.conf.dev_preload = True
|
|
583
686
|
|
|
584
|
-
def
|
|
687
|
+
def _check_queue_llen(queue_name):
|
|
585
688
|
if self._queue_stalled.get(queue_name, False):
|
|
586
689
|
raise CeleryExecutor.QueueStalledException(
|
|
587
690
|
f"Queue '{queue_name}' is stalled. Service temporarily unavailable."
|
|
588
691
|
)
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
reserved_count = int(self.redis_client.get(reservation_key) or 0)
|
|
595
|
-
total_count = queue_count + reserved_count
|
|
596
|
-
if total_count >= limit:
|
|
597
|
-
raise CeleryExecutor.QueueFullException(
|
|
598
|
-
f"Queue '{queue_name}' has reached its limit of {limit} tasks!"
|
|
599
|
-
)
|
|
600
|
-
self.redis_client.incr(reservation_key)
|
|
601
|
-
return True
|
|
602
|
-
return False
|
|
603
|
-
|
|
604
|
-
def _release_queue_slot(queue_name):
|
|
605
|
-
limit = self.queue_config[queue_name].max_tasks_in_queue
|
|
606
|
-
if limit:
|
|
607
|
-
reservation_key = f"queue:reserved:{queue_name}"
|
|
608
|
-
self.redis_client.decr(reservation_key)
|
|
692
|
+
if self.redis_client.llen(queue_name) >= self.queue_config[queue_name].max_tasks_in_queue:
|
|
693
|
+
raise CeleryExecutor.QueueFullException(
|
|
694
|
+
f"Queue '{queue_name}' has reached its limit of "
|
|
695
|
+
f"{self.queue_config[queue_name].max_tasks_in_queue} tasks!"
|
|
696
|
+
)
|
|
609
697
|
|
|
610
698
|
def _write_task_data(in_folder, task_args, task_kwargs, task_id):
|
|
611
699
|
serialized_data = CelerySerialized(
|
|
@@ -629,19 +717,37 @@ class CeleryExecutor(object):
|
|
|
629
717
|
queue_name = self.app.conf.task_default_routing_key
|
|
630
718
|
queue_name = getattr(task_fn, "queue", queue_name)
|
|
631
719
|
queue_name = options.get("queue", queue_name)
|
|
632
|
-
|
|
720
|
+
|
|
721
|
+
sem = self.queue_semaphores.get(queue_name)
|
|
722
|
+
acquired = False
|
|
723
|
+
if sem is not None:
|
|
724
|
+
if not sem.acquire(blocking=False):
|
|
725
|
+
raise CeleryExecutor.QueueFullException(
|
|
726
|
+
f"Queue '{queue_name}' has reached its limit of "
|
|
727
|
+
f"{self.queue_config[queue_name].max_tasks_in_queue} tasks!"
|
|
728
|
+
)
|
|
729
|
+
acquired = True
|
|
730
|
+
|
|
633
731
|
try:
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
self.
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
732
|
+
if acquired:
|
|
733
|
+
await asyncio.get_running_loop().run_in_executor(
|
|
734
|
+
self.set_thread_pool, _check_queue_llen, queue_name
|
|
735
|
+
)
|
|
736
|
+
|
|
737
|
+
if part is not None:
|
|
738
|
+
await self._write_task_data_with_part(
|
|
739
|
+
task_id, args, kwargs, part
|
|
740
|
+
)
|
|
741
|
+
else:
|
|
742
|
+
await asyncio.get_running_loop().run_in_executor(
|
|
743
|
+
self.io_thread_pool,
|
|
744
|
+
_write_task_data,
|
|
745
|
+
self.in_folder,
|
|
746
|
+
args,
|
|
747
|
+
kwargs,
|
|
748
|
+
task_id,
|
|
749
|
+
)
|
|
750
|
+
|
|
645
751
|
await asyncio.get_running_loop().run_in_executor(
|
|
646
752
|
self.set_thread_pool, _send_task, task_fn, task_id, options
|
|
647
753
|
)
|
|
@@ -652,11 +758,61 @@ class CeleryExecutor(object):
|
|
|
652
758
|
logger.info(f"Cancelling task {task_id} due to disconnect!")
|
|
653
759
|
await self.terminate_task(task_id)
|
|
654
760
|
raise
|
|
761
|
+
except Exception:
|
|
762
|
+
try:
|
|
763
|
+
await self.terminate_task(task_id)
|
|
764
|
+
except Exception:
|
|
765
|
+
pass
|
|
766
|
+
raise
|
|
655
767
|
finally:
|
|
656
|
-
if
|
|
657
|
-
|
|
768
|
+
if acquired:
|
|
769
|
+
sem.release()
|
|
658
770
|
return task_id
|
|
659
771
|
|
|
772
|
+
async def _write_task_data_with_part(self, task_id, args, kwargs, part):
    """Write task data with a streamed part to the multipart file.

    The part becomes the first arg on the worker side.

    The file ``<in_folder>/<task_id>`` receives a JSON metadata part
    (``args``, ``kwargs``, ``part_count``, ``part_is_list``) followed by
    one ``application/octet-stream`` part per item.

    part: a single item or a list of items. Each item may be
        - an async iterable yielding bytes-like or str chunks,
        - an object with ``read(n)`` (awaitable or plain) returning
          bytes-like or str chunks, or
        - a bytes-like object (bytes, bytearray, memoryview).

    Raises TypeError for any other item type. If writing fails for any
    reason (including cancellation) the partially written file is removed
    before the exception propagates.

    NOTE(review): file writes here are synchronous and run on the event
    loop, and no RedisFileLock is taken, unlike CelerySerialized.dump —
    confirm both are intentional.
    """
    import inspect

    boundary = CelerySerialized.BOUNDARY_PREFIX + task_id.encode()
    boundary_line = boundary + CelerySerialized.BOUNDARY_SUFFIX
    end_boundary_line = boundary + CelerySerialized.BOUNDARY_END

    is_list = isinstance(part, list)
    items = part if is_list else [part]

    metadata = {
        "args": args,
        "kwargs": kwargs,
        "part_count": len(items),
        "part_is_list": is_list,
    }
    metadata_bytes = json.dumps(metadata).encode()

    def _as_bytes(chunk):
        # str chunks are encoded; anything else is passed to write() as-is.
        return chunk.encode() if isinstance(chunk, str) else chunk

    file_path = os.path.join(self.in_folder, task_id)
    try:
        with open(file_path, "wb") as f:
            f.write(boundary_line)
            f.write(CelerySerialized.CONTENT_TYPE_JSON)
            f.write(CelerySerialized.HEADER_END)
            f.write(metadata_bytes)
            f.write(b"\r\n")

            for item in items:
                f.write(boundary_line)
                f.write(CelerySerialized.CONTENT_TYPE_BYTES)
                f.write(CelerySerialized.HEADER_END)
                if hasattr(item, "__aiter__"):
                    async for chunk in item:
                        f.write(_as_bytes(chunk))
                elif hasattr(item, "read"):
                    while True:
                        chunk = item.read(65536)
                        # Accept both coroutine-style and plain read().
                        if inspect.isawaitable(chunk):
                            chunk = await chunk
                        if not chunk:
                            break
                        f.write(_as_bytes(chunk))
                elif isinstance(item, (bytes, bytearray, memoryview)):
                    f.write(item)
                else:
                    raise TypeError(f"Unsupported part item type: {type(item)}")
                f.write(b"\r\n")

            f.write(end_boundary_line)
    except BaseException:
        # Do not leave a truncated multipart file behind.
        try:
            os.remove(file_path)
        except OSError:
            pass
        raise
|
|
815
|
+
|
|
660
816
|
async def terminate_task(self, task_id):
|
|
661
817
|
def _terminate_task(celery_app, task_id):
|
|
662
818
|
celery_app.control.revoke(task_id, terminate=True)
|
|
@@ -710,8 +866,15 @@ class CeleryExecutor(object):
|
|
|
710
866
|
def _read_task_data(out_folder, task_id):
|
|
711
867
|
serialized_data = CelerySerialized(folder=out_folder, redis_client=self.redis_client)
|
|
712
868
|
serialized_data.set_task_id(task_id)
|
|
713
|
-
|
|
714
|
-
|
|
869
|
+
metadata, parts = serialized_data.load()
|
|
870
|
+
data = metadata.get("data", metadata)
|
|
871
|
+
part_count = metadata.get("part_count", 0)
|
|
872
|
+
if part_count > 0:
|
|
873
|
+
if metadata.get("part_is_list", part_count > 1):
|
|
874
|
+
return TaskResult(data=data, part=parts)
|
|
875
|
+
else:
|
|
876
|
+
return TaskResult(data=data, part=parts[0])
|
|
877
|
+
return data
|
|
715
878
|
|
|
716
879
|
def _remove_task_data(celery_app, in_folder, out_folder, task_id):
|
|
717
880
|
celery_app.AsyncResult(task_id).forget()
|
|
@@ -737,8 +900,10 @@ class CeleryExecutor(object):
|
|
|
737
900
|
)
|
|
738
901
|
return result
|
|
739
902
|
|
|
740
|
-
async def send_and_wait_task(self, task_fn, args=
|
|
741
|
-
|
|
903
|
+
async def send_and_wait_task(self, task_fn, args=None, kwargs=None, part=None, timeout=60, **options):
|
|
904
|
+
args = args if args is not None else []
|
|
905
|
+
kwargs = kwargs if kwargs is not None else {}
|
|
906
|
+
task_id = await self.send_task(task_fn, args, kwargs, part=part, **options)
|
|
742
907
|
ready = False
|
|
743
908
|
state = None
|
|
744
909
|
start_time = time.time()
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from fastapi import Request
|
|
2
|
+
from fastapi import Request
|
|
3
3
|
from fastapi.responses import JSONResponse, StreamingResponse
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
from typing import List, Optional, Literal, Any
|
|
@@ -17,8 +17,6 @@ import traceback
|
|
|
17
17
|
from copy import copy
|
|
18
18
|
import logging
|
|
19
19
|
|
|
20
|
-
from octostar.client import make_client
|
|
21
|
-
|
|
22
20
|
MAX_ERROR_MESSAGE_BYTES = 256
|
|
23
21
|
MAX_ERROR_TRACEBACK_BYTES = 10240
|
|
24
22
|
|
|
@@ -99,100 +97,6 @@ class Route(ABC):
|
|
|
99
97
|
return self
|
|
100
98
|
|
|
101
99
|
|
|
102
|
-
class OctostarRoute(Route):
|
|
103
|
-
def __init__(self, app, tasks_routes, celery_executor=None, router=None):
|
|
104
|
-
self.app = app
|
|
105
|
-
self._router = router
|
|
106
|
-
self.routed_funcs = []
|
|
107
|
-
self.tasks_routes = tasks_routes
|
|
108
|
-
self.celery_executor = celery_executor
|
|
109
|
-
self.endpoints = {}
|
|
110
|
-
self.define_routes()
|
|
111
|
-
|
|
112
|
-
def register_route(self, op, octostar_task):
|
|
113
|
-
self.endpoints[op.strip("/")] = octostar_task
|
|
114
|
-
|
|
115
|
-
def define_routes(self):
|
|
116
|
-
if self.celery_executor:
|
|
117
|
-
|
|
118
|
-
@Route.route(self, path="/task-state/{task_id}")
|
|
119
|
-
async def get_task_status(task_id: str) -> JSONResponse:
|
|
120
|
-
task_status = await self.tasks_routes.get_task(task_id, pop=False)
|
|
121
|
-
task_status = task_status.model_dump(mode="json")["data"]["task_state"]
|
|
122
|
-
return JSONResponse(task_status)
|
|
123
|
-
|
|
124
|
-
@Route.route(self, path="/task-result/{task_id}")
|
|
125
|
-
async def get_task_result(task_id: str) -> JSONResponse:
|
|
126
|
-
return_data = await self.tasks_routes.get_task(task_id, pop=True)
|
|
127
|
-
return_data = return_data.model_dump(mode="json")["data"]["data"]
|
|
128
|
-
return JSONResponse(return_data)
|
|
129
|
-
|
|
130
|
-
@Route.route(self, path="/{op}", methods=["POST"])
|
|
131
|
-
async def send_task(
|
|
132
|
-
op: str,
|
|
133
|
-
os_context: dict = Body(...),
|
|
134
|
-
jwt: str = Body(...),
|
|
135
|
-
params: dict = Body(dict()),
|
|
136
|
-
) -> str:
|
|
137
|
-
"""
|
|
138
|
-
Any request coming from Octostar (e.g. manifest) should enter from here.
|
|
139
|
-
"""
|
|
140
|
-
path_params = []
|
|
141
|
-
op = op.split("/")
|
|
142
|
-
if len(op) > 1:
|
|
143
|
-
path_params = op[1:]
|
|
144
|
-
op = op[0]
|
|
145
|
-
query_params = params
|
|
146
|
-
client = make_client(jwt)
|
|
147
|
-
if op not in self.endpoints.keys():
|
|
148
|
-
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
|
149
|
-
task_id = await self.celery_executor.send_task(
|
|
150
|
-
self.endpoints[op], args=[os_context, client, query_params]
|
|
151
|
-
)
|
|
152
|
-
return task_id
|
|
153
|
-
|
|
154
|
-
else:
|
|
155
|
-
|
|
156
|
-
@Route.route(self, path="/{op}", methods=["POST"])
|
|
157
|
-
async def call_task(
|
|
158
|
-
op: str,
|
|
159
|
-
os_context: dict = Body(...),
|
|
160
|
-
jwt: str = Body(...),
|
|
161
|
-
params: dict = Body(dict()),
|
|
162
|
-
) -> str:
|
|
163
|
-
"""
|
|
164
|
-
Any request coming from Octostar (e.g. manifest) should enter from here.
|
|
165
|
-
"""
|
|
166
|
-
path_params = []
|
|
167
|
-
op = op.split("/")
|
|
168
|
-
if len(op) > 1:
|
|
169
|
-
path_params = op[1:]
|
|
170
|
-
op = op[0]
|
|
171
|
-
query_params = params
|
|
172
|
-
client = make_client(jwt)
|
|
173
|
-
if op not in self.endpoints.keys():
|
|
174
|
-
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
|
175
|
-
result = await self.endpoints[op](os_context, client, query_params)
|
|
176
|
-
return result
|
|
177
|
-
|
|
178
|
-
@staticmethod
|
|
179
|
-
def octostar_task(celery_executor, *args, **opts):
|
|
180
|
-
def decorator(func):
|
|
181
|
-
if celery_executor:
|
|
182
|
-
serialized_func = celery_executor.serialized_io(func)
|
|
183
|
-
task_func = celery_executor.app.task(*args, **opts)(serialized_func)
|
|
184
|
-
else:
|
|
185
|
-
|
|
186
|
-
@wraps(func)
|
|
187
|
-
def octostar_func(*args, **kwargs):
|
|
188
|
-
return func(None, *args, **kwargs)
|
|
189
|
-
|
|
190
|
-
task_func = octostar_func
|
|
191
|
-
return task_func
|
|
192
|
-
|
|
193
|
-
return decorator
|
|
194
|
-
|
|
195
|
-
|
|
196
100
|
class CommonModels(object):
|
|
197
101
|
class OKResponseModel(BaseModel):
|
|
198
102
|
message: str = "OK"
|
|
@@ -1216,18 +1216,20 @@ class NifiRoute(Route):
|
|
|
1216
1216
|
op = op[0]
|
|
1217
1217
|
query_params = request.query_params
|
|
1218
1218
|
processor_suffix = query_params["processor_suffix"]
|
|
1219
|
-
body = await request.json()
|
|
1220
1219
|
processor_name = "processor." + self.processor_name + "." + op + "." + processor_suffix
|
|
1221
1220
|
if op not in self.endpoints.keys():
|
|
1222
1221
|
raise StarletteHTTPException(403, f"Route {op} is forbidden for NiFi.")
|
|
1223
|
-
task_id = await self.celery_executor.send_task(
|
|
1222
|
+
task_id = await self.celery_executor.send_task(
|
|
1223
|
+
self.endpoints[op], args=[processor_name], part=request.stream()
|
|
1224
|
+
)
|
|
1224
1225
|
return task_id
|
|
1225
1226
|
|
|
1226
1227
|
@staticmethod
|
|
1227
1228
|
def nifi_task(celery_executor, *args, **opts):
|
|
1228
1229
|
def decorator(func):
|
|
1229
1230
|
@wraps(func)
|
|
1230
|
-
def nifi_func(task,
|
|
1231
|
+
def nifi_func(task, body_bytes, processor_name, *args, **kwargs):
|
|
1232
|
+
body = json.loads(body_bytes)
|
|
1231
1233
|
with NifiContextManager(body) as nifi_context:
|
|
1232
1234
|
entity_batches = nifi_context.receive_input(body, processor_name)
|
|
1233
1235
|
entity_batches = func(
|
|
@@ -15,13 +15,13 @@ def get_label_keys(type, ontology):
|
|
|
15
15
|
return list(label_keys.keys())
|
|
16
16
|
|
|
17
17
|
def get_label(record, ontology):
    """Return a display label for *record*, or None when none can be built.

    Lookup order:
      1. ``record["os_entity_label_materialized"]``
      2. ``record["os_entity_label"]``
      3. the truthy values of the fields returned by
         ``get_label_keys(record["entity_type"], ontology)``, converted to
         str and joined with single spaces.

    Every candidate is stripped of surrounding whitespace, and a candidate
    that is empty after stripping is skipped, so a whitespace-only explicit
    label falls through to the next source instead of being returned as "".

    Raises KeyError if step 3 is reached and *record* has no "entity_type".
    """
    for key in ("os_entity_label_materialized", "os_entity_label"):
        value = record.get(key)
        if value is None:
            continue
        text = str(value).strip()
        if text:
            return text
    label_keys = get_label_keys(record["entity_type"], ontology)
    values = (record.get(field) for field in label_keys)
    # Falsy values (None, "", 0, ...) are left out of the label.
    label = " ".join(str(v) for v in values if v)
    return label.strip() or None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|