streamlit-octostar-utils 0.5.5.dev1__tar.gz → 0.5.6.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/nifi.py +16 -218
- streamlit_octostar_utils-0.5.6.dev2/streamlit_octostar_utils/core/opensearch_conversion.py +354 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/LICENSE +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/README.md +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/nlp/language.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/ontology/relationships.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.5.5.dev1 → streamlit_octostar_utils-0.5.6.dev2}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
|
@@ -17,7 +17,6 @@ from starlette.exceptions import HTTPException as StarletteHTTPException
|
|
|
17
17
|
|
|
18
18
|
from octostar.utils.workspace import upsert_entities
|
|
19
19
|
from octostar.utils.ontology import fetch_ontology_data
|
|
20
|
-
from octostar.utils.workspace.permissions import get_permissions, PermissionLevel
|
|
21
20
|
from octostar.utils.pipeline import update_processing_status
|
|
22
21
|
|
|
23
22
|
from octostar.client import make_client
|
|
@@ -54,144 +53,6 @@ OS_RESERVED_FIELDS = [
|
|
|
54
53
|
MAX_IN_MEMORY_SIZE_BYTES = 5_242_880
|
|
55
54
|
|
|
56
55
|
|
|
57
|
-
class NifiPriority:
    """Four-part, string-sortable priority attached to a NiFi entity.

    The string form is ``op_reserved.user_prio.fragment_prio.entity_timestamp``.
    The first two components are stored as fixed-width base62 text; the last
    two are stored as integers and base62-encoded by :meth:`to_string`.
    Component widths are given by ``COMPONENT_WIDTHS`` in that order.
    """

    COMPONENT_WIDTHS = (10, 10, 1, 20)
    SEPARATOR = "."
    MAX_FRAGMENT_DEPTH = 9
    BASE62_CHARS = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
    BASE = len(BASE62_CHARS)
    _BASE62_SET = frozenset(BASE62_CHARS)

    @staticmethod
    def _normalize_component(value, width):
        """Return *value* as base62 text left-padded with ``"0"`` to *width*.

        ``None`` and values longer than *width* both map to ``"z" * width``.

        Raises:
            ValueError: if ``str(value)`` contains a non-base62 character.
        """
        if value is None:
            return "z" * width
        text = str(value)
        invalid = set(text) - NifiPriority._BASE62_SET
        if invalid:
            raise ValueError(f"Invalid base62 character(s): {sorted(invalid)!r}")
        if len(text) > width:
            return "z" * width
        return text.rjust(width, "0")

    def __init__(self, op_reserved=None, user_prio=None, fragment_prio=0, entity_timestamp=0):
        """Normalize the two text components and coerce the two numeric ones to int."""
        widths = self.COMPONENT_WIDTHS
        self.op_reserved = self._normalize_component(op_reserved, widths[0])
        self.user_prio = self._normalize_component(user_prio, widths[1])
        self.fragment_prio = int(fragment_prio)
        self.entity_timestamp = int(entity_timestamp)

    @staticmethod
    def _encode_base62(value, width):
        """Encode a non-negative int as base62 text of exactly *width* characters.

        Negative values are clamped to 0; values too large for *width* saturate
        to the highest digit repeated *width* times.
        """
        chars = NifiPriority.BASE62_CHARS
        base = NifiPriority.BASE
        if value <= 0:
            return "0" * width
        digits = []
        remaining = value
        while remaining > 0:
            remaining, digit = divmod(remaining, base)
            digits.append(chars[digit])
        encoded = "".join(reversed(digits))
        if len(encoded) > width:
            return chars[-1] * width
        return encoded.rjust(width, "0")

    @staticmethod
    def _decode_base62(s):
        """Decode base62 text to an int (empty text decodes to 0).

        Raises:
            ValueError: if *s* contains a non-base62 character.
        """
        chars = NifiPriority.BASE62_CHARS
        result = 0
        for c in s:
            idx = chars.find(c)
            if idx < 0:
                raise ValueError(f"Invalid base62 character: {c!r}")
            result = result * NifiPriority.BASE + idx
        return result

    @classmethod
    def from_string(cls, s):
        """Parse the dotted string form; any malformed input yields a default instance."""
        if not s or cls.SEPARATOR not in s:
            return cls()
        parts = s.split(cls.SEPARATOR)
        if len(parts) != 4:
            return cls()
        if any(set(part) - cls._BASE62_SET for part in parts):
            return cls()
        try:
            return cls(
                op_reserved=parts[0],
                user_prio=parts[1],
                fragment_prio=cls._decode_base62(parts[2]),
                entity_timestamp=cls._decode_base62(parts[3]),
            )
        except (ValueError, TypeError):
            return cls()

    @classmethod
    def from_dict(cls, d):
        """Build an instance from a dict; missing or ``None`` fields use the defaults."""
        if not d:
            return cls()
        return cls(
            op_reserved=d.get("op_reserved"),
            user_prio=d.get("user_prio"),
            # ``or 0`` also covers an explicit None, which int() would reject.
            fragment_prio=d.get("fragment_prio") or 0,
            entity_timestamp=d.get("entity_timestamp") or 0,
        )

    @classmethod
    def from_entity(cls, entity):
        """Derive the priority of *entity* from its ``request`` dict.

        The text components come from ``request["priority"]``; the fragment
        depth and the timestamp (milliseconds) are always recomputed.
        """
        prio = cls.from_dict(entity.request.get("priority", {}))
        prio.fragment_prio = cls.compute_fragment_depth(entity)
        prio.entity_timestamp = 0
        ts_str = entity.request.get("entity_timestamp")
        if ts_str:
            try:
                prio.entity_timestamp = int(string_to_datetime(ts_str).timestamp() * 1000)
            except Exception:
                # Best effort: an unparseable timestamp falls back to 0.
                prio.entity_timestamp = 0
        return prio

    @staticmethod
    def compute_fragment_depth(entity):
        """Count leading fragment-stack entries whose fragment index is non-zero.

        Counting stops at the first entry with index 0 or whose info cannot be
        read; the result is capped at ``MAX_FRAGMENT_DEPTH``.
        """
        stack = entity.request.get("config", {}).get("fragment", {}).get("fragments_stack", [])
        depth = 0
        for key in stack:
            try:
                info = NifiFragmenter.get_fragment_info(entity, key)
            except (KeyError, RuntimeError):
                break
            if info.get("index", 0) == 0:
                break
            depth += 1
        return min(depth, NifiPriority.MAX_FRAGMENT_DEPTH)

    def to_string(self):
        """Return the dotted, fixed-width string form of this priority."""
        widths = self.COMPONENT_WIDTHS
        return self.SEPARATOR.join(
            [
                self.op_reserved,
                self.user_prio,
                self._encode_base62(self.fragment_prio, widths[2]),
                self._encode_base62(self.entity_timestamp, widths[3]),
            ]
        )

    def to_dict(self):
        """Return only the two text components; the numeric ones are not included."""
        return {
            "op_reserved": self.op_reserved,
            "user_prio": self.user_prio,
        }

    def apply_to_entity(self, entity):
        """Store :meth:`to_dict` under ``entity.request["priority"]``."""
        entity.request["priority"] = self.to_dict()
|
|
193
|
-
|
|
194
|
-
|
|
195
56
|
class NifiProxyEntityModel(BaseModel):
|
|
196
57
|
entity_id: str
|
|
197
58
|
entity_type: str
|
|
@@ -223,8 +84,6 @@ class NifiEntityModel(BaseModel):
|
|
|
223
84
|
is_temporary: bool = False
|
|
224
85
|
exception: dict = Field(default_factory=dict)
|
|
225
86
|
last_processor_name: Optional[str] = None
|
|
226
|
-
fallback_os_workspace: Optional[str] = None
|
|
227
|
-
priority: dict = Field(default_factory=dict)
|
|
228
87
|
|
|
229
88
|
class RecordModel(BaseModel):
|
|
230
89
|
model_config = ConfigDict(extra="allow")
|
|
@@ -614,7 +473,6 @@ class NifiContextManager(object):
|
|
|
614
473
|
def __init__(self, json_data, lazy_sync=True):
|
|
615
474
|
if not json_data:
|
|
616
475
|
raise ValueError("Nifi context manager received list of 0 entities")
|
|
617
|
-
self.permissions = {}
|
|
618
476
|
self.in_batches = None
|
|
619
477
|
self.out_entities = None
|
|
620
478
|
self.nonlazy_sync_ids = set()
|
|
@@ -667,8 +525,6 @@ class NifiContextManager(object):
|
|
|
667
525
|
key=lambda x: string_to_datetime(x.request.get("entity_timestamp")),
|
|
668
526
|
)
|
|
669
527
|
entities = list({e.record["entity_id"]: e for e in entities}.values())
|
|
670
|
-
for entity in entities:
|
|
671
|
-
NifiPriority.from_entity(entity).apply_to_entity(entity)
|
|
672
528
|
entities = [
|
|
673
529
|
(
|
|
674
530
|
jsondict_hash(NifiContextManager._config_get(entity, processor_name)),
|
|
@@ -693,16 +549,6 @@ class NifiContextManager(object):
|
|
|
693
549
|
def __enter__(self):
|
|
694
550
|
return self
|
|
695
551
|
|
|
696
|
-
def get_workspaces_permissions(self, workspace_ids):
|
|
697
|
-
permissions_to_fetch = list(set(workspace_ids).difference(set(list(self.permissions.keys()))))
|
|
698
|
-
if permissions_to_fetch:
|
|
699
|
-
permissions = get_permissions.sync(permissions_to_fetch, client=self.client)
|
|
700
|
-
self.permissions.update(permissions)
|
|
701
|
-
permissions = {}
|
|
702
|
-
for k in workspace_ids:
|
|
703
|
-
permissions[k] = self.permissions.get(k, PermissionLevel.NONE)
|
|
704
|
-
return permissions
|
|
705
|
-
|
|
706
552
|
def request_entity_sync(
|
|
707
553
|
self,
|
|
708
554
|
entity,
|
|
@@ -743,9 +589,6 @@ class NifiContextManager(object):
|
|
|
743
589
|
)
|
|
744
590
|
self.out_entities = list({e.record["entity_id"]: e for e in all_entities}.values())
|
|
745
591
|
self.sync_entities()
|
|
746
|
-
for entity in self.out_entities:
|
|
747
|
-
prio = NifiPriority.from_entity(entity)
|
|
748
|
-
entity.request["nifi_attributes"]["priority"] = prio.to_string()
|
|
749
592
|
return [entity for entity in self.jsonify(self.out_entities)["content"]]
|
|
750
593
|
|
|
751
594
|
def raise_exception(self, entity, exc):
|
|
@@ -772,16 +615,18 @@ class NifiContextManager(object):
|
|
|
772
615
|
import logging
|
|
773
616
|
_lock_logger = logging.getLogger(__name__)
|
|
774
617
|
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
(e, e.get("entity_timestamp"))
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
618
|
+
def _read_entries():
|
|
619
|
+
entries = []
|
|
620
|
+
for e in entities:
|
|
621
|
+
if isinstance(e, dict):
|
|
622
|
+
entries.append((e, e.get("entity_timestamp")))
|
|
623
|
+
else:
|
|
624
|
+
entries.append(
|
|
625
|
+
(e.record, e.request.get("entity_timestamp") if e.request else None)
|
|
626
|
+
)
|
|
627
|
+
return entries
|
|
628
|
+
|
|
629
|
+
records = _read_entries()
|
|
785
630
|
|
|
786
631
|
long_expiry = (datetime.now(timezone.utc) + timedelta(seconds=timeout)).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
787
632
|
statuses = [
|
|
@@ -815,6 +660,7 @@ class NifiContextManager(object):
|
|
|
815
660
|
try:
|
|
816
661
|
yield True
|
|
817
662
|
finally:
|
|
663
|
+
records = _read_entries()
|
|
818
664
|
short_expiry = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
819
665
|
statuses = [
|
|
820
666
|
{
|
|
@@ -849,10 +695,8 @@ class NifiContextManager(object):
|
|
|
849
695
|
self._sync_upsert_entities(entities_to_upsert)
|
|
850
696
|
self._sync_fetch_relationships(entities, fetch_rel_entities, fetch_concept_rels)
|
|
851
697
|
|
|
852
|
-
now_ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
853
698
|
for entity in all_entities_to_modify:
|
|
854
699
|
entity.request["is_temporary"] = False
|
|
855
|
-
entity.request["entity_timestamp"] = now_ts
|
|
856
700
|
|
|
857
701
|
for entity in entities:
|
|
858
702
|
entity.sync_params = {}
|
|
@@ -1114,29 +958,7 @@ class NifiEntity(object):
|
|
|
1114
958
|
|
|
1115
959
|
@property
|
|
1116
960
|
def write_os_workspace(self):
|
|
1117
|
-
|
|
1118
|
-
[
|
|
1119
|
-
e
|
|
1120
|
-
for e in [
|
|
1121
|
-
self.record.get("os_workspace"),
|
|
1122
|
-
self.request.get("fallback_os_workspace"),
|
|
1123
|
-
]
|
|
1124
|
-
if e
|
|
1125
|
-
]
|
|
1126
|
-
)
|
|
1127
|
-
if (
|
|
1128
|
-
self.record.get("os_workspace")
|
|
1129
|
-
and (permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE) >= PermissionLevel.WRITE
|
|
1130
|
-
):
|
|
1131
|
-
return self.record["os_workspace"]
|
|
1132
|
-
elif (
|
|
1133
|
-
self.request.get("fallback_os_workspace")
|
|
1134
|
-
and (permissions.get(self.request.get("fallback_os_workspace")) or PermissionLevel.NONE)
|
|
1135
|
-
>= PermissionLevel.WRITE
|
|
1136
|
-
):
|
|
1137
|
-
return self.request["fallback_os_workspace"]
|
|
1138
|
-
else:
|
|
1139
|
-
return None
|
|
961
|
+
return self.record.get("os_workspace")
|
|
1140
962
|
|
|
1141
963
|
@property
|
|
1142
964
|
def label(self):
|
|
@@ -1164,29 +986,6 @@ class NifiEntity(object):
|
|
|
1164
986
|
options={"verify_signature": False},
|
|
1165
987
|
)
|
|
1166
988
|
|
|
1167
|
-
@property
|
|
1168
|
-
def priority(self):
|
|
1169
|
-
return NifiPriority.from_entity(self)
|
|
1170
|
-
|
|
1171
|
-
@priority.setter
|
|
1172
|
-
def priority(self, value):
|
|
1173
|
-
if isinstance(value, NifiPriority):
|
|
1174
|
-
self.request["priority"] = value.to_dict()
|
|
1175
|
-
elif isinstance(value, dict):
|
|
1176
|
-
self.request["priority"] = value
|
|
1177
|
-
else:
|
|
1178
|
-
raise TypeError("priority must be a NifiPriority or dict")
|
|
1179
|
-
|
|
1180
|
-
def set_user_priority(self, value):
|
|
1181
|
-
prio = self.priority
|
|
1182
|
-
prio.user_prio = NifiPriority._normalize_component(value, NifiPriority.COMPONENT_WIDTHS[1])
|
|
1183
|
-
self.priority = prio
|
|
1184
|
-
|
|
1185
|
-
def set_op_priority(self, value):
|
|
1186
|
-
prio = self.priority
|
|
1187
|
-
prio.op_reserved = NifiPriority._normalize_component(value, NifiPriority.COMPONENT_WIDTHS[0])
|
|
1188
|
-
self.priority = prio
|
|
1189
|
-
|
|
1190
989
|
def update_last_timestamp(self):
|
|
1191
990
|
self.record["os_last_updated_at"] = now()
|
|
1192
991
|
|
|
@@ -1377,8 +1176,6 @@ class NifiEntity(object):
|
|
|
1377
1176
|
"is_temporary": True,
|
|
1378
1177
|
"exception": {},
|
|
1379
1178
|
"last_processor_name": None,
|
|
1380
|
-
"fallback_os_workspace": self.request["fallback_os_workspace"],
|
|
1381
|
-
"priority": deepcopy(self.request.get("priority", {})),
|
|
1382
1179
|
}
|
|
1383
1180
|
child_entity = NifiEntity(
|
|
1384
1181
|
self.context,
|
|
@@ -1604,6 +1401,7 @@ class NifiEntity(object):
|
|
|
1604
1401
|
os_entity_uid=None,
|
|
1605
1402
|
os_relationship_uid=None,
|
|
1606
1403
|
os_entity_type=FRAGMENT_ENTITY_NAME,
|
|
1404
|
+
os_parent_uid=None,
|
|
1607
1405
|
previous_fragment_uid=None,
|
|
1608
1406
|
previous_fragment_relationship_uid=None,
|
|
1609
1407
|
previous_fragment_relationship=PREVIOUS_FRAGMENT_RELATIONSHIP,
|
|
@@ -1616,7 +1414,7 @@ class NifiEntity(object):
|
|
|
1616
1414
|
fields = {
|
|
1617
1415
|
**{k: v for k, v in self.record.items() if k.startswith("fragment") and v is not None},
|
|
1618
1416
|
**fields,
|
|
1619
|
-
"os_parent_uid": self.record["os_entity_uid"],
|
|
1417
|
+
"os_parent_uid": os_parent_uid or self.record["os_entity_uid"],
|
|
1620
1418
|
"source_entity_uid": source_entity_uid,
|
|
1621
1419
|
"previous_entity_uid": previous_fragment_uid,
|
|
1622
1420
|
"next_entity_uid": next_fragment_uid,
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Mapping-aware client-side conversion of Python values to OpenSearch types.
|
|
2
|
+
|
|
3
|
+
Recursively walks a data dict alongside an OpenSearch index mapping and
|
|
4
|
+
coerces Python values so they match the expected field types (text, keyword,
|
|
5
|
+
integer, date, binary, knn_vector, nested, etc.).
|
|
6
|
+
|
|
7
|
+
Primary entry point:
|
|
8
|
+
``convert_clientside(data, curr_mapping)``
|
|
9
|
+
|
|
10
|
+
The *curr_mapping* can be either the raw ``{"properties": {...}}`` tree or
|
|
11
|
+
just the inner ``properties`` dict -- the function handles both.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .dict import travel_dict
|
|
15
|
+
from .timestamp import string_to_datetime
|
|
16
|
+
import json
|
|
17
|
+
import base64
|
|
18
|
+
import logging
|
|
19
|
+
import datetime as dt
|
|
20
|
+
from typing import Dict, Any
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)

# Order in which vector metadata properties are appended to a vector field name.
vector_nomenclature_order = [
    "model_name",
    "dim",
    "model_version",
]

# Layout of every timestamp produced for a ``date`` field.
_ISO_UTC_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

# Text spellings treated as boolean true (compared case-insensitively).
_TRUE_STRINGS = frozenset({"true", "1", "yes", "y", "on"})


def _text_to_bool(text):
    """Return True when *text* spells a true value, ignoring case."""
    return text.lower() in _TRUE_STRINGS


def _epoch_to_iso(seconds):
    """Format POSIX *seconds* in UTC so the trailing ``Z`` is accurate."""
    moment = dt.datetime.fromtimestamp(seconds, tz=dt.timezone.utc)
    return moment.strftime(_ISO_UTC_FORMAT)


def _b64_text(text):
    """Base64-encode the UTF-8 bytes of *text* and return ASCII text."""
    return base64.b64encode(text.encode("utf-8")).decode("utf-8")


# Maps (python type name, OpenSearch field type) to a converter. Each converter
# returns ``(converted_value, recurse)``; ``recurse`` is True when the result
# is a container that the caller should convert further.
conversion_matrix = {
    ("str", "text"): lambda d: (d, False),
    ("str", "keyword"): lambda d: (d, False),
    # bool("false") would be True, so parse the text like the bytes rule does.
    ("str", "boolean"): lambda d: (_text_to_bool(d), False),
    ("str", "integer"): lambda d: (int(d), False),
    ("str", "long"): lambda d: (int(d), False),
    ("str", "float"): lambda d: (float(d), False),
    ("str", "double"): lambda d: (float(d), False),
    ("str", "date"): lambda d: (
        string_to_datetime(d).strftime(_ISO_UTC_FORMAT),
        False,
    ),
    ("str", "binary"): lambda d: (_b64_text(d), False),
    ("str", "object"): lambda d: (json.loads(d), True),
    ("str", "nested"): lambda d: (json.loads(d), True),
    ("bool", "text"): lambda d: (str(d), False),
    ("bool", "keyword"): lambda d: (str(d), False),
    ("bool", "boolean"): lambda d: (d, False),
    ("bool", "integer"): lambda d: (int(d), False),
    ("bool", "long"): lambda d: (int(d), False),
    ("bool", "float"): lambda d: (float(int(d)), False),
    ("bool", "double"): lambda d: (float(int(d)), False),
    ("bool", "binary"): lambda d: (_b64_text(str(int(d))), False),
    ("int", "text"): lambda d: (str(d), False),
    ("int", "keyword"): lambda d: (str(d), False),
    ("int", "boolean"): lambda d: (bool(d), False),
    ("int", "integer"): lambda d: (d, False),
    ("int", "long"): lambda d: (d, False),
    ("int", "float"): lambda d: (float(d), False),
    ("int", "double"): lambda d: (float(d), False),
    ("int", "date"): lambda d: (_epoch_to_iso(d), False),
    ("int", "binary"): lambda d: (_b64_text(str(d)), False),
    ("float", "text"): lambda d: (str(d), False),
    ("float", "keyword"): lambda d: (str(d), False),
    ("float", "boolean"): lambda d: (bool(d), False),
    ("float", "integer"): lambda d: (int(d), False),
    ("float", "long"): lambda d: (int(d), False),
    ("float", "float"): lambda d: (d, False),
    ("float", "double"): lambda d: (d, False),
    # Fractional seconds are truncated before formatting.
    ("float", "date"): lambda d: (_epoch_to_iso(int(d)), False),
    ("float", "binary"): lambda d: (_b64_text(str(d)), False),
    ("bytes", "text"): lambda d: (d.decode("utf-8"), False),
    ("bytes", "keyword"): lambda d: (d.decode("utf-8"), False),
    ("bytes", "boolean"): lambda d: (_text_to_bool(d.decode("utf-8")), False),
    ("bytes", "integer"): lambda d: (int(d.decode("utf-8")), False),
    ("bytes", "long"): lambda d: (int(d.decode("utf-8")), False),
    ("bytes", "float"): lambda d: (float(d.decode("utf-8")), False),
    ("bytes", "double"): lambda d: (float(d.decode("utf-8")), False),
    ("bytes", "date"): lambda d: (
        string_to_datetime(d.decode("utf-8")).strftime(_ISO_UTC_FORMAT),
        False,
    ),
    ("bytes", "binary"): lambda d: (base64.b64encode(d).decode("utf-8"), False),
    ("bytes", "object"): lambda d: (json.loads(d.decode("utf-8")), True),
    ("bytes", "nested"): lambda d: (json.loads(d.decode("utf-8")), True),
    ("dict", "text"): lambda d: (json.dumps(d), False),
    ("dict", "keyword"): lambda d: (json.dumps(d), False),
    ("dict", "boolean"): lambda d: (bool(d), False),
    ("dict", "object"): lambda d: (d, True),
    ("dict", "nested"): lambda d: (d, True),
}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def has_opensearch_type(mapping):
    """Return True when *mapping* is a dict whose ``"type"`` entry is a string.

    A ``"type"`` entry that is itself a dict is treated as a field that merely
    happens to be named ``type``, not as a type declaration. Anything that is
    not a dict (``None``, a scalar, ...) has no type.
    """
    if not isinstance(mapping, dict):
        return False
    return isinstance(mapping.get("type"), str)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def has_opensearch_properties(mapping):
    """Return True when *mapping* is a dict wrapping its fields in ``"properties"``.

    ``"properties"`` must be a dict, and a ``"type"`` entry, if present, must
    be a string (a real type declaration rather than a field named ``type``).
    Anything that is not a dict has no properties.
    """
    if not isinstance(mapping, dict):
        return False
    if not isinstance(mapping.get("properties"), dict):
        return False
    return "type" not in mapping or isinstance(mapping["type"], str)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def mapping_from_data(data, mapping):
    """Infer an OpenSearch-style mapping from the shape of *data*.

    Args:
        data: value to inspect (str, bool, int, float, bytes, dict, list or None).
        mapping: mapping to start from. It is returned as-is when *data* is
            ``None`` and receives one entry per key when *data* is a dict.

    Returns:
        ``{"type": ...}`` for scalars, a dict of per-key mappings for dicts,
        the mapping of the first element for lists, and ``{}`` for empty
        containers.

    Raises:
        TypeError: if *data* is of an unsupported type.
    """
    python_to_os_types = {
        "str": "text",
        "bool": "boolean",
        "int": "long",
        "float": "double",
    }
    if not mapping:
        mapping = {}
    if data is None:
        return mapping
    if data == {} or data == []:
        return {}
    datatype = type(data).__name__
    if datatype not in ("str", "bool", "int", "float", "bytes", "dict", "list"):
        raise TypeError(
            f"Cannot infer an OpenSearch mapping for a value of type {datatype!r}"
        )
    if datatype == "dict":
        for field, value in data.items():
            mapping[field] = mapping_from_data(value, {})
        return mapping
    if datatype == "list":
        # Only the first element is inspected.
        return mapping_from_data(data[0], {})
    if datatype == "bytes":
        # Bytes are mapped as text and never probed for an ISO date.
        return {"type": "text"}
    os_type = python_to_os_types[datatype]
    if os_type == "text":
        try:
            dt.datetime.fromisoformat(data)
        except ValueError:
            pass
        else:
            os_type = "date"
    return {"type": os_type}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def convert_data_type(data, curr_mapping, key):
    """Coerce the value at *key* inside *data* to its mapped type, in place.

    Args:
        data: root container holding the value.
        curr_mapping: mapping for the value; when empty, one is inferred from
            the value itself with ``mapping_from_data``.
        key: key path of the value, as accepted by ``travel_dict``.

    ``None`` and ``[]`` are left untouched. If no converter exists for the
    (python type, mapped type) pair, or the converter fails, a warning is
    logged and ``None`` is written in place of the value.
    """
    # NOTE(review): the 4th positional argument of travel_dict is passed through
    # unchanged; its meaning is defined in .dict and not visible here.
    data_elem = travel_dict(data, key, "r", True)
    if data_elem is None or data_elem == []:
        return
    data_type = type(data_elem).__name__
    if not curr_mapping:
        curr_mapping = mapping_from_data(data_elem, {})
    if has_opensearch_type(curr_mapping):
        mapping_type = curr_mapping.get("type", "object")
    else:
        # No string "type": treat the mapping as a container of sub-fields.
        mapping_type = "object"
    recurse = False
    try:
        converter = conversion_matrix[(data_type, mapping_type)]
        converted_elem, recurse = converter(data_elem)
    except Exception as e:
        # Exception, not BaseException: KeyboardInterrupt/SystemExit must propagate.
        logger.warning("%s: %s", type(e).__name__, e)
        converted_elem = None
    if converted_elem and recurse:
        converted_elem = convert_clientside(converted_elem, curr_mapping)
    travel_dict(data, key, "w", True)(converted_elem)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def handle_incompatible_data_type(data, key):
    """Replace an unsupported value at *key* with its ``str()`` form.

    Values that are ``None`` or already one of the supported Python types are
    left alone. *data* is modified in place and also returned.
    """
    supported = ("str", "bool", "int", "float", "bytes", "dict", "list")
    current = travel_dict(data, key, "r", True)
    type_name = type(current).__name__
    if type_name != "NoneType" and type_name not in supported:
        travel_dict(data, key, "w")(str(current))
    return data
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def handle_data_dict(data, curr_mapping, key):
    """Convert the dict found at *key* inside *data* according to *curr_mapping*.

    When the mapping declares a type other than ``object``/``nested``, the
    whole dict is converted as a single value. Otherwise each entry is
    converted against the sub-mapping of the same name, and any ``"#type"``
    marker left on a dict-valued entry is removed afterwards.
    """
    container = travel_dict(data, key, "r")

    if curr_mapping and has_opensearch_type(curr_mapping):
        if curr_mapping.get("type") not in ("object", "nested"):
            convert_data_type(data, curr_mapping, key)
            return

    # Snapshot the keys: conversion may rewrite entries while we iterate.
    for name in list(container):
        convert_clientside(data, curr_mapping.get(name), key + [name])
        child = container.get(name)
        if isinstance(child, dict):
            child.pop("#type", None)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def get_vector_name(metadata: Dict[str, Any]) -> str:
    """Build the field name for a vector from its *metadata*.

    The name starts with ``"vector"`` and gets ``_<value>`` appended for each
    property of ``vector_nomenclature_order`` present in *metadata*, in that
    order.
    """
    suffixes = [
        f"_{metadata[prop]}"
        for prop in vector_nomenclature_order
        if prop in metadata
    ]
    return "vector" + "".join(suffixes)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def validate_vector_data(vector_data, curr_mapping):
    """Check that *vector_data* has the ``{"data": [...], "metadata": {...}}`` shape.

    Args:
        vector_data: candidate vector payload.
        curr_mapping: accepted for signature compatibility; not consulted.

    Returns:
        True when ``data`` is a non-empty list, ``metadata`` is a dict, and
        ``metadata["dim"]`` is a positive int equal to ``len(data)``. Otherwise
        the first problem found is logged as a warning and False is returned.

    The checks are explicit conditionals rather than ``assert`` statements so
    they still run when Python is started with ``-O``.
    """

    def _first_problem():
        # Checks run in order; the first failing one supplies the message.
        if not isinstance(vector_data, dict):
            return "vector_data must be a dictionary"
        if "data" not in vector_data:
            return "'data' field missing in vector_data"
        values = vector_data["data"]
        if not (isinstance(values, list) and values):
            return "'data' must be a non-empty list"
        if "metadata" not in vector_data:
            return "'metadata' field missing in vector_data"
        metadata = vector_data["metadata"]
        if not isinstance(metadata, dict):
            return "'metadata' must be a dictionary"
        if "dim" not in metadata:
            return "'dim' field missing in 'metadata'"
        dim = metadata["dim"]
        if not (isinstance(dim, int) and dim > 0):
            return "'dim' must be a positive integer"
        if dim != len(values):
            return "'dim' must match the length of 'data'"
        return None

    problem = _first_problem()
    if problem is not None:
        logger.warning("Validation failed: %s", problem)
        return False
    return True
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def handle_data_vector(data, curr_mapping, key, vector_data=None):
    """Rewrite the vector payload at *key* into its named-vector form.

    The payload is *vector_data* when given, otherwise the value read from
    *data* at *key*. A valid payload whose computed field name exists in
    *curr_mapping* is replaced by
    ``{<vector_name>: {"value": <data>}, "#type": "VECTOR"}``. In every other
    case the value is handed to ``convert_clientside`` for generic conversion.
    """
    payload = travel_dict(data, key, "r") if vector_data is None else vector_data

    if validate_vector_data(payload, curr_mapping):
        vector_name = get_vector_name(payload["metadata"])
        if vector_name in curr_mapping:
            replacement = {
                vector_name: {
                    "value": payload["data"],
                },
                "#type": "VECTOR",
            }
            travel_dict(data, key, "w")(replacement)
            return
        logger.warning(
            f"Found valid vector in input data but no field '{vector_name}' allocated in OpenSearch for it"
        )

    # Invalid payload, or no field allocated for it: convert generically.
    convert_clientside(data, curr_mapping, key)
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def handle_data_list(data, curr_mapping, key):
    """Convert every element of the list at *key* inside *data*, in place.

    Each element is converted against *curr_mapping*. If the first converted
    element is tagged ``"#type": "VECTOR"``, the list is then replaced by a
    dict ``{vector_name: [{"value": ...}, ...]}`` grouping the vectors by name.
    Finally the structure inferred from the elements is compared with
    *curr_mapping* and differing fields are converted again.
    """

    def _align_elems_to_mapping(data, super_path, i, path, elem_structure, curr_mapping):
        """Re-convert fields of element *i* whose structure differs from the mapping."""
        # *path* is the position inside the element. It must be extended, not
        # reset, on recursion so that nested fields are addressed correctly.
        for field in elem_structure.keys() | curr_mapping.keys():
            if field not in curr_mapping:
                continue
            sub_path = path + [field]
            target = super_path + [i] + sub_path
            if field not in elem_structure:
                convert_clientside(data, elem_structure, target)
            elif isinstance(elem_structure[field], dict) and isinstance(
                curr_mapping[field], dict
            ):
                _align_elems_to_mapping(
                    data,
                    super_path,
                    i,
                    sub_path,
                    elem_structure[field],
                    curr_mapping[field],
                )
            elif elem_structure[field] != curr_mapping[field]:
                convert_clientside(data, elem_structure, target)

    curr_data = travel_dict(data, key, "r")
    if not curr_data:
        return

    elem_structure = {}
    for i, _ in enumerate(curr_data):
        data = convert_clientside(data, curr_mapping, key + [i])
        elem = travel_dict(data, key + [i], "r")
        # Earlier elements win over later ones for keys they share.
        elem_structure = {**mapping_from_data(elem, {}), **elem_structure}
    # The declared mapping overrides whatever was inferred from the elements.
    elem_structure = {**elem_structure, **curr_mapping}

    if isinstance(curr_data[0], dict) and curr_data[0].get("#type") == "VECTOR":
        vectors_data = {}
        for elem in curr_data:
            # Elements that failed vector validation were converted as plain
            # dicts and carry no marker; they cannot be merged by vector name.
            if not isinstance(elem, dict) or elem.pop("#type", None) != "VECTOR":
                logger.warning("Skipping list element that is not a converted vector")
                continue
            for vector_name, payload in elem.items():
                vectors_data.setdefault(vector_name, []).append(
                    {"value": payload["value"]}
                )
        travel_dict(data, key, "w")(vectors_data)
        elem_structure.pop("#type", None)

    # NOTE(review): because elem_structure was overlaid with curr_mapping above,
    # every key of curr_mapping compares equal here, so this pass appears to
    # find nothing to re-convert -- confirm whether the overlay is intended.
    if elem_structure != curr_mapping:
        # ``i`` is the index of the last element from the loop above.
        _align_elems_to_mapping(data, key, i, [], elem_structure, curr_mapping)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def convert_clientside(data, curr_mapping, parent_keylist=None):
    """Recursively convert *data* so values match the OpenSearch *curr_mapping*.

    Args:
        data: root container; it is converted in place.
        curr_mapping: a full mapping dict (with a ``"properties"`` key) or the
            inner properties dict directly. A falsy value means "no mapping".
        parent_keylist: key path of the value to convert, used internally for
            recursive traversal; normally left unset (meaning the root).

    Returns:
        The same *data* object.
    """
    # None instead of a shared mutable default list.
    if parent_keylist is None:
        parent_keylist = []
    if not curr_mapping:
        curr_mapping = {}
    if has_opensearch_properties(curr_mapping):
        curr_mapping = curr_mapping["properties"]
    if parent_keylist:
        curr_data = travel_dict(data, parent_keylist, "r")
    else:
        curr_data = data
    datatype = type(curr_data).__name__
    if datatype not in ["str", "bool", "int", "float", "bytes", "dict", "list"]:
        # Unsupported types (and None) are stringified first, then converted.
        handle_incompatible_data_type(data, parent_keylist)
        convert_data_type(data, curr_mapping, parent_keylist)
    elif datatype == "dict":
        # The "#type" marker is always stripped; it only selects the handler.
        is_vector = curr_data.pop("#type", None) == "VECTOR"
        if is_vector:
            handle_data_vector(data, curr_mapping, parent_keylist)
        else:
            handle_data_dict(data, curr_mapping, parent_keylist)
    elif datatype == "list":
        handle_data_list(data, curr_mapping, parent_keylist)
    else:
        convert_data_type(data, curr_mapping, parent_keylist)
    return data
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|