streamlit-octostar-utils 0.2.12a2__tar.gz → 0.2.14__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/nifi.py +143 -72
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/language.py +8 -2
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/inheritance.py +1 -1
- streamlit_octostar_utils-0.2.14/streamlit_octostar_utils/ontology/relationships.py +28 -0
- streamlit_octostar_utils-0.2.12a2/streamlit_octostar_utils/ontology/expand_entities.py +0 -594
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/LICENSE +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/README.md +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -24,7 +24,6 @@ from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
|
|
24
24
|
from ..core.timestamp import now, string_to_datetime
|
25
25
|
from .fastapi import DefaultErrorRoute, Route
|
26
26
|
from ..ontology.inheritance import is_child_concept as is_child_concept_fn, get_label_keys
|
27
|
-
from ..ontology.expand_entities import expand_entities
|
28
27
|
|
29
28
|
RELATIONSHIP_ENTITY_NAME = "os_relationship"
|
30
29
|
LOCAL_RELATIONSHIP_ENTITY_NAME = "os_workspace_relationship"
|
@@ -163,12 +162,16 @@ class NifiEntityProxy(object):
|
|
163
162
|
if child_entity.uid == uid_to_search:
|
164
163
|
found_entity = child_entity
|
165
164
|
else:
|
166
|
-
found_entity = _recursive_search_expanded_proxy(
|
165
|
+
found_entity = _recursive_search_expanded_proxy(
|
166
|
+
child_entity._proxy, uid_to_search
|
167
|
+
)
|
167
168
|
if found_entity:
|
168
169
|
return found_entity
|
169
170
|
|
170
171
|
if not self._proxy:
|
171
|
-
main_entities = itertools.chain(
|
172
|
+
main_entities = itertools.chain(
|
173
|
+
*[b.entities for b in self.context.in_batches]
|
174
|
+
)
|
172
175
|
main_entities = {e.record["entity_id"]: e for e in main_entities}
|
173
176
|
if main_entities.get(self.uid):
|
174
177
|
self._proxy = main_entities.get(self.uid)
|
@@ -179,7 +182,9 @@ class NifiEntityProxy(object):
|
|
179
182
|
self._proxy = found_entity._proxy
|
180
183
|
return self._proxy
|
181
184
|
## TODO: Try to get the entity from the database with query_ontology()
|
182
|
-
raise AttributeError(
|
185
|
+
raise AttributeError(
|
186
|
+
f"Cannot find children with UUID {self.uid}! It may exist in the database?"
|
187
|
+
)
|
183
188
|
|
184
189
|
def __getattr__(self, name):
|
185
190
|
if name in self.__dict__:
|
@@ -213,24 +218,28 @@ class NifiFragmenter(object):
|
|
213
218
|
raise ValueError("Must have at least 2 entities for fragmentation")
|
214
219
|
identifier = str(uuid.uuid4())
|
215
220
|
for i, entity in enumerate(fragments):
|
216
|
-
travel_dict(
|
217
|
-
|
218
|
-
)
|
221
|
+
travel_dict(
|
222
|
+
entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w"
|
223
|
+
)({"identifier": identifier, "count": count, "index": i})
|
219
224
|
if "fragment" not in entity.request["config"]:
|
220
225
|
entity.request["config"]["fragment"] = {}
|
221
226
|
if "fragments_stack" not in entity.request["config"]["fragment"]:
|
222
227
|
entity.request["config"]["fragment"]["fragments_stack"] = []
|
223
|
-
entity.request["config"]["fragment"]["fragments_stack"].insert(
|
224
|
-
|
225
|
-
"fragments_stack"
|
226
|
-
]
|
227
|
-
travel_dict(entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w")(
|
228
|
-
{"identifier": identifier, "count": count, "index": i}
|
228
|
+
entity.request["config"]["fragment"]["fragments_stack"].insert(
|
229
|
+
0, fragmenter_keylist
|
229
230
|
)
|
231
|
+
entity.request["nifi_attributes"]["fragments_stack"] = entity.request[
|
232
|
+
"config"
|
233
|
+
]["fragment"]["fragments_stack"]
|
234
|
+
travel_dict(
|
235
|
+
entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w"
|
236
|
+
)({"identifier": identifier, "count": count, "index": i})
|
230
237
|
|
231
238
|
def push_defragment_strategy(fragment, defragmenter_config):
|
232
239
|
pointer = fragment.request["config"]
|
233
|
-
last_fragmenter_keylist = fragment.request["config"]["fragment"][
|
240
|
+
last_fragmenter_keylist = fragment.request["config"]["fragment"][
|
241
|
+
"fragments_stack"
|
242
|
+
][0]
|
234
243
|
for k in ("fragment." + last_fragmenter_keylist).split("."):
|
235
244
|
if not pointer.get(k):
|
236
245
|
pointer[k] = {}
|
@@ -249,7 +258,7 @@ class NifiEntityBatch(object):
|
|
249
258
|
|
250
259
|
class NifiContextManager(object):
|
251
260
|
HEADLESS_PROCESSOR_NAME = "headless"
|
252
|
-
|
261
|
+
|
253
262
|
class SyncFlag(Enum):
|
254
263
|
UPSERT_ENTITY_ALL = 0 # bool
|
255
264
|
UPSERT_ENTITY_SPECIFIC_FIELDS = 1 # 'fields': list of record fields
|
@@ -270,7 +279,9 @@ class NifiContextManager(object):
|
|
270
279
|
@property
|
271
280
|
def ontology(self):
|
272
281
|
if not self._ontology:
|
273
|
-
self._ontology = fetch_ontology_data.sync(
|
282
|
+
self._ontology = fetch_ontology_data.sync(
|
283
|
+
ontology_name=self.ontology_name, client=self.client
|
284
|
+
)
|
274
285
|
return self._ontology
|
275
286
|
|
276
287
|
def _config_get(entity, keylist):
|
@@ -339,7 +350,9 @@ class NifiContextManager(object):
|
|
339
350
|
return self
|
340
351
|
|
341
352
|
def get_workspaces_permissions(self, workspace_ids):
|
342
|
-
permissions_to_fetch = list(
|
353
|
+
permissions_to_fetch = list(
|
354
|
+
set(workspace_ids).difference(set(list(self.permissions.keys())))
|
355
|
+
)
|
343
356
|
if permissions_to_fetch:
|
344
357
|
permissions = get_permissions.sync(permissions_to_fetch, client=self.client)
|
345
358
|
self.permissions.update(permissions)
|
@@ -369,13 +382,18 @@ class NifiContextManager(object):
|
|
369
382
|
entities.append(entity)
|
370
383
|
for child_entity in entity.children_entities:
|
371
384
|
if not child_entity.drop_on_output:
|
372
|
-
if
|
385
|
+
if (
|
386
|
+
child_entity.output_as_independent
|
387
|
+
or child_entity.output_as_child
|
388
|
+
):
|
373
389
|
if processor_name != NifiContextManager.HEADLESS_PROCESSOR_NAME:
|
374
390
|
child_entity.request["last_processor_name"] = processor_name
|
375
391
|
if child_entity.output_as_independent:
|
376
392
|
if not child_entity._proxy:
|
377
393
|
child_entity.fetch_proxy()
|
378
|
-
entities.extend(
|
394
|
+
entities.extend(
|
395
|
+
_process_entity(child_entity._proxy, processor_name)
|
396
|
+
)
|
379
397
|
return entities
|
380
398
|
|
381
399
|
entities = itertools.chain(*[b.entities for b in entity_batches])
|
@@ -386,7 +404,9 @@ class NifiContextManager(object):
|
|
386
404
|
all_entities,
|
387
405
|
key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
|
388
406
|
)
|
389
|
-
self.out_entities = list(
|
407
|
+
self.out_entities = list(
|
408
|
+
{e.record["entity_id"]: e for e in all_entities}.values()
|
409
|
+
)
|
390
410
|
self.sync_entities()
|
391
411
|
return [entity for entity in self.jsonify(self.out_entities)["content"]]
|
392
412
|
|
@@ -394,19 +414,23 @@ class NifiContextManager(object):
|
|
394
414
|
error_response = DefaultErrorRoute.format_error(exc)
|
395
415
|
entity.request["exception"]["code"] = error_response.status_code
|
396
416
|
entity.request["exception"]["body"] = json.loads(error_response.body)["message"]
|
397
|
-
travel_dict(
|
398
|
-
entity.request["
|
399
|
-
)
|
400
|
-
travel_dict(
|
401
|
-
entity.request["
|
402
|
-
)
|
417
|
+
travel_dict(
|
418
|
+
entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w"
|
419
|
+
)(entity.request["exception"]["body"])
|
420
|
+
travel_dict(
|
421
|
+
entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w"
|
422
|
+
)(entity.request["exception"]["code"])
|
403
423
|
entity.request["nifi_attributes"]["raised_exc"] = True
|
404
424
|
|
405
425
|
def sync_entities(self):
|
406
426
|
if not self.lazy_sync:
|
407
427
|
entities = self.out_entities
|
408
428
|
else:
|
409
|
-
entities = [
|
429
|
+
entities = [
|
430
|
+
e
|
431
|
+
for e in self.out_entities
|
432
|
+
if e.record["entity_id"] in self.nonlazy_sync_ids
|
433
|
+
]
|
410
434
|
if not entities:
|
411
435
|
return
|
412
436
|
reserved_fields = [
|
@@ -436,16 +460,18 @@ class NifiContextManager(object):
|
|
436
460
|
for entity in entities:
|
437
461
|
if entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS):
|
438
462
|
concept_name = entity.record["entity_type"]
|
439
|
-
rels_to_fetch = entity.sync_params.get(
|
463
|
+
rels_to_fetch = entity.sync_params.get(
|
464
|
+
NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, []
|
465
|
+
)
|
440
466
|
for rel in rels_to_fetch:
|
441
467
|
if rel not in fetch_relationships_entities:
|
442
468
|
fetch_relationships_entities[rel] = []
|
443
469
|
fetch_relationships_entities[rel].append(entity)
|
444
470
|
if concept_name not in fetch_concept_relationships:
|
445
471
|
fetch_concept_relationships[concept_name] = set()
|
446
|
-
fetch_concept_relationships[concept_name] = fetch_concept_relationships[
|
447
|
-
|
448
|
-
)
|
472
|
+
fetch_concept_relationships[concept_name] = fetch_concept_relationships[
|
473
|
+
concept_name
|
474
|
+
].union(set(rels_to_fetch))
|
449
475
|
for k in fetch_concept_relationships.keys():
|
450
476
|
fetch_concept_relationships[k] = list(fetch_concept_relationships[k])
|
451
477
|
# UPSERT ENTITIES
|
@@ -491,7 +517,7 @@ class NifiContextManager(object):
|
|
491
517
|
file.request["is_temporary"] = False
|
492
518
|
file.request["entity_timestamp"] = file.record["os_last_updated_at"]
|
493
519
|
# FETCH RELATIONSHIPS
|
494
|
-
|
520
|
+
'''
|
495
521
|
if fetch_relationships_entities:
|
496
522
|
relationship_mappings_info = relationship_mappings.sync_detailed(
|
497
523
|
client=self.client
|
@@ -547,7 +573,7 @@ class NifiContextManager(object):
|
|
547
573
|
child_rel.request["entity_timestamp"] = rel.get(
|
548
574
|
"os_last_updated_at"
|
549
575
|
)
|
550
|
-
|
576
|
+
'''
|
551
577
|
# CLEAN SYNC PARAMS
|
552
578
|
for entity in entities:
|
553
579
|
entity.sync_params = {}
|
@@ -556,10 +582,15 @@ class NifiContextManager(object):
|
|
556
582
|
for entity in entities:
|
557
583
|
fields = set()
|
558
584
|
|
559
|
-
if
|
585
|
+
if (
|
586
|
+
entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL)
|
587
|
+
or entity.request["is_temporary"]
|
588
|
+
):
|
560
589
|
fields = fields.union(set(list(entity.record.keys())))
|
561
590
|
|
562
|
-
if entity.sync_params.get(
|
591
|
+
if entity.sync_params.get(
|
592
|
+
NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS
|
593
|
+
):
|
563
594
|
fields = fields.union(
|
564
595
|
set(
|
565
596
|
entity.sync_params.get(
|
@@ -570,7 +601,9 @@ class NifiContextManager(object):
|
|
570
601
|
)
|
571
602
|
)
|
572
603
|
if fields:
|
573
|
-
entities_to_upsert.append(
|
604
|
+
entities_to_upsert.append(
|
605
|
+
(entity, [f for f in list(fields) if f not in reserved_fields])
|
606
|
+
)
|
574
607
|
|
575
608
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
576
609
|
if exc_val is not None:
|
@@ -584,7 +617,9 @@ class NifiContextManager(object):
|
|
584
617
|
for entity in entities:
|
585
618
|
if isinstance(entity, NifiEntityProxy):
|
586
619
|
children.extend(entity.children_entities)
|
587
|
-
children.extend(
|
620
|
+
children.extend(
|
621
|
+
_recursive_collect_proxies(entity.children_entities)
|
622
|
+
)
|
588
623
|
return children
|
589
624
|
|
590
625
|
all_proxies = _recursive_collect_proxies(entities)
|
@@ -597,21 +632,15 @@ class NifiContextManager(object):
|
|
597
632
|
|
598
633
|
|
599
634
|
class NifiEntity(object):
|
600
|
-
def __init__(
|
635
|
+
def __init__(
|
636
|
+
self, context, request, record, annotations, all_independent_uids, children=[], contents=None
|
637
|
+
):
|
601
638
|
self.context = context
|
602
639
|
self.request = request
|
603
640
|
self.record = record
|
604
641
|
self.annotations = annotations
|
605
|
-
assert (
|
606
|
-
|
607
|
-
and self.record.get("entity_id")
|
608
|
-
and self.record["os_entity_uid"] == self.record["entity_id"]
|
609
|
-
)
|
610
|
-
assert (
|
611
|
-
self.record.get("os_concept")
|
612
|
-
and self.record.get("entity_type")
|
613
|
-
and self.record["os_concept"] == self.record["entity_type"]
|
614
|
-
)
|
642
|
+
assert self.record.get("os_entity_uid") and self.record.get("entity_id") and self.record["os_entity_uid"] == self.record["entity_id"]
|
643
|
+
assert self.record.get("os_concept") and self.record.get("entity_type") and self.record["os_concept"] == self.record["entity_type"]
|
615
644
|
if "entity_label" not in self.record:
|
616
645
|
self.record["entity_label"] = self.label
|
617
646
|
children = [c for c in children if isinstance(c, (str, dict))]
|
@@ -631,7 +660,9 @@ class NifiEntity(object):
|
|
631
660
|
child_types = [c["entity_type"] for c in proxy_entity_children] + [
|
632
661
|
c["record"]["entity_type"] for c in full_entity_children
|
633
662
|
]
|
634
|
-
output_as_child = [False] * len(proxy_entity_children) + [True] * len(
|
663
|
+
output_as_child = [False] * len(proxy_entity_children) + [True] * len(
|
664
|
+
full_entity_children
|
665
|
+
)
|
635
666
|
output_as_independent = [uid in all_independent_uids for uid in child_uids]
|
636
667
|
full_entity_children = [
|
637
668
|
NifiEntity(
|
@@ -645,7 +676,9 @@ class NifiEntity(object):
|
|
645
676
|
)
|
646
677
|
for c in full_entity_children
|
647
678
|
]
|
648
|
-
proxy_otm_children = [
|
679
|
+
proxy_otm_children = [
|
680
|
+
NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children
|
681
|
+
]
|
649
682
|
child_proxies = [None] * len(proxy_entity_children) + full_entity_children
|
650
683
|
self.children = [
|
651
684
|
NifiEntityProxy(
|
@@ -673,18 +706,21 @@ class NifiEntity(object):
|
|
673
706
|
|
674
707
|
@property
|
675
708
|
def sync_params(self):
|
676
|
-
return {
|
709
|
+
return {
|
710
|
+
NifiContextManager.SyncFlag[k]: v
|
711
|
+
for k, v in (self.request.get("sync_params") or {}).items()
|
712
|
+
}
|
677
713
|
|
678
714
|
@sync_params.setter
|
679
715
|
def sync_params(self, new_params):
|
680
716
|
self.request["sync_params"] = {
|
681
|
-
(k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
|
717
|
+
(k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
|
718
|
+
for k, v in new_params.items()
|
682
719
|
}
|
683
720
|
|
684
721
|
@property
|
685
722
|
def metadata(self):
|
686
723
|
return self.annotations
|
687
|
-
|
688
724
|
@metadata.setter
|
689
725
|
def metadata(self, new_metadata):
|
690
726
|
self.annotations = new_metadata
|
@@ -707,7 +743,9 @@ class NifiEntity(object):
|
|
707
743
|
contents_pointer = deepcopy(self.request["contents_pointer"])
|
708
744
|
ptr_location = contents_pointer.get("location")
|
709
745
|
if ptr_location == "attachment" and not contents_pointer.get("pointer"):
|
710
|
-
contents_pointer["pointer"] =
|
746
|
+
contents_pointer["pointer"] = (
|
747
|
+
f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
|
748
|
+
)
|
711
749
|
return self.request["contents_pointer"]
|
712
750
|
|
713
751
|
@contents_pointer.setter
|
@@ -727,7 +765,9 @@ class NifiEntity(object):
|
|
727
765
|
return list(
|
728
766
|
filter(
|
729
767
|
lambda x: isinstance(x, NifiOTMRelationshipProxy)
|
730
|
-
or is_child_concept_fn(
|
768
|
+
or is_child_concept_fn(
|
769
|
+
x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology
|
770
|
+
),
|
731
771
|
self.children,
|
732
772
|
)
|
733
773
|
)
|
@@ -746,12 +786,18 @@ class NifiEntity(object):
|
|
746
786
|
)
|
747
787
|
if (
|
748
788
|
self.record.get("os_workspace")
|
749
|
-
and (
|
789
|
+
and (
|
790
|
+
permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE
|
791
|
+
)
|
792
|
+
>= PermissionLevel.WRITE
|
750
793
|
):
|
751
794
|
return self.record["os_workspace"]
|
752
795
|
elif (
|
753
796
|
self.request.get("fallback_os_workspace")
|
754
|
-
and (
|
797
|
+
and (
|
798
|
+
permissions.get(self.request.get("fallback_os_workspace"))
|
799
|
+
or PermissionLevel.NONE
|
800
|
+
)
|
755
801
|
>= PermissionLevel.WRITE
|
756
802
|
):
|
757
803
|
return self.request["fallback_os_workspace"]
|
@@ -785,10 +831,14 @@ class NifiEntity(object):
|
|
785
831
|
entity_type = None
|
786
832
|
if isinstance(self, NifiEntityProxy):
|
787
833
|
entity_type = self.entity_type
|
788
|
-
return entity_type == type or is_child_concept_fn(
|
834
|
+
return entity_type == type or is_child_concept_fn(
|
835
|
+
entity_type, type, self.context.ontology
|
836
|
+
)
|
789
837
|
else:
|
790
838
|
entity_type = self.record["entity_type"]
|
791
|
-
return
|
839
|
+
return (
|
840
|
+
entity_type == type or type in self.request["ontology_info"]["parents"]
|
841
|
+
)
|
792
842
|
|
793
843
|
def is_fragmented(self) -> bool:
|
794
844
|
return bool(self.request["config"].get("fragment", {}).get("fragments_stack"))
|
@@ -804,7 +854,7 @@ class NifiEntity(object):
|
|
804
854
|
if _is_sub_fragment_recursive(value):
|
805
855
|
return True
|
806
856
|
return False
|
807
|
-
|
857
|
+
|
808
858
|
if not self.is_fragmented():
|
809
859
|
return True
|
810
860
|
fragment = entity.request.get("config", {}).get("fragment", {})
|
@@ -831,7 +881,10 @@ class NifiEntity(object):
|
|
831
881
|
else:
|
832
882
|
proxy_entity_children.append(child)
|
833
883
|
proxy_entity_children = list({c.uid: c for c in proxy_entity_children}.values())
|
834
|
-
proxy_entity_children = [
|
884
|
+
proxy_entity_children = [
|
885
|
+
{"entity_id": c.uid, "entity_type": c.entity_type}
|
886
|
+
for c in proxy_entity_children
|
887
|
+
]
|
835
888
|
proxy_otm_children = list(
|
836
889
|
{
|
837
890
|
c.record["os_entity_uid_from"]
|
@@ -847,7 +900,9 @@ class NifiEntity(object):
|
|
847
900
|
full_entity_children,
|
848
901
|
key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
|
849
902
|
)
|
850
|
-
full_entity_children = list(
|
903
|
+
full_entity_children = list(
|
904
|
+
{c.uid: c.to_json() for c in full_entity_children}.values()
|
905
|
+
)
|
851
906
|
children = full_entity_children + proxy_entity_children + proxy_otm_children
|
852
907
|
return {
|
853
908
|
"request": self.request,
|
@@ -931,7 +986,11 @@ class NifiEntity(object):
|
|
931
986
|
):
|
932
987
|
return self._add_entity(
|
933
988
|
os_relationship_workspace,
|
934
|
-
(
|
989
|
+
(
|
990
|
+
LOCAL_RELATIONSHIP_ENTITY_NAME
|
991
|
+
if os_relationship_workspace
|
992
|
+
else RELATIONSHIP_ENTITY_NAME
|
993
|
+
),
|
935
994
|
{
|
936
995
|
**relationship_fields,
|
937
996
|
"os_entity_uid_from": os_entity_uid_from,
|
@@ -1013,7 +1072,9 @@ class NifiEntity(object):
|
|
1013
1072
|
os_relationship_type,
|
1014
1073
|
)
|
1015
1074
|
child_entity._contents = file
|
1016
|
-
child_entity.request["contents_pointer"] =
|
1075
|
+
child_entity.request["contents_pointer"] = (
|
1076
|
+
NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
|
1077
|
+
)
|
1017
1078
|
return child_entity, child_rel
|
1018
1079
|
|
1019
1080
|
def add_tag(self, os_workspace, name, group, order, color):
|
@@ -1026,20 +1087,21 @@ class NifiEntity(object):
|
|
1026
1087
|
)
|
1027
1088
|
|
1028
1089
|
def add_metadata(
|
1029
|
-
self,
|
1030
|
-
json,
|
1031
|
-
merge_method: Callable[[Any, Any], Any],
|
1032
|
-
recurse: Union[bool, int] = False,
|
1090
|
+
self, json, merge_method: Callable[[Any, Any], Any], recurse: Union[bool, int] = False,
|
1033
1091
|
):
|
1034
1092
|
if not self.metadata:
|
1035
1093
|
self.metadata = {}
|
1036
|
-
self.metadata = recursive_update_dict(
|
1094
|
+
self.metadata = recursive_update_dict(
|
1095
|
+
self.metadata, json, merge_method, recurse
|
1096
|
+
)
|
1037
1097
|
|
1038
1098
|
def propagate_metadata(self, to_entity, fields=None, merge_method=lambda _, v2: v2):
|
1039
1099
|
metadata_to_propagate = deepcopy(self.metadata)
|
1040
1100
|
if fields:
|
1041
1101
|
metadata_to_propagate = {k: v for k, v in self.metadata if k in fields}
|
1042
|
-
to_entity.metadata = recursive_update_dict(
|
1102
|
+
to_entity.metadata = recursive_update_dict(
|
1103
|
+
to_entity.metadata, metadata_to_propagate, merge_method
|
1104
|
+
)
|
1043
1105
|
|
1044
1106
|
|
1045
1107
|
def more_recent_than(record_a, record_b):
|
@@ -1098,10 +1160,19 @@ class NifiRoute(Route):
|
|
1098
1160
|
query_params = request.query_params
|
1099
1161
|
processor_suffix = query_params["processor_suffix"]
|
1100
1162
|
body = await request.json()
|
1101
|
-
processor_name =
|
1163
|
+
processor_name = (
|
1164
|
+
"processor."
|
1165
|
+
+ self.processor_name
|
1166
|
+
+ "."
|
1167
|
+
+ op.replace("-", "_")
|
1168
|
+
+ "."
|
1169
|
+
+ processor_suffix
|
1170
|
+
)
|
1102
1171
|
if op not in self.endpoints.keys():
|
1103
1172
|
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
1104
|
-
task_id = await self.celery_executor.send_task(
|
1173
|
+
task_id = await self.celery_executor.send_task(
|
1174
|
+
self.endpoints[op], args=[body, processor_name]
|
1175
|
+
)
|
1105
1176
|
return task_id
|
1106
1177
|
|
1107
1178
|
@staticmethod
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import py3langid as langid
|
3
|
-
|
3
|
+
from iso639 import Lang
|
4
4
|
|
5
5
|
|
6
6
|
def detect_language(text, min_confidence=None):
|
@@ -11,5 +11,11 @@ def detect_language(text, min_confidence=None):
|
|
11
11
|
if min_confidence and confidence < min_confidence:
|
12
12
|
return None, confidence
|
13
13
|
detected_lang = re.sub("[^A-Za-z]", "", detected_lang).lower()
|
14
|
-
detected_lang =
|
14
|
+
detected_lang = Lang(detected_lang).name.lower()
|
15
15
|
return detected_lang, confidence
|
16
|
+
|
17
|
+
def to_name(alpha2):
|
18
|
+
return Lang(alpha2).name.lower()
|
19
|
+
|
20
|
+
def to_alpha2(name):
|
21
|
+
return Lang(name).pt1
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from .inheritance import is_child_concept
|
2
|
+
|
3
|
+
|
4
|
+
def get_relationships_between_concepts(source, target, ontology):
|
5
|
+
ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
|
6
|
+
from_rels = ontology["concepts"][source]["relationships"]
|
7
|
+
from_rels = [ontology_rels[r] for r in from_rels]
|
8
|
+
from_rels = [
|
9
|
+
r
|
10
|
+
for r in from_rels
|
11
|
+
if is_child_concept(
|
12
|
+
target,
|
13
|
+
r["target_concept"],
|
14
|
+
{
|
15
|
+
"concepts": ontology["concepts"],
|
16
|
+
"relationships": list(ontology_rels.values()),
|
17
|
+
},
|
18
|
+
)
|
19
|
+
]
|
20
|
+
return from_rels
|
21
|
+
|
22
|
+
|
23
|
+
def invert_relationships(rels, ontology):
|
24
|
+
ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
|
25
|
+
inverses = []
|
26
|
+
for rel in rels:
|
27
|
+
inverses.append(ontology_rels[rel]["inverse_name"])
|
28
|
+
return inverses
|
@@ -1,594 +0,0 @@
|
|
1
|
-
from itertools import groupby, chain
|
2
|
-
import time
|
3
|
-
import uuid
|
4
|
-
from octostar.utils.ontology import multiquery_ontology
|
5
|
-
from octostar.utils.exceptions import StopAsyncIterationWithResult
|
6
|
-
|
7
|
-
from .inheritance import is_child_concept
|
8
|
-
|
9
|
-
REL_FETCHED_FIELDS = [
|
10
|
-
"os_relationship_name",
|
11
|
-
"os_entity_uid_from",
|
12
|
-
"os_entity_uid_to",
|
13
|
-
"os_entity_uid",
|
14
|
-
"os_entity_type_from",
|
15
|
-
"os_entity_type_to",
|
16
|
-
"os_workspace",
|
17
|
-
]
|
18
|
-
TARGETS_FETCHED_FIELDS = ["os_entity_uid", "os_concept", "entity_label", "os_workspace"]
|
19
|
-
|
20
|
-
|
21
|
-
class ExecutionMetrics:
|
22
|
-
def __init__(self):
|
23
|
-
self.n_queries = 0
|
24
|
-
self.n_relationships = 0
|
25
|
-
self.n_target_entities = 0
|
26
|
-
self.exec_time = 0.0
|
27
|
-
self.exec_times = {}
|
28
|
-
self.n_cancelled_queries = 0
|
29
|
-
self.relationship_names = set()
|
30
|
-
self.timeout = 0
|
31
|
-
|
32
|
-
def print_metrics(self):
|
33
|
-
print("Execution Metrics:")
|
34
|
-
print(f" Execution Time: {self.exec_time}")
|
35
|
-
print(f" Per-Type Execution Times: {self.exec_times}")
|
36
|
-
print(f" Number of Queries: {self.n_queries}")
|
37
|
-
print(f" Number of Cancelled Queries: {self.n_cancelled_queries}")
|
38
|
-
print(f" Timeout per Query: {self.timeout}")
|
39
|
-
print(f" Number of Relationships: {self.n_relationships}")
|
40
|
-
print(f" Number of Target Entities: {self.n_target_entities}")
|
41
|
-
print(f" Relationship Names: {self.relationship_names}")
|
42
|
-
|
43
|
-
|
44
|
-
async def _get_stream_result(stream):
|
45
|
-
result = None
|
46
|
-
try:
|
47
|
-
async for _ in stream:
|
48
|
-
pass
|
49
|
-
except StopAsyncIterationWithResult as e:
|
50
|
-
result = e.value
|
51
|
-
return result
|
52
|
-
|
53
|
-
|
54
|
-
def _left_join(left_list, right_list, left_keys, right_keys):
|
55
|
-
left_keys = left_keys if isinstance(left_keys, (list, tuple)) else [left_keys]
|
56
|
-
right_keys = right_keys if isinstance(right_keys, (list, tuple)) else [right_keys]
|
57
|
-
|
58
|
-
def _make_composite_key(item, keys):
|
59
|
-
return tuple(item[key] for key in keys)
|
60
|
-
|
61
|
-
left_list.sort(key=lambda item: _make_composite_key(item, left_keys))
|
62
|
-
right_list.sort(key=lambda item: _make_composite_key(item, right_keys))
|
63
|
-
left_groups = groupby(
|
64
|
-
left_list, key=lambda item: _make_composite_key(item, left_keys)
|
65
|
-
)
|
66
|
-
right_groups = groupby(
|
67
|
-
right_list, key=lambda item: _make_composite_key(item, right_keys)
|
68
|
-
)
|
69
|
-
right_dict = {key: list(group) for key, group in right_groups}
|
70
|
-
result = []
|
71
|
-
for left_key_val, left_items in left_groups:
|
72
|
-
left_items = list(left_items)
|
73
|
-
associated_rights = right_dict.get(left_key_val, [])
|
74
|
-
for left_item in left_items:
|
75
|
-
result.append((left_item, associated_rights))
|
76
|
-
return result
|
77
|
-
|
78
|
-
|
79
|
-
async def expand_otm(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_otm_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand one-to-many relationships by querying target concept tables
    directly with an IN clause on the relationship's join properties.

    Args:
        expanded_entities: accumulator dict
            ``{source_uid: [entity, {rel_uid: [rel_props, {target_uid: target}]}]}``,
            mutated in place and returned.
        entities_by_concept: ``{concept_name: [entity, ...]}`` for this batch.
        ontology_rels: ``{relationship_name: relationship_dict}``.
        concepts_to_otm_rels: ``{concept_name: [otm relationship names]}``.
        metrics: ExecutionMetrics, updated with query counts and timing.
        client: connection handle forwarded to multiquery_ontology.
        timeout: per-query timeout in seconds.
        limit: row limit applied to every generated query.
    """
    start_time = time.time()
    otm_queries = []
    target_fields = ",".join(
        ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
    )
    for concept_name, rels in concepts_to_otm_rels.items():
        for rel_name in rels:
            rel = ontology_rels[rel_name]
            source_properties = rel["source_properties"].split(",")
            # Collect the join-key value tuple of every source entity,
            # dropping entities with a NULL anywhere in the key (they
            # cannot match any target row).
            source_values = [
                [entity[p] for p in source_properties]
                for entity in entities_by_concept[concept_name]
            ]
            source_values = [
                values
                for values in source_values
                if all(value is not None for value in values)
            ]
            if source_values:
                target_prop_names = ",".join(
                    ["`" + prop + "`" for prop in rel["target_properties"].split(",")]
                )
                # NOTE(review): values are interpolated with naive quoting;
                # a value containing a single quote breaks the query and is
                # an injection vector.  Prefer escaping or parameterization.
                values_sql = ",".join(
                    [
                        "(" + ",".join(["'" + value + "'" for value in values]) + ")"
                        for values in source_values
                    ]
                )
                if target_prop_names and values_sql:
                    query = f"""SELECT {target_fields} FROM `etimbr`.`{rel['target_concept']}` WHERE ({target_prop_names}) IN ({values_sql}) LIMIT {limit}"""
                    otm_queries.append(
                        {
                            "concept_name": concept_name,
                            "query": query,
                            "relationship_name": rel_name,
                            "source_properties": rel["source_properties"].split(","),
                            "target_properties": rel["target_properties"].split(","),
                        }
                    )
    otm_queries = {str(i): query for i, query in enumerate(otm_queries)}
    otm_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in otm_queries.items()
        },
        client=client,
        timeout=timeout,
    )
    metrics.n_queries += len(otm_queries)
    all_results = await _get_stream_result(otm_stream)
    for query_id, data in all_results.items():
        if data is None:  # None marks a cancelled (e.g. timed-out) query
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        query_data = otm_queries[query_id]
        # Join fetched target rows back onto the source entities of the
        # matching concept via the relationship's source/target properties.
        data = _left_join(
            [
                e[0]
                for e in expanded_entities.values()
                if e[0]["os_concept"] == query_data["concept_name"]
            ],
            data,
            query_data["source_properties"],
            query_data["target_properties"],
        )
        # Reshape to {source_uid: (source, {target_uid: (rel_props, {target_uid: target})})}.
        # OTM has no materialized relationship row, so rel_props carries
        # only the relationship name.
        data = {
            e[0]["os_entity_uid"]: (
                e[0],
                {
                    r["os_entity_uid"]: (
                        {"os_relationship_name": query_data["relationship_name"]},
                        {r["os_entity_uid"]: r},
                    )
                    for r in e[1]
                },
            )
            for e in data
        }
        # Merge into the shared accumulator without clobbering entries
        # produced by other expansion passes.
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    metrics.exec_times["otm"] = time.time() - start_time
    return expanded_entities
|
183
|
-
|
184
|
-
|
185
|
-
async def expand_mtm_mixed(
    expanded_entities,
    entities_by_concept,
    ontology,
    concepts_to_mixed_rels,
    ontology_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand mapped many-to-many relationships through `dtimbr` JOIN
    views, fetching relationship rows and target entities in a single
    query per (concept, relationship).

    Mutates and returns `expanded_entities`; updates `metrics`.

    NOTE(review): per the original code's own comment, these result sets
    are currently always empty due to a timbr bug, so the post-processing
    below is untested in production.
    """
    start_time = time.time()
    mixed_queries = []
    match_patterns = {}
    for concept_name in concepts_to_mixed_rels:
        match_patterns.setdefault(concept_name, []).extend(
            entity["os_entity_uid"] for entity in entities_by_concept[concept_name]
        )
    for concept_name, entity_ids in match_patterns.items():
        rels = concepts_to_mixed_rels[concept_name]
        entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
        for rel_name in rels:
            rel = ontology_rels[rel_name]
            target_concept = rel["target_concept"]
            # Aliased projection: rel__* columns carry relationship fields,
            # tgt__* columns carry target-entity fields.
            relationship_fields = ",".join(
                [
                    f"`{rel_name}[{target_concept}]_" + prop + "` AS " + "`rel__" + prop + "`"
                    for prop in REL_FETCHED_FIELDS
                ]
            )
            target_fields = ",".join(
                [
                    f"`{rel_name}[{target_concept}]." + prop + "` AS " + "`tgt__" + prop + "`"
                    for prop in TARGETS_FETCHED_FIELDS
                ]
            )
            all_fields = (
                f"{relationship_fields}, {target_fields}, `os_entity_uid`".strip(", ")
            )
            if entity_ids:
                mixed_queries.append(
                    {
                        "query": f"SELECT {all_fields} FROM `dtimbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
                    }
                )
    mixed_queries = {str(i): query for i, query in enumerate(mixed_queries)}
    metrics.n_queries += len(mixed_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in mixed_queries.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    for _, data in all_results.items():
        if data is None:  # None marks a cancelled (e.g. timed-out) query
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        data = _left_join(
            [e[0] for e in expanded_entities.values()],
            data,
            "os_entity_uid",
            "os_entity_uid",
        )
        # Relationship rows lacking a uid get a temporary one so they can
        # be keyed.  (Bug fix: the original indexed the (left, rights)
        # join tuple itself, which would raise TypeError on first use.)
        for _, right_rows in data:
            for rt in right_rows:
                rt["#rel__os_entity_uid"] = rt["rel__os_entity_uid"] or (
                    "temp-" + str(uuid.uuid4())
                )
        # Reshape to {source_uid: (source, {rel_uid: (rel_props, {target_uid: target})})},
        # splitting each joined row back into its rel__/tgt__ halves.
        data = {
            e[0]["os_entity_uid"]: (
                e[0],
                {
                    rt["#rel__os_entity_uid"]: (
                        {k[5:]: v for k, v in rt.items() if k.startswith("rel__")},
                        {
                            rt["tgt__os_entity_uid"]: {
                                k[5:]: v for k, v in rt.items() if k.startswith("tgt__")
                            }
                        },
                    )
                    for rt in e[1]
                },
            )
            for e in data
        }
        # (Bug fix: a second reshape iterated the dict just built as if it
        # were still a list of join tuples, indexing string keys with
        # string subscripts — it could only raise TypeError and was removed.)
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    metrics.exec_times["mtm_mixed"] = time.time() - start_time
    return expanded_entities
|
312
|
-
|
313
|
-
|
314
|
-
async def expand_mtm_local(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_local_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand many-to-many relationships stored locally in the
    `os_workspace_relationship` table.

    Two query waves: first fetch the relationship rows in both directions
    (outgoing by relationship name, incoming by inverse name), then fetch
    the entities those rows point at, grouped by target concept.

    Mutates and returns `expanded_entities`; updates `metrics`.
    """
    start_time = time.time()
    local_queries = {"from": [], "to": []}
    match_patterns = {}
    target_fields = ",".join(
        ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
    )
    rel_fields = ",".join(["`" + rel_field + "`" for rel_field in REL_FETCHED_FIELDS])
    for concept_name in concepts_to_local_rels.keys():
        match_patterns.setdefault(concept_name, []).extend(
            entity["os_entity_uid"] for entity in entities_by_concept[concept_name]
        )
    for concept_name, entity_ids in match_patterns.items():
        rel_names = concepts_to_local_rels[concept_name]
        inverse_names = [
            ontology_rels[rel_name]["inverse_name"] for rel_name in rel_names
        ]
        rel_names = ",".join(["'" + rel_name + "'" for rel_name in rel_names])
        inverse_names = ",".join(["'" + rel_name + "'" for rel_name in inverse_names])
        entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
        if rel_names and inverse_names and entity_ids:
            # Outgoing edges: this entity is the "from" side.
            local_queries["from"].append(
                {
                    "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({rel_names}) AND `os_entity_uid_from` IN ({entity_ids}) LIMIT {limit}"
                }
            )
            # Incoming edges: this entity is the "to" side of the inverse.
            local_queries["to"].append(
                {
                    "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({inverse_names}) AND `os_entity_uid_to` IN ({entity_ids}) LIMIT {limit}"
                }
            )
    # Query ids encode the direction ("from_N" / "to_N") for later parsing.
    local_queries = {
        **{"from_" + str(i): q for i, q in enumerate(local_queries["from"])},
        **{"to_" + str(i): q for i, q in enumerate(local_queries["to"])},
    }
    metrics.n_queries += len(local_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in local_queries.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    middle_entities = []
    for query_id, data in all_results.items():
        if data is None:  # None marks a cancelled (e.g. timed-out) query
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        query_id = query_id.split("_")
        data = _left_join(
            [e[0] for e in expanded_entities.values()],
            data,
            "os_entity_uid",
            "os_entity_uid_" + query_id[0],
        )
        # The far end of each relationship row sits on the opposite side
        # of the direction we joined on.
        inverse_direction = "to" if query_id[0] == "from" else "from"
        data = {
            e[0]["os_entity_uid"]: (e[0], {r["os_entity_uid"]: (r, {}) for r in e[1]})
            for e in data
        }
        middle_entities.extend(
            [(inverse_direction, r) for e in data.values() for r in e[1].values()]
        )
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    # Second wave: resolve the far-end entity of every relationship row,
    # grouping by target concept so each concept needs one query.
    middle_entities = [
        {
            "entity_id": r[1][0]["os_entity_uid_" + r[0]],
            "concept_name": r[1][0]["os_entity_type_" + r[0]],
            "direction": r[0],
            "relationship": r[1],
        }
        for r in middle_entities
    ]
    middle_entities = sorted(middle_entities, key=lambda x: x["concept_name"])
    middle_entities = {
        k: list(g)
        for k, g in groupby(middle_entities, key=lambda x: x["concept_name"])
    }
    local_queries_2 = []
    for concept_name, entities in middle_entities.items():
        entity_ids = ",".join(["'" + e["entity_id"] + "'" for e in entities])
        if entity_ids:
            local_queries_2.append(
                {
                    "query": f"SELECT {target_fields} FROM `timbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
                }
            )
    local_queries_2 = {str(i): q for i, q in enumerate(local_queries_2)}
    metrics.n_queries += len(local_queries_2)
    local_stream_2 = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in local_queries_2.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream_2)
    middle_entities = list(chain(*middle_entities.values()))
    for query_id, data in all_results.items():
        if data is None:
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        data = _left_join(middle_entities, data, "entity_id", "os_entity_uid")
        # Attach each fetched target entity to its relationship row.
        for entry in data:
            relationship_targets = entry[0]["relationship"]
            for target_entity in entry[1]:
                relationship_targets[1][target_entity["os_entity_uid"]] = target_entity
        # Propagate the resolved targets back into the accumulator.
        if data:
            for rel in data:
                targets = rel[0]["relationship"][1]
                rel_uid = rel[0]["relationship"][0]["os_entity_uid"]
                for entity in expanded_entities.values():
                    entity_rels = entity[1]
                    if rel_uid in entity_rels:
                        entity_rels[rel_uid][1].update(targets)
    metrics.exec_times["mtm_local"] = time.time() - start_time
    return expanded_entities
|
462
|
-
|
463
|
-
|
464
|
-
async def expand_entities(
    entities,
    ontology,
    relationship_mappings_info,
    client,
    relationship_names_by_entity_type=None,
    batch_size=10000,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand entities with their relationships and target entities, in batches.

    Args:
        entities: list of entity dicts (each with "entity_type" and
            "os_entity_uid").
        ontology: ontology dict with "concepts" and "relationships".
        relationship_mappings_info: classification of MTM relationships
            ("unmapped" / "local_only" / mapped), forwarded to the batch pass.
        client: connection handle forwarded to the query layer.
        relationship_names_by_entity_type: optional filter
            ``{concept_name: [relationship names]}``; None expands everything.
        batch_size: max entities per expansion batch.
        avg_limit_per_entity: multiplied by batch length to cap query rows.
        timeout: per-query timeout in seconds.

    Returns:
        Tuple of (expanded, metrics) where expanded is a list of
        ``(entity, [(rel_props, first_target), ...])`` and metrics is the
        populated ExecutionMetrics.
    """
    metrics = ExecutionMetrics()
    metrics.timeout = timeout
    start_time = time.time()
    # Sort by type so each batch spans few distinct concepts, then
    # de-duplicate by uid.  (Bug fix: the original de-duplicated the
    # *unsorted* input, silently discarding the sort.)
    ordered_entities = sorted(entities, key=lambda x: x["entity_type"])
    ordered_entities = list({e["os_entity_uid"]: e for e in ordered_entities}.values())
    entity_batches = [
        ordered_entities[i : i + batch_size]
        for i in range(0, len(ordered_entities), batch_size)
    ]
    expanded_entities = {}
    for batch in entity_batches:
        new_entities = await expand_entities_batch(
            batch,
            ontology,
            relationship_mappings_info,
            relationship_names_by_entity_type,
            metrics,
            client,
            avg_limit_per_entity,
            timeout,
        )
        expanded_entities.update(new_entities)
    metrics.exec_time = time.time() - start_time
    # Tally relationship/target counts for reporting.
    for entity_data in expanded_entities.values():
        for rel in entity_data[1].values():
            if rel[0]:
                metrics.n_relationships += 1
                metrics.relationship_names.add(rel[0]["os_relationship_name"])
                metrics.n_target_entities += len(rel[1])
    metrics.relationship_names = list(metrics.relationship_names)
    # Flatten to the public shape: keep only the first target per relationship.
    expanded_entities = [
        (e[0], [(r[0], list(r[1].values())[0]) for r in e[1].values()])
        for e in expanded_entities.values()
    ]
    return expanded_entities, metrics
|
509
|
-
|
510
|
-
|
511
|
-
async def expand_entities_batch(
    entities,
    ontology,
    relationship_mappings,
    relationship_names_by_entity_type,
    metrics,
    client,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand one batch of entities.

    Classifies each concept's relationships into one-to-many, locally
    stored many-to-many, and mixed many-to-many buckets, then runs the
    three expansion passes in sequence over a shared accumulator of shape
    ``{uid: [entity, {rel_uid: [rel_props, {target_uid: target}]}]}``.
    """
    limit = avg_limit_per_entity * len(entities)
    expanded_entities = {e["os_entity_uid"]: [e, {}] for e in entities}
    # Group the batch by concept (groupby needs its input pre-sorted).
    by_type = sorted(entities, key=lambda x: x["entity_type"])
    entities_by_concept = {
        concept: list(group)
        for concept, group in groupby(by_type, key=lambda x: x["entity_type"])
    }
    ontology_rels = {r["relationship_name"]: r for r in ontology["relationships"]}
    concepts_to_rels = {
        name: concept["relationships"] for name, concept in ontology["concepts"].items()
    }
    concepts_to_mixed_rels = {}
    concepts_to_otm_rels = {}
    concepts_to_local_rels = {}
    for concept_name in entities_by_concept:
        # Collect the requested relationship names for this concept from any
        # ancestor concept named in the filter.
        requested = None
        if relationship_names_by_entity_type:
            requested = set()
            for ancestor_name in relationship_names_by_entity_type:
                if is_child_concept(concept_name, ancestor_name, ontology):
                    requested |= set(relationship_names_by_entity_type[ancestor_name])
        rels = concepts_to_rels[concept_name]
        # NOTE(review): when the filter matches no ancestor, `requested` is an
        # empty set (falsy) and ALL relationships are kept — confirm intended.
        if requested:
            rels = [r for r in rels if r in requested]
        otm_rels, local_rels, mixed_rels = [], [], []
        for rel in rels:
            if not ontology_rels[rel]["is_mtm"]:
                otm_rels.append(rel)
            elif rel in relationship_mappings["unmapped"]:
                continue
            elif rel in relationship_mappings["local_only"]:
                local_rels.append(rel)
            else:
                mixed_rels.append(rel)
        concepts_to_mixed_rels[concept_name] = mixed_rels
        concepts_to_otm_rels[concept_name] = otm_rels
        concepts_to_local_rels[concept_name] = local_rels
    # OTM: fetch target entities directly from target tables with an IN clause.
    expanded_entities = await expand_otm(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_otm_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # Local MTM: read os_workspace_relationship, then fetch targets.
    expanded_entities = await expand_mtm_local(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_local_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # Mixed MTM: query via timbr to make the most of JOINs.
    expanded_entities = await expand_mtm_mixed(
        expanded_entities,
        entities_by_concept,
        ontology,
        concepts_to_mixed_rels,
        ontology_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    return expanded_entities
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|