streamlit-octostar-utils 0.2.12a2__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/PKG-INFO +1 -1
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/pyproject.toml +1 -1
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/nifi.py +143 -71
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/language.py +8 -2
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/inheritance.py +1 -1
- streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/ontology/relationships.py +28 -0
- streamlit_octostar_utils-0.2.12a2/streamlit_octostar_utils/ontology/expand_entities.py +0 -594
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/LICENSE +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/README.md +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/dict.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/filetypes.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/timestamp.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/ner.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/client.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/context.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/permissions.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/validation.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/style/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/style/common.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/__init__.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
- {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -163,12 +163,16 @@ class NifiEntityProxy(object):
|
|
163
163
|
if child_entity.uid == uid_to_search:
|
164
164
|
found_entity = child_entity
|
165
165
|
else:
|
166
|
-
found_entity = _recursive_search_expanded_proxy(
|
166
|
+
found_entity = _recursive_search_expanded_proxy(
|
167
|
+
child_entity._proxy, uid_to_search
|
168
|
+
)
|
167
169
|
if found_entity:
|
168
170
|
return found_entity
|
169
171
|
|
170
172
|
if not self._proxy:
|
171
|
-
main_entities = itertools.chain(
|
173
|
+
main_entities = itertools.chain(
|
174
|
+
*[b.entities for b in self.context.in_batches]
|
175
|
+
)
|
172
176
|
main_entities = {e.record["entity_id"]: e for e in main_entities}
|
173
177
|
if main_entities.get(self.uid):
|
174
178
|
self._proxy = main_entities.get(self.uid)
|
@@ -179,7 +183,9 @@ class NifiEntityProxy(object):
|
|
179
183
|
self._proxy = found_entity._proxy
|
180
184
|
return self._proxy
|
181
185
|
## TODO: Try to get the entity from the database with query_ontology()
|
182
|
-
raise AttributeError(
|
186
|
+
raise AttributeError(
|
187
|
+
f"Cannot find children with UUID {self.uid}! It may exist in the database?"
|
188
|
+
)
|
183
189
|
|
184
190
|
def __getattr__(self, name):
|
185
191
|
if name in self.__dict__:
|
@@ -213,24 +219,28 @@ class NifiFragmenter(object):
|
|
213
219
|
raise ValueError("Must have at least 2 entities for fragmentation")
|
214
220
|
identifier = str(uuid.uuid4())
|
215
221
|
for i, entity in enumerate(fragments):
|
216
|
-
travel_dict(
|
217
|
-
|
218
|
-
)
|
222
|
+
travel_dict(
|
223
|
+
entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w"
|
224
|
+
)({"identifier": identifier, "count": count, "index": i})
|
219
225
|
if "fragment" not in entity.request["config"]:
|
220
226
|
entity.request["config"]["fragment"] = {}
|
221
227
|
if "fragments_stack" not in entity.request["config"]["fragment"]:
|
222
228
|
entity.request["config"]["fragment"]["fragments_stack"] = []
|
223
|
-
entity.request["config"]["fragment"]["fragments_stack"].insert(
|
224
|
-
|
225
|
-
"fragments_stack"
|
226
|
-
]
|
227
|
-
travel_dict(entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w")(
|
228
|
-
{"identifier": identifier, "count": count, "index": i}
|
229
|
+
entity.request["config"]["fragment"]["fragments_stack"].insert(
|
230
|
+
0, fragmenter_keylist
|
229
231
|
)
|
232
|
+
entity.request["nifi_attributes"]["fragments_stack"] = entity.request[
|
233
|
+
"config"
|
234
|
+
]["fragment"]["fragments_stack"]
|
235
|
+
travel_dict(
|
236
|
+
entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w"
|
237
|
+
)({"identifier": identifier, "count": count, "index": i})
|
230
238
|
|
231
239
|
def push_defragment_strategy(fragment, defragmenter_config):
|
232
240
|
pointer = fragment.request["config"]
|
233
|
-
last_fragmenter_keylist = fragment.request["config"]["fragment"][
|
241
|
+
last_fragmenter_keylist = fragment.request["config"]["fragment"][
|
242
|
+
"fragments_stack"
|
243
|
+
][0]
|
234
244
|
for k in ("fragment." + last_fragmenter_keylist).split("."):
|
235
245
|
if not pointer.get(k):
|
236
246
|
pointer[k] = {}
|
@@ -249,7 +259,7 @@ class NifiEntityBatch(object):
|
|
249
259
|
|
250
260
|
class NifiContextManager(object):
|
251
261
|
HEADLESS_PROCESSOR_NAME = "headless"
|
252
|
-
|
262
|
+
|
253
263
|
class SyncFlag(Enum):
|
254
264
|
UPSERT_ENTITY_ALL = 0 # bool
|
255
265
|
UPSERT_ENTITY_SPECIFIC_FIELDS = 1 # 'fields': list of record fields
|
@@ -270,7 +280,9 @@ class NifiContextManager(object):
|
|
270
280
|
@property
|
271
281
|
def ontology(self):
|
272
282
|
if not self._ontology:
|
273
|
-
self._ontology = fetch_ontology_data.sync(
|
283
|
+
self._ontology = fetch_ontology_data.sync(
|
284
|
+
ontology_name=self.ontology_name, client=self.client
|
285
|
+
)
|
274
286
|
return self._ontology
|
275
287
|
|
276
288
|
def _config_get(entity, keylist):
|
@@ -339,7 +351,9 @@ class NifiContextManager(object):
|
|
339
351
|
return self
|
340
352
|
|
341
353
|
def get_workspaces_permissions(self, workspace_ids):
|
342
|
-
permissions_to_fetch = list(
|
354
|
+
permissions_to_fetch = list(
|
355
|
+
set(workspace_ids).difference(set(list(self.permissions.keys())))
|
356
|
+
)
|
343
357
|
if permissions_to_fetch:
|
344
358
|
permissions = get_permissions.sync(permissions_to_fetch, client=self.client)
|
345
359
|
self.permissions.update(permissions)
|
@@ -369,13 +383,18 @@ class NifiContextManager(object):
|
|
369
383
|
entities.append(entity)
|
370
384
|
for child_entity in entity.children_entities:
|
371
385
|
if not child_entity.drop_on_output:
|
372
|
-
if
|
386
|
+
if (
|
387
|
+
child_entity.output_as_independent
|
388
|
+
or child_entity.output_as_child
|
389
|
+
):
|
373
390
|
if processor_name != NifiContextManager.HEADLESS_PROCESSOR_NAME:
|
374
391
|
child_entity.request["last_processor_name"] = processor_name
|
375
392
|
if child_entity.output_as_independent:
|
376
393
|
if not child_entity._proxy:
|
377
394
|
child_entity.fetch_proxy()
|
378
|
-
entities.extend(
|
395
|
+
entities.extend(
|
396
|
+
_process_entity(child_entity._proxy, processor_name)
|
397
|
+
)
|
379
398
|
return entities
|
380
399
|
|
381
400
|
entities = itertools.chain(*[b.entities for b in entity_batches])
|
@@ -386,7 +405,9 @@ class NifiContextManager(object):
|
|
386
405
|
all_entities,
|
387
406
|
key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
|
388
407
|
)
|
389
|
-
self.out_entities = list(
|
408
|
+
self.out_entities = list(
|
409
|
+
{e.record["entity_id"]: e for e in all_entities}.values()
|
410
|
+
)
|
390
411
|
self.sync_entities()
|
391
412
|
return [entity for entity in self.jsonify(self.out_entities)["content"]]
|
392
413
|
|
@@ -394,19 +415,23 @@ class NifiContextManager(object):
|
|
394
415
|
error_response = DefaultErrorRoute.format_error(exc)
|
395
416
|
entity.request["exception"]["code"] = error_response.status_code
|
396
417
|
entity.request["exception"]["body"] = json.loads(error_response.body)["message"]
|
397
|
-
travel_dict(
|
398
|
-
entity.request["
|
399
|
-
)
|
400
|
-
travel_dict(
|
401
|
-
entity.request["
|
402
|
-
)
|
418
|
+
travel_dict(
|
419
|
+
entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w"
|
420
|
+
)(entity.request["exception"]["body"])
|
421
|
+
travel_dict(
|
422
|
+
entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w"
|
423
|
+
)(entity.request["exception"]["code"])
|
403
424
|
entity.request["nifi_attributes"]["raised_exc"] = True
|
404
425
|
|
405
426
|
def sync_entities(self):
|
406
427
|
if not self.lazy_sync:
|
407
428
|
entities = self.out_entities
|
408
429
|
else:
|
409
|
-
entities = [
|
430
|
+
entities = [
|
431
|
+
e
|
432
|
+
for e in self.out_entities
|
433
|
+
if e.record["entity_id"] in self.nonlazy_sync_ids
|
434
|
+
]
|
410
435
|
if not entities:
|
411
436
|
return
|
412
437
|
reserved_fields = [
|
@@ -436,16 +461,18 @@ class NifiContextManager(object):
|
|
436
461
|
for entity in entities:
|
437
462
|
if entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS):
|
438
463
|
concept_name = entity.record["entity_type"]
|
439
|
-
rels_to_fetch = entity.sync_params.get(
|
464
|
+
rels_to_fetch = entity.sync_params.get(
|
465
|
+
NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, []
|
466
|
+
)
|
440
467
|
for rel in rels_to_fetch:
|
441
468
|
if rel not in fetch_relationships_entities:
|
442
469
|
fetch_relationships_entities[rel] = []
|
443
470
|
fetch_relationships_entities[rel].append(entity)
|
444
471
|
if concept_name not in fetch_concept_relationships:
|
445
472
|
fetch_concept_relationships[concept_name] = set()
|
446
|
-
fetch_concept_relationships[concept_name] = fetch_concept_relationships[
|
447
|
-
|
448
|
-
)
|
473
|
+
fetch_concept_relationships[concept_name] = fetch_concept_relationships[
|
474
|
+
concept_name
|
475
|
+
].union(set(rels_to_fetch))
|
449
476
|
for k in fetch_concept_relationships.keys():
|
450
477
|
fetch_concept_relationships[k] = list(fetch_concept_relationships[k])
|
451
478
|
# UPSERT ENTITIES
|
@@ -491,7 +518,7 @@ class NifiContextManager(object):
|
|
491
518
|
file.request["is_temporary"] = False
|
492
519
|
file.request["entity_timestamp"] = file.record["os_last_updated_at"]
|
493
520
|
# FETCH RELATIONSHIPS
|
494
|
-
|
521
|
+
'''
|
495
522
|
if fetch_relationships_entities:
|
496
523
|
relationship_mappings_info = relationship_mappings.sync_detailed(
|
497
524
|
client=self.client
|
@@ -547,7 +574,7 @@ class NifiContextManager(object):
|
|
547
574
|
child_rel.request["entity_timestamp"] = rel.get(
|
548
575
|
"os_last_updated_at"
|
549
576
|
)
|
550
|
-
|
577
|
+
'''
|
551
578
|
# CLEAN SYNC PARAMS
|
552
579
|
for entity in entities:
|
553
580
|
entity.sync_params = {}
|
@@ -556,10 +583,15 @@ class NifiContextManager(object):
|
|
556
583
|
for entity in entities:
|
557
584
|
fields = set()
|
558
585
|
|
559
|
-
if
|
586
|
+
if (
|
587
|
+
entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL)
|
588
|
+
or entity.request["is_temporary"]
|
589
|
+
):
|
560
590
|
fields = fields.union(set(list(entity.record.keys())))
|
561
591
|
|
562
|
-
if entity.sync_params.get(
|
592
|
+
if entity.sync_params.get(
|
593
|
+
NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS
|
594
|
+
):
|
563
595
|
fields = fields.union(
|
564
596
|
set(
|
565
597
|
entity.sync_params.get(
|
@@ -570,7 +602,9 @@ class NifiContextManager(object):
|
|
570
602
|
)
|
571
603
|
)
|
572
604
|
if fields:
|
573
|
-
entities_to_upsert.append(
|
605
|
+
entities_to_upsert.append(
|
606
|
+
(entity, [f for f in list(fields) if f not in reserved_fields])
|
607
|
+
)
|
574
608
|
|
575
609
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
576
610
|
if exc_val is not None:
|
@@ -584,7 +618,9 @@ class NifiContextManager(object):
|
|
584
618
|
for entity in entities:
|
585
619
|
if isinstance(entity, NifiEntityProxy):
|
586
620
|
children.extend(entity.children_entities)
|
587
|
-
children.extend(
|
621
|
+
children.extend(
|
622
|
+
_recursive_collect_proxies(entity.children_entities)
|
623
|
+
)
|
588
624
|
return children
|
589
625
|
|
590
626
|
all_proxies = _recursive_collect_proxies(entities)
|
@@ -597,21 +633,15 @@ class NifiContextManager(object):
|
|
597
633
|
|
598
634
|
|
599
635
|
class NifiEntity(object):
|
600
|
-
def __init__(
|
636
|
+
def __init__(
|
637
|
+
self, context, request, record, annotations, all_independent_uids, children=[], contents=None
|
638
|
+
):
|
601
639
|
self.context = context
|
602
640
|
self.request = request
|
603
641
|
self.record = record
|
604
642
|
self.annotations = annotations
|
605
|
-
assert (
|
606
|
-
|
607
|
-
and self.record.get("entity_id")
|
608
|
-
and self.record["os_entity_uid"] == self.record["entity_id"]
|
609
|
-
)
|
610
|
-
assert (
|
611
|
-
self.record.get("os_concept")
|
612
|
-
and self.record.get("entity_type")
|
613
|
-
and self.record["os_concept"] == self.record["entity_type"]
|
614
|
-
)
|
643
|
+
assert self.record.get("os_entity_uid") and self.record.get("entity_id") and self.record["os_entity_uid"] == self.record["entity_id"]
|
644
|
+
assert self.record.get("os_concept") and self.record.get("entity_type") and self.record["os_concept"] == self.record["entity_type"]
|
615
645
|
if "entity_label" not in self.record:
|
616
646
|
self.record["entity_label"] = self.label
|
617
647
|
children = [c for c in children if isinstance(c, (str, dict))]
|
@@ -631,7 +661,9 @@ class NifiEntity(object):
|
|
631
661
|
child_types = [c["entity_type"] for c in proxy_entity_children] + [
|
632
662
|
c["record"]["entity_type"] for c in full_entity_children
|
633
663
|
]
|
634
|
-
output_as_child = [False] * len(proxy_entity_children) + [True] * len(
|
664
|
+
output_as_child = [False] * len(proxy_entity_children) + [True] * len(
|
665
|
+
full_entity_children
|
666
|
+
)
|
635
667
|
output_as_independent = [uid in all_independent_uids for uid in child_uids]
|
636
668
|
full_entity_children = [
|
637
669
|
NifiEntity(
|
@@ -645,7 +677,9 @@ class NifiEntity(object):
|
|
645
677
|
)
|
646
678
|
for c in full_entity_children
|
647
679
|
]
|
648
|
-
proxy_otm_children = [
|
680
|
+
proxy_otm_children = [
|
681
|
+
NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children
|
682
|
+
]
|
649
683
|
child_proxies = [None] * len(proxy_entity_children) + full_entity_children
|
650
684
|
self.children = [
|
651
685
|
NifiEntityProxy(
|
@@ -673,18 +707,21 @@ class NifiEntity(object):
|
|
673
707
|
|
674
708
|
@property
|
675
709
|
def sync_params(self):
|
676
|
-
return {
|
710
|
+
return {
|
711
|
+
NifiContextManager.SyncFlag[k]: v
|
712
|
+
for k, v in (self.request.get("sync_params") or {}).items()
|
713
|
+
}
|
677
714
|
|
678
715
|
@sync_params.setter
|
679
716
|
def sync_params(self, new_params):
|
680
717
|
self.request["sync_params"] = {
|
681
|
-
(k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
|
718
|
+
(k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
|
719
|
+
for k, v in new_params.items()
|
682
720
|
}
|
683
721
|
|
684
722
|
@property
|
685
723
|
def metadata(self):
|
686
724
|
return self.annotations
|
687
|
-
|
688
725
|
@metadata.setter
|
689
726
|
def metadata(self, new_metadata):
|
690
727
|
self.annotations = new_metadata
|
@@ -707,7 +744,9 @@ class NifiEntity(object):
|
|
707
744
|
contents_pointer = deepcopy(self.request["contents_pointer"])
|
708
745
|
ptr_location = contents_pointer.get("location")
|
709
746
|
if ptr_location == "attachment" and not contents_pointer.get("pointer"):
|
710
|
-
contents_pointer["pointer"] =
|
747
|
+
contents_pointer["pointer"] = (
|
748
|
+
f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
|
749
|
+
)
|
711
750
|
return self.request["contents_pointer"]
|
712
751
|
|
713
752
|
@contents_pointer.setter
|
@@ -727,7 +766,9 @@ class NifiEntity(object):
|
|
727
766
|
return list(
|
728
767
|
filter(
|
729
768
|
lambda x: isinstance(x, NifiOTMRelationshipProxy)
|
730
|
-
or is_child_concept_fn(
|
769
|
+
or is_child_concept_fn(
|
770
|
+
x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology
|
771
|
+
),
|
731
772
|
self.children,
|
732
773
|
)
|
733
774
|
)
|
@@ -746,12 +787,18 @@ class NifiEntity(object):
|
|
746
787
|
)
|
747
788
|
if (
|
748
789
|
self.record.get("os_workspace")
|
749
|
-
and (
|
790
|
+
and (
|
791
|
+
permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE
|
792
|
+
)
|
793
|
+
>= PermissionLevel.WRITE
|
750
794
|
):
|
751
795
|
return self.record["os_workspace"]
|
752
796
|
elif (
|
753
797
|
self.request.get("fallback_os_workspace")
|
754
|
-
and (
|
798
|
+
and (
|
799
|
+
permissions.get(self.request.get("fallback_os_workspace"))
|
800
|
+
or PermissionLevel.NONE
|
801
|
+
)
|
755
802
|
>= PermissionLevel.WRITE
|
756
803
|
):
|
757
804
|
return self.request["fallback_os_workspace"]
|
@@ -785,10 +832,14 @@ class NifiEntity(object):
|
|
785
832
|
entity_type = None
|
786
833
|
if isinstance(self, NifiEntityProxy):
|
787
834
|
entity_type = self.entity_type
|
788
|
-
return entity_type == type or is_child_concept_fn(
|
835
|
+
return entity_type == type or is_child_concept_fn(
|
836
|
+
entity_type, type, self.context.ontology
|
837
|
+
)
|
789
838
|
else:
|
790
839
|
entity_type = self.record["entity_type"]
|
791
|
-
return
|
840
|
+
return (
|
841
|
+
entity_type == type or type in self.request["ontology_info"]["parents"]
|
842
|
+
)
|
792
843
|
|
793
844
|
def is_fragmented(self) -> bool:
|
794
845
|
return bool(self.request["config"].get("fragment", {}).get("fragments_stack"))
|
@@ -804,7 +855,7 @@ class NifiEntity(object):
|
|
804
855
|
if _is_sub_fragment_recursive(value):
|
805
856
|
return True
|
806
857
|
return False
|
807
|
-
|
858
|
+
|
808
859
|
if not self.is_fragmented():
|
809
860
|
return True
|
810
861
|
fragment = entity.request.get("config", {}).get("fragment", {})
|
@@ -831,7 +882,10 @@ class NifiEntity(object):
|
|
831
882
|
else:
|
832
883
|
proxy_entity_children.append(child)
|
833
884
|
proxy_entity_children = list({c.uid: c for c in proxy_entity_children}.values())
|
834
|
-
proxy_entity_children = [
|
885
|
+
proxy_entity_children = [
|
886
|
+
{"entity_id": c.uid, "entity_type": c.entity_type}
|
887
|
+
for c in proxy_entity_children
|
888
|
+
]
|
835
889
|
proxy_otm_children = list(
|
836
890
|
{
|
837
891
|
c.record["os_entity_uid_from"]
|
@@ -847,7 +901,9 @@ class NifiEntity(object):
|
|
847
901
|
full_entity_children,
|
848
902
|
key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
|
849
903
|
)
|
850
|
-
full_entity_children = list(
|
904
|
+
full_entity_children = list(
|
905
|
+
{c.uid: c.to_json() for c in full_entity_children}.values()
|
906
|
+
)
|
851
907
|
children = full_entity_children + proxy_entity_children + proxy_otm_children
|
852
908
|
return {
|
853
909
|
"request": self.request,
|
@@ -931,7 +987,11 @@ class NifiEntity(object):
|
|
931
987
|
):
|
932
988
|
return self._add_entity(
|
933
989
|
os_relationship_workspace,
|
934
|
-
(
|
990
|
+
(
|
991
|
+
LOCAL_RELATIONSHIP_ENTITY_NAME
|
992
|
+
if os_relationship_workspace
|
993
|
+
else RELATIONSHIP_ENTITY_NAME
|
994
|
+
),
|
935
995
|
{
|
936
996
|
**relationship_fields,
|
937
997
|
"os_entity_uid_from": os_entity_uid_from,
|
@@ -1013,7 +1073,9 @@ class NifiEntity(object):
|
|
1013
1073
|
os_relationship_type,
|
1014
1074
|
)
|
1015
1075
|
child_entity._contents = file
|
1016
|
-
child_entity.request["contents_pointer"] =
|
1076
|
+
child_entity.request["contents_pointer"] = (
|
1077
|
+
NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
|
1078
|
+
)
|
1017
1079
|
return child_entity, child_rel
|
1018
1080
|
|
1019
1081
|
def add_tag(self, os_workspace, name, group, order, color):
|
@@ -1026,20 +1088,21 @@ class NifiEntity(object):
|
|
1026
1088
|
)
|
1027
1089
|
|
1028
1090
|
def add_metadata(
|
1029
|
-
self,
|
1030
|
-
json,
|
1031
|
-
merge_method: Callable[[Any, Any], Any],
|
1032
|
-
recurse: Union[bool, int] = False,
|
1091
|
+
self, json, merge_method: Callable[[Any, Any], Any], recurse: Union[bool, int] = False,
|
1033
1092
|
):
|
1034
1093
|
if not self.metadata:
|
1035
1094
|
self.metadata = {}
|
1036
|
-
self.metadata = recursive_update_dict(
|
1095
|
+
self.metadata = recursive_update_dict(
|
1096
|
+
self.metadata, json, merge_method, recurse
|
1097
|
+
)
|
1037
1098
|
|
1038
1099
|
def propagate_metadata(self, to_entity, fields=None, merge_method=lambda _, v2: v2):
|
1039
1100
|
metadata_to_propagate = deepcopy(self.metadata)
|
1040
1101
|
if fields:
|
1041
1102
|
metadata_to_propagate = {k: v for k, v in self.metadata if k in fields}
|
1042
|
-
to_entity.metadata = recursive_update_dict(
|
1103
|
+
to_entity.metadata = recursive_update_dict(
|
1104
|
+
to_entity.metadata, metadata_to_propagate, merge_method
|
1105
|
+
)
|
1043
1106
|
|
1044
1107
|
|
1045
1108
|
def more_recent_than(record_a, record_b):
|
@@ -1098,10 +1161,19 @@ class NifiRoute(Route):
|
|
1098
1161
|
query_params = request.query_params
|
1099
1162
|
processor_suffix = query_params["processor_suffix"]
|
1100
1163
|
body = await request.json()
|
1101
|
-
processor_name =
|
1164
|
+
processor_name = (
|
1165
|
+
"processor."
|
1166
|
+
+ self.processor_name
|
1167
|
+
+ "."
|
1168
|
+
+ op.replace("-", "_")
|
1169
|
+
+ "."
|
1170
|
+
+ processor_suffix
|
1171
|
+
)
|
1102
1172
|
if op not in self.endpoints.keys():
|
1103
1173
|
raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
|
1104
|
-
task_id = await self.celery_executor.send_task(
|
1174
|
+
task_id = await self.celery_executor.send_task(
|
1175
|
+
self.endpoints[op], args=[body, processor_name]
|
1176
|
+
)
|
1105
1177
|
return task_id
|
1106
1178
|
|
1107
1179
|
@staticmethod
|
@@ -1,6 +1,6 @@
|
|
1
1
|
import re
|
2
2
|
import py3langid as langid
|
3
|
-
|
3
|
+
from iso639 import Lang
|
4
4
|
|
5
5
|
|
6
6
|
def detect_language(text, min_confidence=None):
|
@@ -11,5 +11,11 @@ def detect_language(text, min_confidence=None):
|
|
11
11
|
if min_confidence and confidence < min_confidence:
|
12
12
|
return None, confidence
|
13
13
|
detected_lang = re.sub("[^A-Za-z]", "", detected_lang).lower()
|
14
|
-
detected_lang =
|
14
|
+
detected_lang = Lang(detected_lang).name.lower()
|
15
15
|
return detected_lang, confidence
|
16
|
+
|
17
|
+
def to_name(alpha2):
|
18
|
+
return Lang(alpha2).name.lower()
|
19
|
+
|
20
|
+
def to_alpha2(name):
|
21
|
+
return Lang(name).pt1
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from .inheritance import is_child_concept
|
2
|
+
|
3
|
+
|
4
|
+
def get_relationships_between_concepts(source, target, ontology):
|
5
|
+
ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
|
6
|
+
from_rels = ontology["concepts"][source]["relationships"]
|
7
|
+
from_rels = [ontology_rels[r] for r in from_rels]
|
8
|
+
from_rels = [
|
9
|
+
r
|
10
|
+
for r in from_rels
|
11
|
+
if is_child_concept(
|
12
|
+
target,
|
13
|
+
r["target_concept"],
|
14
|
+
{
|
15
|
+
"concepts": ontology["concepts"],
|
16
|
+
"relationships": list(ontology_rels.values()),
|
17
|
+
},
|
18
|
+
)
|
19
|
+
]
|
20
|
+
return from_rels
|
21
|
+
|
22
|
+
|
23
|
+
def invert_relationships(rels, ontology):
|
24
|
+
ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
|
25
|
+
inverses = []
|
26
|
+
for rel in rels:
|
27
|
+
inverses.append(ontology_rels[rel]["inverse_name"])
|
28
|
+
return inverses
|
@@ -1,594 +0,0 @@
|
|
1
|
-
from itertools import groupby, chain
|
2
|
-
import time
|
3
|
-
import uuid
|
4
|
-
from octostar.utils.ontology import multiquery_ontology
|
5
|
-
from octostar.utils.exceptions import StopAsyncIterationWithResult
|
6
|
-
|
7
|
-
from .inheritance import is_child_concept
|
8
|
-
|
9
|
-
REL_FETCHED_FIELDS = [
|
10
|
-
"os_relationship_name",
|
11
|
-
"os_entity_uid_from",
|
12
|
-
"os_entity_uid_to",
|
13
|
-
"os_entity_uid",
|
14
|
-
"os_entity_type_from",
|
15
|
-
"os_entity_type_to",
|
16
|
-
"os_workspace",
|
17
|
-
]
|
18
|
-
TARGETS_FETCHED_FIELDS = ["os_entity_uid", "os_concept", "entity_label", "os_workspace"]
|
19
|
-
|
20
|
-
|
21
|
-
class ExecutionMetrics:
|
22
|
-
def __init__(self):
|
23
|
-
self.n_queries = 0
|
24
|
-
self.n_relationships = 0
|
25
|
-
self.n_target_entities = 0
|
26
|
-
self.exec_time = 0.0
|
27
|
-
self.exec_times = {}
|
28
|
-
self.n_cancelled_queries = 0
|
29
|
-
self.relationship_names = set()
|
30
|
-
self.timeout = 0
|
31
|
-
|
32
|
-
def print_metrics(self):
|
33
|
-
print("Execution Metrics:")
|
34
|
-
print(f" Execution Time: {self.exec_time}")
|
35
|
-
print(f" Per-Type Execution Times: {self.exec_times}")
|
36
|
-
print(f" Number of Queries: {self.n_queries}")
|
37
|
-
print(f" Number of Cancelled Queries: {self.n_cancelled_queries}")
|
38
|
-
print(f" Timeout per Query: {self.timeout}")
|
39
|
-
print(f" Number of Relationships: {self.n_relationships}")
|
40
|
-
print(f" Number of Target Entities: {self.n_target_entities}")
|
41
|
-
print(f" Relationship Names: {self.relationship_names}")
|
42
|
-
|
43
|
-
|
44
|
-
async def _get_stream_result(stream):
|
45
|
-
result = None
|
46
|
-
try:
|
47
|
-
async for _ in stream:
|
48
|
-
pass
|
49
|
-
except StopAsyncIterationWithResult as e:
|
50
|
-
result = e.value
|
51
|
-
return result
|
52
|
-
|
53
|
-
|
54
|
-
def _left_join(left_list, right_list, left_keys, right_keys):
|
55
|
-
left_keys = left_keys if isinstance(left_keys, (list, tuple)) else [left_keys]
|
56
|
-
right_keys = right_keys if isinstance(right_keys, (list, tuple)) else [right_keys]
|
57
|
-
|
58
|
-
def _make_composite_key(item, keys):
|
59
|
-
return tuple(item[key] for key in keys)
|
60
|
-
|
61
|
-
left_list.sort(key=lambda item: _make_composite_key(item, left_keys))
|
62
|
-
right_list.sort(key=lambda item: _make_composite_key(item, right_keys))
|
63
|
-
left_groups = groupby(
|
64
|
-
left_list, key=lambda item: _make_composite_key(item, left_keys)
|
65
|
-
)
|
66
|
-
right_groups = groupby(
|
67
|
-
right_list, key=lambda item: _make_composite_key(item, right_keys)
|
68
|
-
)
|
69
|
-
right_dict = {key: list(group) for key, group in right_groups}
|
70
|
-
result = []
|
71
|
-
for left_key_val, left_items in left_groups:
|
72
|
-
left_items = list(left_items)
|
73
|
-
associated_rights = right_dict.get(left_key_val, [])
|
74
|
-
for left_item in left_items:
|
75
|
-
result.append((left_item, associated_rights))
|
76
|
-
return result
|
77
|
-
|
78
|
-
|
79
|
-
async def expand_otm(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_otm_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand non-many-to-many relationships by querying target tables directly.

    For each relationship listed in ``concepts_to_otm_rels`` one
    ``SELECT ... WHERE (<target properties>) IN (<source values>)`` query is
    built from the source-property values of the concept's entities. All
    queries are sent through ``multiquery_ontology.streaming`` and the
    returned rows are joined back onto the source entities.

    Args:
        expanded_entities: Mapping ``uid -> [entity, {key: [relationship, {target_uid: target}]}]``.
            Updated in place and also returned.
        entities_by_concept: Mapping of concept name to the entities of that concept.
        ontology_rels: Mapping of relationship name to its definition; the keys
            ``source_properties``, ``target_properties`` (comma-separated) and
            ``target_concept`` are read here.
        concepts_to_otm_rels: Mapping of concept name to relationship names to expand.
        metrics: Object whose ``n_queries``, ``n_cancelled_queries`` and
            ``exec_times`` are updated.
        client: Passed through to ``multiquery_ontology.streaming``.
        timeout: Passed through to ``multiquery_ontology.streaming``.
        limit: Value used in the ``LIMIT`` clause of every query.

    Returns:
        ``expanded_entities``.
    """
    start_time = time.time()
    target_fields = ",".join("`" + field + "`" for field in TARGETS_FETCHED_FIELDS)

    otm_queries = {}
    for concept_name, rels in concepts_to_otm_rels.items():
        for rel_name in rels:
            rel = ontology_rels[rel_name]
            source_properties = rel["source_properties"].split(",")
            target_properties = rel["target_properties"].split(",")
            value_rows = [
                [entity[prop] for prop in source_properties]
                for entity in entities_by_concept[concept_name]
            ]
            # Entities missing any source value cannot match a target row.
            value_rows = [
                row for row in value_rows if all(value is not None for value in row)
            ]
            if not value_rows:
                continue
            target_prop_names = ",".join("`" + prop + "`" for prop in target_properties)
            # NOTE(review): values are concatenated into the SQL text unescaped
            # and must already be strings; confirm they never contain quotes.
            source_values = ",".join(
                "(" + ",".join("'" + value + "'" for value in row) + ")"
                for row in value_rows
            )
            target_concept = rel["target_concept"]
            query = f"""SELECT {target_fields} FROM `etimbr`.`{target_concept}` WHERE ({target_prop_names}) IN ({source_values}) LIMIT {limit}"""
            # Query ids are consecutive integers rendered as strings.
            otm_queries[str(len(otm_queries))] = {
                "concept_name": concept_name,
                "query": query,
                "relationship_name": rel_name,
                "source_properties": source_properties,
                "target_properties": target_properties,
            }

    otm_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"] for query_id, query_data in otm_queries.items()
        },
        client=client,
        timeout=timeout,
    )
    metrics.n_queries += len(otm_queries)
    all_results = await _get_stream_result(otm_stream)
    if all_results is None:
        # The stream ended without delivering a result mapping.
        all_results = {}

    for query_id, rows in all_results.items():
        if rows is None:
            metrics.n_cancelled_queries += 1
        if not rows:
            continue
        query_data = otm_queries[query_id]
        source_properties = query_data["source_properties"]
        # Only entities that contributed values to the query can match; leaving
        # the others out also keeps None out of the join keys.
        sources = [
            entry[0]
            for entry in expanded_entities.values()
            if entry[0]["os_concept"] == query_data["concept_name"]
            and all(entry[0][prop] is not None for prop in source_properties)
        ]
        joined = _left_join(
            sources, rows, source_properties, query_data["target_properties"]
        )
        for source_entity, targets in joined:
            entry = expanded_entities.setdefault(
                source_entity["os_entity_uid"], [source_entity, {}]
            )
            for target in targets:
                target_uid = target["os_entity_uid"]
                # The relationship slot is keyed by the target uid.
                rel_entry = entry[1].setdefault(
                    target_uid,
                    [{"os_relationship_name": query_data["relationship_name"]}, {}],
                )
                rel_entry[1][target_uid] = target

    metrics.exec_times["otm"] = time.time() - start_time
    return expanded_entities
|
183
|
-
|
184
|
-
|
185
|
-
async def expand_mtm_mixed(
    expanded_entities,
    entities_by_concept,
    ontology,
    concepts_to_mixed_rels,
    ontology_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand many-to-many relationships with one joined query per relationship.

    For each concept and each of its relationships in
    ``concepts_to_mixed_rels`` a query against ``dtimbr`` selects the
    relationship columns (aliased ``rel__<field>``), the target columns
    (aliased ``tgt__<field>``) and the source ``os_entity_uid``. Returned
    rows are joined back onto the source entities by ``os_entity_uid``.

    Args:
        expanded_entities: Mapping ``uid -> [entity, {rel_uid: [relationship, {target_uid: target}]}]``.
            Updated in place and also returned.
        entities_by_concept: Mapping of concept name to the entities of that concept.
        ontology: Unused here; kept for interface compatibility.
        concepts_to_mixed_rels: Mapping of concept name to relationship names to expand.
        ontology_rels: Mapping of relationship name to its definition
            (``target_concept`` is read).
        metrics: Object whose ``n_queries``, ``n_cancelled_queries`` and
            ``exec_times`` are updated.
        client: Passed through to ``multiquery_ontology.streaming``.
        timeout: Half of this value is passed to ``multiquery_ontology.streaming``.
        limit: Value used in the ``LIMIT`` clause of every query.

    Returns:
        ``expanded_entities``.
    """
    start_time = time.time()

    mixed_queries = {}
    for concept_name, rels in concepts_to_mixed_rels.items():
        entity_ids = ",".join(
            "'" + entity["os_entity_uid"] + "'"
            for entity in entities_by_concept[concept_name]
        )
        for rel_name in rels:
            target_concept = ontology_rels[rel_name]["target_concept"]
            relationship_fields = ",".join(
                f"`{rel_name}[{target_concept}]_" + prop + "` AS " + "`rel__" + prop + "`"
                for prop in REL_FETCHED_FIELDS
            )
            target_fields = ",".join(
                f"`{rel_name}[{target_concept}]." + prop + "` AS " + "`tgt__" + prop + "`"
                for prop in TARGETS_FETCHED_FIELDS
            )
            all_fields = (
                f"{relationship_fields}, {target_fields}, `os_entity_uid`".strip(", ")
            )
            if entity_ids:
                mixed_queries[str(len(mixed_queries))] = (
                    f"SELECT {all_fields} FROM `dtimbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
                )

    metrics.n_queries += len(mixed_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries=dict(mixed_queries),
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    if all_results is None:
        # The stream ended without delivering a result mapping.
        all_results = {}

    # NOTE(review): the original carried a "TO BE TESTED" remark saying the
    # result data was always empty because of a timbr bug; this path is
    # therefore still unverified against real rows.
    for rows in all_results.values():
        if rows is None:
            metrics.n_cancelled_queries += 1
        if not rows:
            continue
        joined = _left_join(
            [entry[0] for entry in expanded_entities.values()],
            rows,
            "os_entity_uid",
            "os_entity_uid",
        )
        for source_entity, matched_rows in joined:
            entry = expanded_entities.setdefault(
                source_entity["os_entity_uid"], [source_entity, {}]
            )
            for row in matched_rows:
                # Rows without a relationship uid get a temporary one so that
                # they do not collapse into a single slot.
                rel_uid = row["rel__os_entity_uid"] or ("temp-" + str(uuid.uuid4()))
                # k[5:] drops the "rel__" / "tgt__" alias prefix.
                relationship = {
                    k[5:]: v for k, v in row.items() if k.startswith("rel__")
                }
                target = {k[5:]: v for k, v in row.items() if k.startswith("tgt__")}
                rel_entry = entry[1].setdefault(rel_uid, [relationship, {}])
                rel_entry[1][row["tgt__os_entity_uid"]] = target

    metrics.exec_times["mtm_mixed"] = time.time() - start_time
    return expanded_entities
|
312
|
-
|
313
|
-
|
314
|
-
async def expand_mtm_local(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_local_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand many-to-many relationships stored in ``os_workspace_relationship``.

    Works in two rounds, each given half of ``timeout``:

    1. For every concept, fetch relationship rows where the concept's
       entities are the ``from`` side (by relationship name) or the ``to``
       side (by inverse name), and attach them to the source entities.
    2. Fetch the entities on the opposite side of those rows, grouped by
       their concept, and attach them as targets of the relationships.

    Args:
        expanded_entities: Mapping ``uid -> [entity, {rel_uid: [relationship, {target_uid: target}]}]``.
            Updated in place and also returned.
        entities_by_concept: Mapping of concept name to the entities of that concept.
        ontology_rels: Mapping of relationship name to its definition
            (``inverse_name`` is read).
        concepts_to_local_rels: Mapping of concept name to relationship names to expand.
        metrics: Object whose ``n_queries``, ``n_cancelled_queries`` and
            ``exec_times`` are updated.
        client: Passed through to ``multiquery_ontology.streaming``.
        timeout: Overall budget; each round receives ``timeout / 2.0``.
        limit: Value used in the ``LIMIT`` clause of every query.

    Returns:
        ``expanded_entities``.
    """
    start_time = time.time()
    target_fields = ",".join("`" + field + "`" for field in TARGETS_FETCHED_FIELDS)
    rel_fields = ",".join("`" + field + "`" for field in REL_FETCHED_FIELDS)

    # ---- Round 1: relationship rows --------------------------------------
    from_queries = []
    to_queries = []
    for concept_name, concept_rels in concepts_to_local_rels.items():
        entity_ids = ",".join(
            "'" + entity["os_entity_uid"] + "'"
            for entity in entities_by_concept[concept_name]
        )
        inverse_names = ",".join(
            "'" + ontology_rels[rel_name]["inverse_name"] + "'"
            for rel_name in concept_rels
        )
        rel_names = ",".join("'" + rel_name + "'" for rel_name in concept_rels)
        if rel_names and inverse_names and entity_ids:
            from_queries.append(
                f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({rel_names}) AND `os_entity_uid_from` IN ({entity_ids}) LIMIT {limit}"
            )
            to_queries.append(
                f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({inverse_names}) AND `os_entity_uid_to` IN ({entity_ids}) LIMIT {limit}"
            )
    # The id prefix records which side of the row the source entity is on.
    local_queries = {
        **{"from_" + str(i): query for i, query in enumerate(from_queries)},
        **{"to_" + str(i): query for i, query in enumerate(to_queries)},
    }
    metrics.n_queries += len(local_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries=local_queries,
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    if all_results is None:
        # The stream ended without delivering a result mapping.
        all_results = {}

    middle_entities = []
    for query_id, rows in all_results.items():
        if rows is None:
            metrics.n_cancelled_queries += 1
        if not rows:
            continue
        direction = query_id.split("_")[0]
        inverse_direction = "to" if direction == "from" else "from"
        joined = {
            source["os_entity_uid"]: (
                source,
                {row["os_entity_uid"]: (row, {}) for row in rel_rows},
            )
            for source, rel_rows in _left_join(
                [entry[0] for entry in expanded_entities.values()],
                rows,
                "os_entity_uid",
                "os_entity_uid_" + direction,
            )
        }
        for source_uid, (source, rels) in joined.items():
            entry = expanded_entities.setdefault(source_uid, [source, {}])
            for rel_uid, rel_pair in rels.items():
                rel_row = rel_pair[0]
                # Remember the entity on the other side of the row; it is
                # fetched in round 2.
                middle_entities.append(
                    {
                        "entity_id": rel_row["os_entity_uid_" + inverse_direction],
                        "concept_name": rel_row["os_entity_type_" + inverse_direction],
                        "direction": inverse_direction,
                        "relationship": rel_pair,
                    }
                )
                rel_entry = entry[1].setdefault(rel_uid, [rel_row, {}])
                rel_entry[1].update(rel_pair[1])

    # ---- Round 2: entities on the other side ------------------------------
    middle_entities.sort(key=lambda middle: middle["concept_name"])
    ids_by_concept = {}
    for middle in middle_entities:
        ids_by_concept.setdefault(middle["concept_name"], []).append(
            "'" + middle["entity_id"] + "'"
        )
    local_queries_2 = {}
    for i, (concept_name, quoted_ids) in enumerate(ids_by_concept.items()):
        entity_ids = ",".join(quoted_ids)
        local_queries_2[str(i)] = (
            f"SELECT {target_fields} FROM `timbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
        )
    metrics.n_queries += len(local_queries_2)
    local_stream_2 = multiquery_ontology.streaming(
        sql_queries=local_queries_2,
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream_2)
    if all_results is None:
        all_results = {}

    # Index every target dict by relationship uid once, instead of scanning
    # all expanded entities for each relationship of each query.
    targets_by_rel_uid = {}
    for _entity, rels in expanded_entities.values():
        for rel_uid, rel_entry in rels.items():
            targets_by_rel_uid.setdefault(rel_uid, []).append(rel_entry[1])

    for rows in all_results.values():
        if rows is None:
            metrics.n_cancelled_queries += 1
        if not rows:
            continue
        for middle, targets in _left_join(
            middle_entities, rows, "entity_id", "os_entity_uid"
        ):
            if not targets:
                continue
            rel_row, rel_targets = middle["relationship"]
            for target_entity in targets:
                rel_targets[target_entity["os_entity_uid"]] = target_entity
            for bucket in targets_by_rel_uid.get(rel_row["os_entity_uid"], ()):
                bucket.update(rel_targets)

    metrics.exec_times["mtm_local"] = time.time() - start_time
    return expanded_entities
|
462
|
-
|
463
|
-
|
464
|
-
async def expand_entities(
    entities,
    ontology,
    relationship_mappings_info,
    client,
    relationship_names_by_entity_type=None,
    batch_size=10000,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand entities with their relationships and related target entities.

    Entities are de-duplicated by ``os_entity_uid`` (the last occurrence
    wins, the first position is kept), split into batches of ``batch_size``
    and passed to ``expand_entities_batch``.

    Args:
        entities: Entity mappings; each must have ``os_entity_uid``.
        ontology: Passed through to ``expand_entities_batch``.
        relationship_mappings_info: Passed through to ``expand_entities_batch``.
        client: Passed through to ``expand_entities_batch``.
        relationship_names_by_entity_type: Optional filter passed through to
            ``expand_entities_batch``.
        batch_size: Maximum number of entities per batch.
        avg_limit_per_entity: Passed through to ``expand_entities_batch``.
        timeout: Stored on the metrics and passed through to
            ``expand_entities_batch``.

    Returns:
        ``(expanded, metrics)`` where ``expanded`` is a list of
        ``(entity, [(relationship, target), ...])`` tuples and ``metrics`` is
        the populated ``ExecutionMetrics``. Relationships for which no target
        was retrieved are counted in the metrics but left out of the list.
    """
    metrics = ExecutionMetrics()
    metrics.timeout = timeout
    start_time = time.time()

    # De-duplicate by uid. Batches follow the input order: the original sorted
    # by entity type first but then discarded that sorted list.
    unique_entities = list({e["os_entity_uid"]: e for e in entities}.values())

    expanded_entities = {}
    for start in range(0, len(unique_entities), batch_size):
        new_entities = await expand_entities_batch(
            unique_entities[start : start + batch_size],
            ontology,
            relationship_mappings_info,
            relationship_names_by_entity_type,
            metrics,
            client,
            avg_limit_per_entity,
            timeout,
        )
        expanded_entities.update(new_entities)
    metrics.exec_time = time.time() - start_time

    for entity_data in expanded_entities.values():
        for rel in entity_data[1].values():
            if rel[0]:
                metrics.n_relationships += 1
                metrics.relationship_names.add(rel[0]["os_relationship_name"])
            metrics.n_target_entities += len(rel[1])
    metrics.relationship_names = list(metrics.relationship_names)

    flattened = []
    for entity_data in expanded_entities.values():
        pairs = []
        for rel in entity_data[1].values():
            targets = rel[1]
            if not targets:
                # No target was fetched for this relationship (e.g. the query
                # was cancelled); there is nothing to pair it with.
                continue
            # Only the first target of each relationship is returned.
            pairs.append((rel[0], next(iter(targets.values()))))
        flattened.append((entity_data[0], pairs))
    return flattened, metrics
|
509
|
-
|
510
|
-
|
511
|
-
async def expand_entities_batch(
    entities,
    ontology,
    relationship_mappings,
    relationship_names_by_entity_type,
    metrics,
    client,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand a single batch of entities.

    The batch is grouped by ``entity_type``. For each concept its ontology
    relationships are optionally narrowed by
    ``relationship_names_by_entity_type`` and then sorted into three groups:
    non-many-to-many, many-to-many marked ``local_only``, and the remaining
    many-to-many ones (those marked ``unmapped`` are dropped). The three
    expansion steps then run one after another.

    Returns:
        Mapping ``uid -> [entity, {...relationships...}]`` for the batch.
    """
    # The row limit of every query scales with the batch size.
    limit = avg_limit_per_entity * len(entities)
    expanded_entities = {e["os_entity_uid"]: [e, {}] for e in entities}

    # Group by concept; sorting first makes the concept order deterministic.
    entities_by_concept = {}
    for entity in sorted(entities, key=lambda x: x["entity_type"]):
        entities_by_concept.setdefault(entity["entity_type"], []).append(entity)

    ontology_rels = {r["relationship_name"]: r for r in ontology["relationships"]}
    concepts_to_rels = {
        name: concept["relationships"] for name, concept in ontology["concepts"].items()
    }

    concepts_to_mixed_rels = {}
    concepts_to_otm_rels = {}
    concepts_to_local_rels = {}
    for concept_name in entities_by_concept:
        wanted = None
        if relationship_names_by_entity_type:
            wanted = set()
            for parent_name, parent_rels in relationship_names_by_entity_type.items():
                if is_child_concept(concept_name, parent_name, ontology):
                    wanted.update(parent_rels)

        candidates = concepts_to_rels[concept_name]
        if wanted:
            # An empty or absent filter leaves the concept's relationships as-is.
            candidates = [r for r in candidates if r in wanted]

        otm_rels = []
        local_rels = []
        mixed_rels = []
        for rel in candidates:
            if not ontology_rels[rel]["is_mtm"]:
                bucket = otm_rels
            elif rel in relationship_mappings["unmapped"]:
                continue
            elif rel in relationship_mappings["local_only"]:
                bucket = local_rels
            else:
                bucket = mixed_rels
            bucket.append(rel)

        concepts_to_mixed_rels[concept_name] = mixed_rels
        concepts_to_otm_rels[concept_name] = otm_rels
        concepts_to_local_rels[concept_name] = local_rels

    # OTM QUERIES: fetch target entities directly from the target tables with an IN statement
    expanded_entities = await expand_otm(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_otm_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # LOCAL MTM QUERIES: fetch relationships directly from the os_workspace_relationship table, then fetch target entities
    expanded_entities = await expand_mtm_local(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_local_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # MIXED MTM QUERIES: query via timbr to make the most of JOINs
    expanded_entities = await expand_mtm_mixed(
        expanded_entities,
        entities_by_concept,
        ontology,
        concepts_to_mixed_rels,
        ontology_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    return expanded_entities
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|