streamlit-octostar-utils 0.2.12a1__tar.gz → 0.2.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/nifi.py +147 -75
  4. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/language.py +8 -2
  5. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/inheritance.py +1 -1
  6. streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/ontology/relationships.py +28 -0
  7. streamlit_octostar_utils-0.2.12a1/streamlit_octostar_utils/ontology/expand_entities.py +0 -594
  8. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/LICENSE +0 -0
  9. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/README.md +0 -0
  10. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/__init__.py +0 -0
  11. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  12. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  13. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  14. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  15. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  16. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  17. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  18. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  19. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  20. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  21. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  22. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  23. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  24. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/__init__.py +0 -0
  25. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/dict.py +0 -0
  26. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/filetypes.py +0 -0
  27. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  28. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  29. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/core/timestamp.py +0 -0
  30. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  31. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/ontology/validation.py +0 -0
  38. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/style/__init__.py +0 -0
  39. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/style/common.py +0 -0
  40. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  42. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  43. {streamlit_octostar_utils-0.2.12a1 → streamlit_octostar_utils-0.2.13}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
--- streamlit_octostar_utils-0.2.12a1/PKG-INFO
+++ streamlit_octostar_utils-0.2.13/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: streamlit-octostar-utils
-Version: 0.2.12a1
+Version: 0.2.13
 Summary:
 License: MIT
 License-File: LICENSE
--- streamlit_octostar_utils-0.2.12a1/pyproject.toml
+++ streamlit_octostar_utils-0.2.13/pyproject.toml
@@ -5,7 +5,7 @@ include = '\.pyi?$'
 
 [tool.poetry]
 name = "streamlit-octostar-utils"
-version = "0.2.12a1"
+version = "0.2.13"
 description = ""
 license = "MIT"
 authors = ["Octostar"]
--- streamlit_octostar_utils-0.2.12a1/streamlit_octostar_utils/api_crafter/nifi.py
+++ streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/api_crafter/nifi.py
@@ -163,12 +163,16 @@ class NifiEntityProxy(object):
                 if child_entity.uid == uid_to_search:
                     found_entity = child_entity
                 else:
-                    found_entity = _recursive_search_expanded_proxy(child_entity._proxy, uid_to_search)
+                    found_entity = _recursive_search_expanded_proxy(
+                        child_entity._proxy, uid_to_search
+                    )
                 if found_entity:
                     return found_entity
 
         if not self._proxy:
-            main_entities = itertools.chain(*[b.entities for b in self.context.in_batches])
+            main_entities = itertools.chain(
+                *[b.entities for b in self.context.in_batches]
+            )
             main_entities = {e.record["entity_id"]: e for e in main_entities}
             if main_entities.get(self.uid):
                 self._proxy = main_entities.get(self.uid)
@@ -179,7 +183,9 @@ class NifiEntityProxy(object):
                 self._proxy = found_entity._proxy
             return self._proxy
         ## TODO: Try to get the entity from the database with query_ontology()
-        raise AttributeError(f"Cannot find children with UUID {self.uid}! It may exist in the database?")
+        raise AttributeError(
+            f"Cannot find children with UUID {self.uid}! It may exist in the database?"
+        )
 
     def __getattr__(self, name):
         if name in self.__dict__:
@@ -213,24 +219,28 @@ class NifiFragmenter(object):
            raise ValueError("Must have at least 2 entities for fragmentation")
        identifier = str(uuid.uuid4())
        for i, entity in enumerate(fragments):
-            travel_dict(entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w")(
-                {"identifier": identifier, "count": count, "index": i}
-            )
+            travel_dict(
+                entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w"
+            )({"identifier": identifier, "count": count, "index": i})
            if "fragment" not in entity.request["config"]:
                entity.request["config"]["fragment"] = {}
            if "fragments_stack" not in entity.request["config"]["fragment"]:
                entity.request["config"]["fragment"]["fragments_stack"] = []
-            entity.request["config"]["fragment"]["fragments_stack"].insert(0, fragmenter_keylist)
-            entity.request["nifi_attributes"]["fragments_stack"] = entity.request["config"]["fragment"][
-                "fragments_stack"
-            ]
-            travel_dict(entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w")(
-                {"identifier": identifier, "count": count, "index": i}
+            entity.request["config"]["fragment"]["fragments_stack"].insert(
+                0, fragmenter_keylist
            )
+            entity.request["nifi_attributes"]["fragments_stack"] = entity.request[
+                "config"
+            ]["fragment"]["fragments_stack"]
+            travel_dict(
+                entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w"
+            )({"identifier": identifier, "count": count, "index": i})
 
    def push_defragment_strategy(fragment, defragmenter_config):
        pointer = fragment.request["config"]
-        last_fragmenter_keylist = fragment.request["config"]["fragment"]["fragments_stack"][0]
+        last_fragmenter_keylist = fragment.request["config"]["fragment"][
+            "fragments_stack"
+        ][0]
        for k in ("fragment." + last_fragmenter_keylist).split("."):
            if not pointer.get(k):
                pointer[k] = {}
@@ -249,7 +259,7 @@ class NifiEntityBatch(object):
 
 class NifiContextManager(object):
     HEADLESS_PROCESSOR_NAME = "headless"
-    
+
     class SyncFlag(Enum):
         UPSERT_ENTITY_ALL = 0  # bool
         UPSERT_ENTITY_SPECIFIC_FIELDS = 1  # 'fields': list of record fields
@@ -270,7 +280,9 @@ class NifiContextManager(object):
     @property
     def ontology(self):
         if not self._ontology:
-            self._ontology = fetch_ontology_data.sync(ontology_name=self.ontology_name, client=self.client)
+            self._ontology = fetch_ontology_data.sync(
+                ontology_name=self.ontology_name, client=self.client
+            )
         return self._ontology
 
     def _config_get(entity, keylist):
@@ -339,7 +351,9 @@ class NifiContextManager(object):
         return self
 
     def get_workspaces_permissions(self, workspace_ids):
-        permissions_to_fetch = list(set(workspace_ids).difference(set(list(self.permissions.keys()))))
+        permissions_to_fetch = list(
+            set(workspace_ids).difference(set(list(self.permissions.keys())))
+        )
         if permissions_to_fetch:
             permissions = get_permissions.sync(permissions_to_fetch, client=self.client)
             self.permissions.update(permissions)
@@ -369,13 +383,18 @@ class NifiContextManager(object):
             entities.append(entity)
             for child_entity in entity.children_entities:
                 if not child_entity.drop_on_output:
-                    if child_entity.output_as_independent or child_entity.output_as_child:
+                    if (
+                        child_entity.output_as_independent
+                        or child_entity.output_as_child
+                    ):
                         if processor_name != NifiContextManager.HEADLESS_PROCESSOR_NAME:
                             child_entity.request["last_processor_name"] = processor_name
                         if child_entity.output_as_independent:
                             if not child_entity._proxy:
                                 child_entity.fetch_proxy()
-                            entities.extend(_process_entity(child_entity._proxy, processor_name))
+                            entities.extend(
+                                _process_entity(child_entity._proxy, processor_name)
+                            )
             return entities
 
         entities = itertools.chain(*[b.entities for b in entity_batches])
@@ -386,7 +405,9 @@ class NifiContextManager(object):
             all_entities,
             key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
         )
-        self.out_entities = list({e.record["entity_id"]: e for e in all_entities}.values())
+        self.out_entities = list(
+            {e.record["entity_id"]: e for e in all_entities}.values()
+        )
         self.sync_entities()
         return [entity for entity in self.jsonify(self.out_entities)["content"]]
 
@@ -394,19 +415,23 @@ class NifiContextManager(object):
         error_response = DefaultErrorRoute.format_error(exc)
         entity.request["exception"]["code"] = error_response.status_code
         entity.request["exception"]["body"] = json.loads(error_response.body)["message"]
-        travel_dict(entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w")(
-            entity.request["exception"]["body"]
-        )
-        travel_dict(entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w")(
-            entity.request["exception"]["code"]
-        )
+        travel_dict(
+            entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w"
+        )(entity.request["exception"]["body"])
+        travel_dict(
+            entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w"
+        )(entity.request["exception"]["code"])
         entity.request["nifi_attributes"]["raised_exc"] = True
 
     def sync_entities(self):
         if not self.lazy_sync:
             entities = self.out_entities
         else:
-            entities = [e for e in self.out_entities if e.record["entity_id"] in self.nonlazy_sync_ids]
+            entities = [
+                e
+                for e in self.out_entities
+                if e.record["entity_id"] in self.nonlazy_sync_ids
+            ]
         if not entities:
             return
         reserved_fields = [
@@ -436,16 +461,18 @@ class NifiContextManager(object):
         for entity in entities:
             if entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS):
                 concept_name = entity.record["entity_type"]
-                rels_to_fetch = entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, [])
+                rels_to_fetch = entity.sync_params.get(
+                    NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, []
+                )
                 for rel in rels_to_fetch:
                     if rel not in fetch_relationships_entities:
                         fetch_relationships_entities[rel] = []
                     fetch_relationships_entities[rel].append(entity)
                 if concept_name not in fetch_concept_relationships:
                     fetch_concept_relationships[concept_name] = set()
-                fetch_concept_relationships[concept_name] = fetch_concept_relationships[concept_name].union(
-                    set(rels_to_fetch)
-                )
+                fetch_concept_relationships[concept_name] = fetch_concept_relationships[
+                    concept_name
+                ].union(set(rels_to_fetch))
         for k in fetch_concept_relationships.keys():
             fetch_concept_relationships[k] = list(fetch_concept_relationships[k])
         # UPSERT ENTITIES
@@ -491,7 +518,7 @@ class NifiContextManager(object):
                 file.request["is_temporary"] = False
                 file.request["entity_timestamp"] = file.record["os_last_updated_at"]
         # FETCH RELATIONSHIPS
-        """
+        '''
         if fetch_relationships_entities:
             relationship_mappings_info = relationship_mappings.sync_detailed(
                 client=self.client
@@ -547,7 +574,7 @@ class NifiContextManager(object):
                         child_rel.request["entity_timestamp"] = rel.get(
                             "os_last_updated_at"
                         )
-        """
+        '''
         # CLEAN SYNC PARAMS
         for entity in entities:
             entity.sync_params = {}
@@ -556,10 +583,15 @@ class NifiContextManager(object):
         for entity in entities:
             fields = set()
 
-            if entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL) or entity.request["is_temporary"]:
+            if (
+                entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL)
+                or entity.request["is_temporary"]
+            ):
                 fields = fields.union(set(list(entity.record.keys())))
 
-            if entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS):
+            if entity.sync_params.get(
+                NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS
+            ):
                 fields = fields.union(
                     set(
                         entity.sync_params.get(
@@ -570,7 +602,9 @@ class NifiContextManager(object):
                     )
                 )
             if fields:
-                entities_to_upsert.append((entity, [f for f in list(fields) if f not in reserved_fields]))
+                entities_to_upsert.append(
+                    (entity, [f for f in list(fields) if f not in reserved_fields])
+                )
 
     def __exit__(self, exc_type, exc_val, exc_tb):
         if exc_val is not None:
@@ -584,7 +618,9 @@ class NifiContextManager(object):
             for entity in entities:
                 if isinstance(entity, NifiEntityProxy):
                     children.extend(entity.children_entities)
-                    children.extend(_recursive_collect_proxies(entity.children_entities))
+                    children.extend(
+                        _recursive_collect_proxies(entity.children_entities)
+                    )
             return children
 
         all_proxies = _recursive_collect_proxies(entities)
@@ -597,21 +633,15 @@ class NifiContextManager(object):
 
 
 class NifiEntity(object):
-    def __init__(self, context, request, record, annotations, all_independent_uids, children=[], contents=None):
+    def __init__(
+        self, context, request, record, annotations, all_independent_uids, children=[], contents=None
+    ):
         self.context = context
         self.request = request
         self.record = record
         self.annotations = annotations
-        assert (
-            self.record.get("os_entity_uid")
-            and self.record.get("entity_id")
-            and self.record["os_entity_uid"] == self.record["entity_id"]
-        )
-        assert (
-            self.record.get("os_concept")
-            and self.record.get("entity_type")
-            and self.record["os_concept"] == self.record["entity_type"]
-        )
+        assert self.record.get("os_entity_uid") and self.record.get("entity_id") and self.record["os_entity_uid"] == self.record["entity_id"]
+        assert self.record.get("os_concept") and self.record.get("entity_type") and self.record["os_concept"] == self.record["entity_type"]
         if "entity_label" not in self.record:
             self.record["entity_label"] = self.label
         children = [c for c in children if isinstance(c, (str, dict))]
@@ -631,7 +661,9 @@ class NifiEntity(object):
         child_types = [c["entity_type"] for c in proxy_entity_children] + [
             c["record"]["entity_type"] for c in full_entity_children
         ]
-        output_as_child = [False] * len(proxy_entity_children) + [True] * len(full_entity_children)
+        output_as_child = [False] * len(proxy_entity_children) + [True] * len(
+            full_entity_children
+        )
         output_as_independent = [uid in all_independent_uids for uid in child_uids]
         full_entity_children = [
             NifiEntity(
@@ -645,7 +677,9 @@ class NifiEntity(object):
             )
             for c in full_entity_children
         ]
-        proxy_otm_children = [NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children]
+        proxy_otm_children = [
+            NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children
+        ]
         child_proxies = [None] * len(proxy_entity_children) + full_entity_children
         self.children = [
             NifiEntityProxy(
@@ -673,18 +707,21 @@ class NifiEntity(object):
 
     @property
     def sync_params(self):
-        return {NifiContextManager.SyncFlag[k]: v for k, v in (self.request.get("sync_params") or {}).items()}
+        return {
+            NifiContextManager.SyncFlag[k]: v
+            for k, v in (self.request.get("sync_params") or {}).items()
+        }
 
     @sync_params.setter
     def sync_params(self, new_params):
         self.request["sync_params"] = {
-            (k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v for k, v in new_params.items()
+            (k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
+            for k, v in new_params.items()
         }
 
     @property
     def metadata(self):
         return self.annotations
-    
     @metadata.setter
     def metadata(self, new_metadata):
         self.annotations = new_metadata
@@ -707,7 +744,9 @@ class NifiEntity(object):
         contents_pointer = deepcopy(self.request["contents_pointer"])
         ptr_location = contents_pointer.get("location")
         if ptr_location == "attachment" and not contents_pointer.get("pointer"):
-            contents_pointer["pointer"] = f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
+            contents_pointer["pointer"] = (
+                f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
+            )
         return self.request["contents_pointer"]
 
     @contents_pointer.setter
@@ -727,7 +766,9 @@ class NifiEntity(object):
         return list(
             filter(
                 lambda x: isinstance(x, NifiOTMRelationshipProxy)
-                or is_child_concept_fn(x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology),
+                or is_child_concept_fn(
+                    x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology
+                ),
                 self.children,
             )
         )
@@ -746,12 +787,18 @@ class NifiEntity(object):
         )
         if (
             self.record.get("os_workspace")
-            and (permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE) >= PermissionLevel.WRITE
+            and (
+                permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE
+            )
+            >= PermissionLevel.WRITE
         ):
             return self.record["os_workspace"]
         elif (
             self.request.get("fallback_os_workspace")
-            and (permissions.get(self.request.get("fallback_os_workspace")) or PermissionLevel.NONE)
+            and (
+                permissions.get(self.request.get("fallback_os_workspace"))
+                or PermissionLevel.NONE
+            )
             >= PermissionLevel.WRITE
         ):
             return self.request["fallback_os_workspace"]
@@ -760,7 +807,7 @@ class NifiEntity(object):
 
     @property
     def label(self):
-        label_keys = self.request["ontology"]["label_keys"]
+        label_keys = self.request["ontology_info"]["label_keys"]
         label = " ".join([(self.record.get(field) or "") for field in label_keys]).strip()
         if not label:
             label = None
@@ -785,10 +832,14 @@ class NifiEntity(object):
         entity_type = None
         if isinstance(self, NifiEntityProxy):
             entity_type = self.entity_type
-            return entity_type == type or is_child_concept_fn(entity_type, type, self.context.ontology)
+            return entity_type == type or is_child_concept_fn(
+                entity_type, type, self.context.ontology
+            )
         else:
             entity_type = self.record["entity_type"]
-            return entity_type == type or type in self.request["ontology_info"]["parents"]
+            return (
+                entity_type == type or type in self.request["ontology_info"]["parents"]
+            )
 
     def is_fragmented(self) -> bool:
         return bool(self.request["config"].get("fragment", {}).get("fragments_stack"))
@@ -804,7 +855,7 @@ class NifiEntity(object):
                 if _is_sub_fragment_recursive(value):
                     return True
             return False
-        
+
         if not self.is_fragmented():
             return True
         fragment = entity.request.get("config", {}).get("fragment", {})
@@ -831,7 +882,10 @@ class NifiEntity(object):
             else:
                 proxy_entity_children.append(child)
         proxy_entity_children = list({c.uid: c for c in proxy_entity_children}.values())
-        proxy_entity_children = [{"entity_id": c.uid, "entity_type": c.entity_type} for c in proxy_entity_children]
+        proxy_entity_children = [
+            {"entity_id": c.uid, "entity_type": c.entity_type}
+            for c in proxy_entity_children
+        ]
         proxy_otm_children = list(
             {
                 c.record["os_entity_uid_from"]
@@ -847,7 +901,9 @@ class NifiEntity(object):
             full_entity_children,
             key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
         )
-        full_entity_children = list({c.uid: c.to_json() for c in full_entity_children}.values())
+        full_entity_children = list(
+            {c.uid: c.to_json() for c in full_entity_children}.values()
+        )
         children = full_entity_children + proxy_entity_children + proxy_otm_children
         return {
             "request": self.request,
@@ -862,9 +918,9 @@ class NifiEntity(object):
         random_id = str(uuid.uuid4())
         username = self.jwt_data["username"]
         if entity_type == self.record["entity_type"]:
-            ont_parents = self.request["ontology"]["parents"]
-            ont_relationships = self.request["ontology"]["relationships"]
-            ont_label_keys = self.request["ontology"]["label_keys"]
+            ont_parents = self.request["ontology_info"]["parents"]
+            ont_relationships = self.request["ontology_info"]["relationships"]
+            ont_label_keys = self.request["ontology_info"]["label_keys"]
         else:
             ont_parents = self.context.ontology["concepts"][entity_type]["parents"]
             ont_relationships = self.context.ontology["concepts"][entity_type]["relationships"]
@@ -931,7 +987,11 @@ class NifiEntity(object):
     ):
         return self._add_entity(
             os_relationship_workspace,
-            (LOCAL_RELATIONSHIP_ENTITY_NAME if os_relationship_workspace else RELATIONSHIP_ENTITY_NAME),
+            (
+                LOCAL_RELATIONSHIP_ENTITY_NAME
+                if os_relationship_workspace
+                else RELATIONSHIP_ENTITY_NAME
+            ),
             {
                 **relationship_fields,
                 "os_entity_uid_from": os_entity_uid_from,
@@ -1013,7 +1073,9 @@ class NifiEntity(object):
             os_relationship_type,
         )
         child_entity._contents = file
-        child_entity.request["contents_pointer"] = NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
+        child_entity.request["contents_pointer"] = (
+            NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
+        )
         return child_entity, child_rel
 
     def add_tag(self, os_workspace, name, group, order, color):
@@ -1026,20 +1088,21 @@ class NifiEntity(object):
         )
 
     def add_metadata(
-        self,
-        json,
-        merge_method: Callable[[Any, Any], Any],
-        recurse: Union[bool, int] = False,
+        self, json, merge_method: Callable[[Any, Any], Any], recurse: Union[bool, int] = False,
    ):
        if not self.metadata:
            self.metadata = {}
-        self.metadata = recursive_update_dict(self.metadata, json, merge_method, recurse)
+        self.metadata = recursive_update_dict(
+            self.metadata, json, merge_method, recurse
+        )
 
    def propagate_metadata(self, to_entity, fields=None, merge_method=lambda _, v2: v2):
        metadata_to_propagate = deepcopy(self.metadata)
        if fields:
            metadata_to_propagate = {k: v for k, v in self.metadata if k in fields}
-        to_entity.metadata = recursive_update_dict(to_entity.metadata, metadata_to_propagate, merge_method)
+        to_entity.metadata = recursive_update_dict(
+            to_entity.metadata, metadata_to_propagate, merge_method
+        )
 
 
 def more_recent_than(record_a, record_b):
@@ -1098,10 +1161,19 @@ class NifiRoute(Route):
         query_params = request.query_params
         processor_suffix = query_params["processor_suffix"]
         body = await request.json()
-        processor_name = "processor." + self.processor_name + "." + op.replace("-", "_") + "." + processor_suffix
+        processor_name = (
+            "processor."
+            + self.processor_name
+            + "."
+            + op.replace("-", "_")
+            + "."
+            + processor_suffix
+        )
         if op not in self.endpoints.keys():
             raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
-        task_id = await self.celery_executor.send_task(self.endpoints[op], args=[body, processor_name])
+        task_id = await self.celery_executor.send_task(
+            self.endpoints[op], args=[body, processor_name]
+        )
         return task_id
 
     @staticmethod
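Most of the nifi.py hunks above are mechanical line-length restyling; the one payload-shape change is that per-entity ontology metadata moved from request["ontology"] to request["ontology_info"] (read by label, is_a, and _add_entity). A minimal sketch of the new lookup; the surrounding request dict here is invented for illustration, and only the ontology_info key with its parents/relationships/label_keys fields is attested by the diff:

    # Hypothetical request payload; real payloads carry many more keys.
    request = {
        "ontology_info": {
            "parents": ["os_thing"],
            "relationships": ["related_to"],
            "label_keys": ["name"],
        }
    }

    # 0.2.12a1 read request["ontology"]["label_keys"]; 0.2.13 reads:
    label_keys = request["ontology_info"]["label_keys"]

A producer still emitting the old "ontology" key would raise a KeyError against 0.2.13, so the rename is effectively a request-format bump.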
--- streamlit_octostar_utils-0.2.12a1/streamlit_octostar_utils/nlp/language.py
+++ streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/nlp/language.py
@@ -1,6 +1,6 @@
 import re
 import py3langid as langid
-import iso639 as languages
+from iso639 import Lang
 
 
 def detect_language(text, min_confidence=None):
@@ -11,5 +11,11 @@ def detect_language(text, min_confidence=None):
     if min_confidence and confidence < min_confidence:
         return None, confidence
     detected_lang = re.sub("[^A-Za-z]", "", detected_lang).lower()
-    detected_lang = languages.to_name(detected_lang).lower()
+    detected_lang = Lang(detected_lang).name.lower()
     return detected_lang, confidence
+
+def to_name(alpha2):
+    return Lang(alpha2).name.lower()
+
+def to_alpha2(name):
+    return Lang(name).pt1
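The import swap above moves from the old iso639 module-level helpers to the Lang class of the iso639-lang distribution, and the new module-level to_name/to_alpha2 functions wrap it. A quick sketch of the call patterns, using the two-letter codes py3langid emits; the Lang attribute names (.name, .pt1) are from the iso639-lang API:

    from iso639 import Lang
    from streamlit_octostar_utils.nlp.language import to_name, to_alpha2

    Lang("en").name       # "English"
    Lang("en").pt1        # "en"
    to_name("en")         # "english" (lower-cased, as detect_language() returns)
    to_alpha2("English")  # "en"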
--- streamlit_octostar_utils-0.2.12a1/streamlit_octostar_utils/ontology/inheritance.py
+++ streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/ontology/inheritance.py
@@ -12,4 +12,4 @@ def get_label_keys(type, ontology):
         if not label_key:
             continue
         label_keys[label_key] = None
-    return list(label_keys.keys())
\ No newline at end of file
+    return list(label_keys.keys())
--- /dev/null
+++ streamlit_octostar_utils-0.2.13/streamlit_octostar_utils/ontology/relationships.py
@@ -0,0 +1,28 @@
+from .inheritance import is_child_concept
+
+
+def get_relationships_between_concepts(source, target, ontology):
+    ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
+    from_rels = ontology["concepts"][source]["relationships"]
+    from_rels = [ontology_rels[r] for r in from_rels]
+    from_rels = [
+        r
+        for r in from_rels
+        if is_child_concept(
+            target,
+            r["target_concept"],
+            {
+                "concepts": ontology["concepts"],
+                "relationships": list(ontology_rels.values()),
+            },
+        )
+    ]
+    return from_rels
+
+
+def invert_relationships(rels, ontology):
+    ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
+    inverses = []
+    for rel in rels:
+        inverses.append(ontology_rels[rel]["inverse_name"])
+    return inverses
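The new relationships.py helpers walk the same ontology dictionary used elsewhere in the package: concepts keyed by name carrying relationship-name lists, and relationship entries carrying relationship_name, target_concept, and inverse_name. A minimal usage sketch against a toy ontology; the field shapes are inferred from the code above, and whether a concept counts as a child is decided by is_child_concept from inheritance.py:

    from streamlit_octostar_utils.ontology.relationships import (
        get_relationships_between_concepts,
        invert_relationships,
    )

    # Toy ontology; field names mirror those referenced in relationships.py.
    ontology = {
        "concepts": {
            "person": {"parents": [], "relationships": ["owns"]},
            "vehicle": {"parents": [], "relationships": []},
            "car": {"parents": ["vehicle"], "relationships": []},
        },
        "relationships": [
            {
                "relationship_name": "owns",
                "target_concept": "vehicle",
                "inverse_name": "owned_by",
            }
        ],
    }

    # Relationships from person whose target accepts a car (a vehicle subtype):
    rels = get_relationships_between_concepts("person", "car", ontology)
    invert_relationships(["owns"], ontology)  # -> ["owned_by"]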
@@ -1,594 +0,0 @@
1
- from itertools import groupby, chain
2
- import time
3
- import uuid
4
- from octostar.utils.ontology import multiquery_ontology
5
- from octostar.utils.exceptions import StopAsyncIterationWithResult
6
-
7
- from .inheritance import is_child_concept
8
-
9
- REL_FETCHED_FIELDS = [
10
- "os_relationship_name",
11
- "os_entity_uid_from",
12
- "os_entity_uid_to",
13
- "os_entity_uid",
14
- "os_entity_type_from",
15
- "os_entity_type_to",
16
- "os_workspace",
17
- ]
18
- TARGETS_FETCHED_FIELDS = ["os_entity_uid", "os_concept", "entity_label", "os_workspace"]
19
-
20
-
21
- class ExecutionMetrics:
22
- def __init__(self):
23
- self.n_queries = 0
24
- self.n_relationships = 0
25
- self.n_target_entities = 0
26
- self.exec_time = 0.0
27
- self.exec_times = {}
28
- self.n_cancelled_queries = 0
29
- self.relationship_names = set()
30
- self.timeout = 0
31
-
32
- def print_metrics(self):
33
- print("Execution Metrics:")
34
- print(f" Execution Time: {self.exec_time}")
35
- print(f" Per-Type Execution Times: {self.exec_times}")
36
- print(f" Number of Queries: {self.n_queries}")
37
- print(f" Number of Cancelled Queries: {self.n_cancelled_queries}")
38
- print(f" Timeout per Query: {self.timeout}")
39
- print(f" Number of Relationships: {self.n_relationships}")
40
- print(f" Number of Target Entities: {self.n_target_entities}")
41
- print(f" Relationship Names: {self.relationship_names}")
42
-
43
-
44
- async def _get_stream_result(stream):
45
- result = None
46
- try:
47
- async for _ in stream:
48
- pass
49
- except StopAsyncIterationWithResult as e:
50
- result = e.value
51
- return result
52
-
53
-
54
- def _left_join(left_list, right_list, left_keys, right_keys):
55
- left_keys = left_keys if isinstance(left_keys, (list, tuple)) else [left_keys]
56
- right_keys = right_keys if isinstance(right_keys, (list, tuple)) else [right_keys]
57
-
58
- def _make_composite_key(item, keys):
59
- return tuple(item[key] for key in keys)
60
-
61
- left_list.sort(key=lambda item: _make_composite_key(item, left_keys))
62
- right_list.sort(key=lambda item: _make_composite_key(item, right_keys))
63
- left_groups = groupby(
64
- left_list, key=lambda item: _make_composite_key(item, left_keys)
65
- )
66
- right_groups = groupby(
67
- right_list, key=lambda item: _make_composite_key(item, right_keys)
68
- )
69
- right_dict = {key: list(group) for key, group in right_groups}
70
- result = []
71
- for left_key_val, left_items in left_groups:
72
- left_items = list(left_items)
73
- associated_rights = right_dict.get(left_key_val, [])
74
- for left_item in left_items:
75
- result.append((left_item, associated_rights))
76
- return result
77
-
78
-
79
- async def expand_otm(
80
- expanded_entities,
81
- entities_by_concept,
82
- ontology_rels,
83
- concepts_to_otm_rels,
84
- metrics,
85
- client,
86
- timeout,
87
- limit,
88
- ):
89
- start_time = time.time()
90
- otm_queries = []
91
- target_fields = ",".join(
92
- ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
93
- )
94
- for concept_name, rels in concepts_to_otm_rels.items():
95
- for rel_name in rels:
96
- rel = ontology_rels[rel_name]
97
- source_properties = rel["source_properties"].split(",")
98
- source_properties_values = [
99
- [entity[p] for p in source_properties]
100
- for entity in entities_by_concept[concept_name]
101
- ]
102
- source_properties_values = [
103
- values
104
- for values in source_properties_values
105
- if all(value is not None for value in values)
106
- ]
107
- if source_properties_values:
108
- target_prop_names = rel["target_properties"].split(",")
109
- target_prop_names = ",".join(
110
- ["`" + prop + "`" for prop in target_prop_names]
111
- )
112
- source_properties_values = ",".join(
113
- [
114
- "(" + ",".join(["'" + value + "'" for value in values]) + ")"
115
- for values in source_properties_values
116
- ]
117
- )
118
- if target_prop_names and source_properties_values:
119
- query = f"""SELECT {target_fields} FROM `etimbr`.`{rel['target_concept']}` WHERE ({target_prop_names}) IN ({source_properties_values}) LIMIT {limit}"""
120
- otm_queries.append(
121
- {
122
- "concept_name": concept_name,
123
- "query": query,
124
- "relationship_name": rel_name,
125
- "source_properties": rel["source_properties"].split(","),
126
- "target_properties": rel["target_properties"].split(","),
127
- }
128
- )
129
- otm_queries = {str(i): otm_queries[i] for i in range(len(otm_queries))}
130
- otm_stream = multiquery_ontology.streaming(
131
- sql_queries={
132
- query_id: query_data["query"]
133
- for query_id, query_data in otm_queries.items()
134
- },
135
- client=client,
136
- timeout=timeout,
137
- )
138
- metrics.n_queries += len(otm_queries)
139
- all_results = await _get_stream_result(otm_stream)
140
- for query_id, data in all_results.items():
141
- if data == None:
142
- metrics.n_cancelled_queries += 1
143
- if not data:
144
- continue
145
- query_data = otm_queries[query_id]
146
- data = _left_join(
147
- [
148
- e[0]
149
- for e in expanded_entities.values()
150
- if e[0]["os_concept"] == query_data["concept_name"]
151
- ],
152
- data,
153
- query_data["source_properties"],
154
- query_data["target_properties"],
155
- )
156
- data = {
157
- e[0]["os_entity_uid"]: (
158
- e[0],
159
- {
160
- r["os_entity_uid"]: (
161
- {"os_relationship_name": query_data["relationship_name"]},
162
- {r["os_entity_uid"]: r},
163
- )
164
- for r in e[1]
165
- },
166
- )
167
- for e in data
168
- }
169
- for entity_id, entities in data.items():
170
- if entity_id not in expanded_entities:
171
- expanded_entities[entity_id] = [entities[0], {}]
172
- for sub_entity_id, sub_entities in entities[1].items():
173
- if sub_entity_id not in expanded_entities[entity_id][1]:
174
- expanded_entities[entity_id][1][sub_entity_id] = [
175
- sub_entities[0],
176
- {},
177
- ]
178
- expanded_entities[entity_id][1][sub_entity_id][1].update(
179
- sub_entities[1]
180
- )
181
- metrics.exec_times["otm"] = time.time() - start_time
182
- return expanded_entities
183
-
184
-
185
- async def expand_mtm_mixed(
186
- expanded_entities,
187
- entities_by_concept,
188
- ontology,
189
- concepts_to_mixed_rels,
190
- ontology_rels,
191
- metrics,
192
- client,
193
- timeout,
194
- limit,
195
- ):
196
- start_time = time.time()
197
- mixed_queries = []
198
- match_patterns = {}
199
- rel_fields = ",".join(["`" + rel_field + "`" for rel_field in REL_FETCHED_FIELDS])
200
- for concept_name, rels in concepts_to_mixed_rels.items():
201
- if concept_name not in match_patterns:
202
- match_patterns[concept_name] = []
203
- match_patterns[concept_name].extend(
204
- [entity["os_entity_uid"] for entity in entities_by_concept[concept_name]]
205
- )
206
- for concept_name, entity_ids in match_patterns.items():
207
- rels = concepts_to_mixed_rels[concept_name]
208
- entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
209
- for rel_name in rels:
210
- rel = ontology_rels[rel_name]
211
- target_concept = rel["target_concept"]
212
- relationship_fields = ",".join(
213
- [
214
- f"`{rel_name}[{target_concept}]_"
215
- + prop
216
- + "` AS "
217
- + "`rel__"
218
- + prop
219
- + "`"
220
- for prop in REL_FETCHED_FIELDS
221
- ]
222
- )
223
- target_fields = ",".join(
224
- [
225
- f"`{rel_name}[{target_concept}]."
226
- + prop
227
- + "` AS "
228
- + "`tgt__"
229
- + prop
230
- + "`"
231
- for prop in TARGETS_FETCHED_FIELDS
232
- ]
233
- )
234
- all_fields = (
235
- f"{relationship_fields}, {target_fields}, `os_entity_uid`".strip(", ")
236
- )
237
- if entity_ids:
238
- mixed_queries.append(
239
- {
240
- "query": f"SELECT {all_fields} FROM `dtimbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
241
- }
242
- )
243
- mixed_queries = {str(i): mixed_queries[i] for i in range(len(mixed_queries))}
244
- metrics.n_queries += len(mixed_queries)
245
- local_stream = multiquery_ontology.streaming(
246
- sql_queries={
247
- query_id: query_data["query"]
248
- for query_id, query_data in mixed_queries.items()
249
- },
250
- client=client,
251
- timeout=timeout / 2.0,
252
- )
253
- all_results = await _get_stream_result(local_stream)
254
- for (
255
- _,
256
- data,
257
- ) in (
258
- all_results.items()
259
- ): ## TO BE TESTED (data is always empty at the moment due to timbr bug)
260
- if data == None:
261
- metrics.n_cancelled_queries += 1
262
- if not data:
263
- continue
264
- data = _left_join(
265
- [e[0] for e in expanded_entities.values()],
266
- data,
267
- "os_entity_uid",
268
- "os_entity_uid",
269
- )
270
- for elem in data:
271
- elem["#rel__os_entity_uid"] = elem["rel__os_entity_uid"] or (
272
- "temp-" + str(uuid.uuid4())
273
- )
274
- data = {
275
- e[0]["os_entity_uid"]: (
276
- e[0],
277
- {
278
- rt["#rel__os_entity_uid"]: (
279
- {k[5:]: v for k, v in rt.items() if k.startswith("rel__")},
280
- {
281
- rt["tgt__os_entity_uid"]: {
282
- k[5:]: v for k, v in rt.items() if k.startswith("tgt__")
283
- }
284
- },
285
- )
286
- for rt in e[1]
287
- },
288
- )
289
- for e in data
290
- }
291
- data = {
292
- e[0]["os_entity_uid"]: (
293
- e[0],
294
- {r["os_entity_uid"]: ({}, {r["os_entity_uid"]: r}) for r in e[1]},
295
- )
296
- for e in data
297
- }
298
- for entity_id, entities in data.items():
299
- if entity_id not in expanded_entities:
300
- expanded_entities[entity_id] = [entities[0], {}]
301
- for sub_entity_id, sub_entities in entities[1].items():
302
- if sub_entity_id not in expanded_entities[entity_id][1]:
303
- expanded_entities[entity_id][1][sub_entity_id] = [
304
- sub_entities[0],
305
- {},
306
- ]
307
- expanded_entities[entity_id][1][sub_entity_id][1].update(
308
- sub_entities[1]
309
- )
310
- metrics.exec_times["mtm_mixed"] = time.time() - start_time
311
- return expanded_entities
312
-
313
-
314
- async def expand_mtm_local(
315
- expanded_entities,
316
- entities_by_concept,
317
- ontology_rels,
318
- concepts_to_local_rels,
319
- metrics,
320
- client,
321
- timeout,
322
- limit,
323
- ):
324
- start_time = time.time()
325
- local_queries = {"from": [], "to": []}
326
- match_patterns = {}
327
- target_fields = ",".join(
328
- ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
329
- )
330
- rel_fields = ",".join(["`" + rel_field + "`" for rel_field in REL_FETCHED_FIELDS])
331
- for concept_name in concepts_to_local_rels.keys():
332
- if concept_name not in match_patterns:
333
- match_patterns[concept_name] = []
334
- match_patterns[concept_name].extend(
335
- [entity["os_entity_uid"] for entity in entities_by_concept[concept_name]]
336
- )
337
- for concept_name, entity_ids in match_patterns.items():
338
- rel_names = concepts_to_local_rels[concept_name]
339
- inverse_names = [
340
- ontology_rels[rel_name]["inverse_name"] for rel_name in rel_names
341
- ]
342
- rel_names = ",".join(["'" + rel_name + "'" for rel_name in rel_names])
343
- inverse_names = ",".join(["'" + rel_name + "'" for rel_name in inverse_names])
344
- entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
345
- if rel_names and inverse_names and entity_ids:
346
- local_queries["from"].append(
347
- {
348
- "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({rel_names}) AND `os_entity_uid_from` IN ({entity_ids}) LIMIT {limit}"
349
- }
350
- )
351
- local_queries["to"].append(
352
- {
353
- "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({inverse_names}) AND `os_entity_uid_to` IN ({entity_ids}) LIMIT {limit}"
354
- }
355
- )
356
- local_queries = {
357
- **{
358
- "from_" + str(i): local_queries["from"][i]
359
- for i in range(len(local_queries["from"]))
360
- },
361
- **{
362
- "to_" + str(i): local_queries["to"][i]
363
- for i in range(len(local_queries["to"]))
364
- },
365
- }
366
- metrics.n_queries += len(local_queries)
367
- local_stream = multiquery_ontology.streaming(
368
- sql_queries={
369
- query_id: query_data["query"]
370
- for query_id, query_data in local_queries.items()
371
- },
372
- client=client,
373
- timeout=timeout / 2.0,
374
- )
375
- all_results = await _get_stream_result(local_stream)
376
- middle_entities = []
377
- for query_id, data in all_results.items():
378
- if data == None:
379
- metrics.n_cancelled_queries += 1
380
- if not data:
381
- continue
382
- query_id = query_id.split("_")
383
- data = _left_join(
384
- [e[0] for e in expanded_entities.values()],
385
- data,
386
- "os_entity_uid",
387
- "os_entity_uid_" + query_id[0],
388
- )
389
- inverse_direction = "to" if query_id[0] == "from" else "from"
390
- data = {
391
- e[0]["os_entity_uid"]: (e[0], {r["os_entity_uid"]: (r, {}) for r in e[1]})
392
- for e in data
393
- }
394
- middle_entities.extend(
395
- [(inverse_direction, r) for e in data.values() for r in e[1].values()]
396
- )
397
- for entity_id, entities in data.items():
398
- if entity_id not in expanded_entities:
399
- expanded_entities[entity_id] = [entities[0], {}]
400
- for sub_entity_id, sub_entities in entities[1].items():
401
- if sub_entity_id not in expanded_entities[entity_id][1]:
402
- expanded_entities[entity_id][1][sub_entity_id] = [
403
- sub_entities[0],
404
- {},
405
- ]
406
- expanded_entities[entity_id][1][sub_entity_id][1].update(
407
- sub_entities[1]
408
- )
409
- middle_entities = [
410
- {
411
- "entity_id": r[1][0]["os_entity_uid_" + r[0]],
412
- "concept_name": r[1][0]["os_entity_type_" + r[0]],
413
- "direction": r[0],
414
- "relationship": r[1],
415
- }
416
- for r in middle_entities
417
- ]
418
- middle_entities = sorted(middle_entities, key=lambda x: x["concept_name"])
419
- middle_entities = groupby(middle_entities, key=lambda x: x["concept_name"])
420
- middle_entities = {e[0]: list(e[1]) for e in middle_entities}
421
- local_queries_2 = []
422
- for concept_name, entities in middle_entities.items():
423
- entity_ids = ",".join(["'" + e["entity_id"] + "'" for e in entities])
424
- if entity_ids:
425
- local_queries_2.append(
426
- {
427
- "query": f"SELECT {target_fields} FROM `timbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
428
- }
429
- )
430
- local_queries_2 = {str(i): local_queries_2[i] for i in range(len(local_queries_2))}
431
- metrics.n_queries += len(local_queries_2)
432
- local_stream_2 = multiquery_ontology.streaming(
433
- sql_queries={
434
- query_id: query_data["query"]
435
- for query_id, query_data in local_queries_2.items()
436
- },
437
- client=client,
438
- timeout=timeout / 2.0,
439
- )
440
- all_results = await _get_stream_result(local_stream_2)
441
- middle_entities = list(chain(*middle_entities.values()))
442
- for query_id, data in all_results.items():
443
- if data == None:
444
- metrics.n_cancelled_queries += 1
445
- if not data:
446
- continue
447
- data = _left_join(middle_entities, data, "entity_id", "os_entity_uid")
448
- for entry in data:
449
- relationship_targets = entry[0]["relationship"]
450
- for target_entity in entry[1]:
451
- relationship_targets[1][target_entity["os_entity_uid"]] = target_entity
452
- if data:
453
- for rel in data:
454
- targets = rel[0]["relationship"][1]
455
- rel_uid = rel[0]["relationship"][0]["os_entity_uid"]
456
- for entity in expanded_entities.values():
457
- entity_rels = entity[1]
458
- if rel_uid in entity_rels:
459
- entity_rels[rel_uid][1].update(targets)
460
- metrics.exec_times["mtm_local"] = time.time() - start_time
461
- return expanded_entities
462
-
463
-
464
- async def expand_entities(
465
- entities,
466
- ontology,
467
- relationship_mappings_info,
468
- client,
469
- relationship_names_by_entity_type=None,
470
- batch_size=10000,
471
- avg_limit_per_entity=20,
472
- timeout=10.0,
473
- ):
474
- metrics = ExecutionMetrics()
475
- metrics.timeout = timeout
476
- start_time = time.time()
477
- ordered_entities = sorted(entities, key=lambda x: x["entity_type"])
478
- ordered_entities = list({e["os_entity_uid"]: e for e in entities}.values())
479
- entity_batches = [
480
- ordered_entities[i : i + batch_size]
481
- for i in range(0, len(ordered_entities), batch_size)
482
- ]
483
- expanded_entities = {}
484
- for batch in entity_batches:
485
- new_entities = await expand_entities_batch(
486
- batch,
487
- ontology,
488
- relationship_mappings_info,
489
- relationship_names_by_entity_type,
490
- metrics,
491
- client,
492
- avg_limit_per_entity,
493
- timeout,
494
- )
495
- expanded_entities.update(new_entities)
496
- metrics.exec_time = time.time() - start_time
497
- for _, entity_data in expanded_entities.items():
498
- for rel in entity_data[1].values():
499
- if rel[0]:
500
- metrics.n_relationships += 1
501
- metrics.relationship_names.add(rel[0]["os_relationship_name"])
502
- metrics.n_target_entities += len(rel[1])
503
- metrics.relationship_names = list(metrics.relationship_names)
504
- expanded_entities = [
505
- (e[0], [(r[0], list(r[1].values())[0]) for r in e[1].values()])
506
- for e in expanded_entities.values()
507
- ]
508
- return expanded_entities, metrics
509
-
510
-
511
- async def expand_entities_batch(
512
- entities,
513
- ontology,
514
- relationship_mappings,
515
- relationship_names_by_entity_type,
516
- metrics,
517
- client,
518
- avg_limit_per_entity=20,
519
- timeout=10.0,
520
- ):
521
- limit = avg_limit_per_entity * len(entities)
522
- expanded_entities = {e["os_entity_uid"]: [e, {}] for e in entities}
523
- entities_by_concept = sorted(entities, key=lambda x: x["entity_type"])
524
- entities_by_concept = groupby(entities_by_concept, key=lambda x: x["entity_type"])
525
- entities_by_concept = {e[0]: list(e[1]) for e in entities_by_concept}
526
- ontology_rels = {r["relationship_name"]: r for r in ontology["relationships"]}
527
- concepts_to_rels = {
528
- cn: c["relationships"] for cn, c in ontology["concepts"].items()
529
- }
530
- concepts_to_mixed_rels = {}
531
- concepts_to_otm_rels = {}
532
- concepts_to_local_rels = {}
533
- for concept_name in entities_by_concept.keys():
534
- relationship_names = None
535
- if relationship_names_by_entity_type:
536
- relationship_names = set()
537
- for cn_name in relationship_names_by_entity_type.keys():
538
- if is_child_concept(concept_name, cn_name, ontology):
539
- relationship_names = relationship_names.union(
540
- set(relationship_names_by_entity_type[cn_name])
541
- )
542
- rels = concepts_to_rels[concept_name]
543
- if relationship_names:
544
- rels = [r for r in rels if r in relationship_names]
545
- filtered_mtm_rels = []
546
- filtered_otm_rels = []
547
- filtered_local_mtm_rels = []
548
- for rel in rels:
549
- if not ontology_rels[rel]["is_mtm"]:
550
- filtered_otm_rels.append(rel)
551
- elif rel in relationship_mappings["unmapped"]:
552
- continue
553
- elif rel in relationship_mappings["local_only"]:
554
- filtered_local_mtm_rels.append(rel)
555
- else:
556
- filtered_mtm_rels.append(rel)
557
- concepts_to_mixed_rels[concept_name] = filtered_mtm_rels
558
- concepts_to_otm_rels[concept_name] = filtered_otm_rels
559
- concepts_to_local_rels[concept_name] = filtered_local_mtm_rels
560
- # OTM QUERIES: fetch target entities directly from the target tables with an IN statement
561
- expanded_entities = await expand_otm(
562
- expanded_entities,
563
- entities_by_concept,
564
- ontology_rels,
565
- concepts_to_otm_rels,
566
- metrics,
567
- client,
568
- timeout,
569
- limit,
570
- )
571
- # LOCAL MTM QUERIES: fetch relationships directly from the os_workspace_relationship table, then fetch target entities
572
- expanded_entities = await expand_mtm_local(
573
- expanded_entities,
574
- entities_by_concept,
575
- ontology_rels,
576
- concepts_to_local_rels,
577
- metrics,
578
- client,
579
- timeout,
580
- limit,
581
- )
582
- # MIXED MTM QUERIES: query via timbr to make the most of JOINs
583
- expanded_entities = await expand_mtm_mixed(
584
- expanded_entities,
585
- entities_by_concept,
586
- ontology,
587
- concepts_to_mixed_rels,
588
- ontology_rels,
589
- metrics,
590
- client,
591
- timeout,
592
- limit,
593
- )
594
- return expanded_entities
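With expand_entities.py deleted, 0.2.13 drops the whole relationship-expansion pipeline, including the _left_join helper the three expand_* passes were built on. For readers who depended on it, here is its contract restated as a small self-contained sketch (toy records, not Octostar data; the behavior matches the deleted code above, minus its in-place sorting of the inputs):

    from itertools import groupby

    def left_join(left_list, right_list, left_keys, right_keys):
        # Pair every left record with the list of right records sharing its
        # composite key; lefts with no match get an empty list.
        left_keys = left_keys if isinstance(left_keys, (list, tuple)) else [left_keys]
        right_keys = right_keys if isinstance(right_keys, (list, tuple)) else [right_keys]

        def key(item, keys):
            return tuple(item[k] for k in keys)

        rights = sorted(right_list, key=lambda r: key(r, right_keys))
        right_dict = {k: list(g) for k, g in groupby(rights, key=lambda r: key(r, right_keys))}
        return [(left, right_dict.get(key(left, left_keys), [])) for left in left_list]

    pairs = left_join(
        [{"uid": "1"}, {"uid": "2"}],
        [{"uid": "1", "name": "a"}, {"uid": "1", "name": "b"}],
        "uid",
        "uid",
    )
    # -> [({'uid': '1'}, [both matches]), ({'uid': '2'}, [])]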