streamlit-octostar-utils 0.2.12a2__tar.gz → 0.2.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/nifi.py +143 -72
  4. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/language.py +8 -2
  5. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/inheritance.py +1 -1
  6. streamlit_octostar_utils-0.2.14/streamlit_octostar_utils/ontology/relationships.py +28 -0
  7. streamlit_octostar_utils-0.2.12a2/streamlit_octostar_utils/ontology/expand_entities.py +0 -594
  8. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/LICENSE +0 -0
  9. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/README.md +0 -0
  10. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/__init__.py +0 -0
  11. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  12. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  13. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  14. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  15. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  16. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  17. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  18. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  19. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  20. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  21. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  22. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  23. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  24. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/__init__.py +0 -0
  25. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/dict.py +0 -0
  26. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/filetypes.py +0 -0
  27. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  28. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  29. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/core/timestamp.py +0 -0
  30. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  31. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/ontology/validation.py +0 -0
  38. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/style/__init__.py +0 -0
  39. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/style/common.py +0 -0
  40. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  42. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  43. {streamlit_octostar_utils-0.2.12a2 → streamlit_octostar_utils-0.2.14}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.2.12a2
3
+ Version: 0.2.14
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.2.12a2"
8
+ version = "0.2.14"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -24,7 +24,6 @@ from ..core.dict import recursive_update_dict, travel_dict, jsondict_hash
24
24
  from ..core.timestamp import now, string_to_datetime
25
25
  from .fastapi import DefaultErrorRoute, Route
26
26
  from ..ontology.inheritance import is_child_concept as is_child_concept_fn, get_label_keys
27
- from ..ontology.expand_entities import expand_entities
28
27
 
29
28
  RELATIONSHIP_ENTITY_NAME = "os_relationship"
30
29
  LOCAL_RELATIONSHIP_ENTITY_NAME = "os_workspace_relationship"
@@ -163,12 +162,16 @@ class NifiEntityProxy(object):
163
162
  if child_entity.uid == uid_to_search:
164
163
  found_entity = child_entity
165
164
  else:
166
- found_entity = _recursive_search_expanded_proxy(child_entity._proxy, uid_to_search)
165
+ found_entity = _recursive_search_expanded_proxy(
166
+ child_entity._proxy, uid_to_search
167
+ )
167
168
  if found_entity:
168
169
  return found_entity
169
170
 
170
171
  if not self._proxy:
171
- main_entities = itertools.chain(*[b.entities for b in self.context.in_batches])
172
+ main_entities = itertools.chain(
173
+ *[b.entities for b in self.context.in_batches]
174
+ )
172
175
  main_entities = {e.record["entity_id"]: e for e in main_entities}
173
176
  if main_entities.get(self.uid):
174
177
  self._proxy = main_entities.get(self.uid)
@@ -179,7 +182,9 @@ class NifiEntityProxy(object):
179
182
  self._proxy = found_entity._proxy
180
183
  return self._proxy
181
184
  ## TODO: Try to get the entity from the database with query_ontology()
182
- raise AttributeError(f"Cannot find children with UUID {self.uid}! It may exist in the database?")
185
+ raise AttributeError(
186
+ f"Cannot find children with UUID {self.uid}! It may exist in the database?"
187
+ )
183
188
 
184
189
  def __getattr__(self, name):
185
190
  if name in self.__dict__:
@@ -213,24 +218,28 @@ class NifiFragmenter(object):
213
218
  raise ValueError("Must have at least 2 entities for fragmentation")
214
219
  identifier = str(uuid.uuid4())
215
220
  for i, entity in enumerate(fragments):
216
- travel_dict(entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w")(
217
- {"identifier": identifier, "count": count, "index": i}
218
- )
221
+ travel_dict(
222
+ entity.request["nifi_attributes"], fragmenter_keylist.split("."), "w"
223
+ )({"identifier": identifier, "count": count, "index": i})
219
224
  if "fragment" not in entity.request["config"]:
220
225
  entity.request["config"]["fragment"] = {}
221
226
  if "fragments_stack" not in entity.request["config"]["fragment"]:
222
227
  entity.request["config"]["fragment"]["fragments_stack"] = []
223
- entity.request["config"]["fragment"]["fragments_stack"].insert(0, fragmenter_keylist)
224
- entity.request["nifi_attributes"]["fragments_stack"] = entity.request["config"]["fragment"][
225
- "fragments_stack"
226
- ]
227
- travel_dict(entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w")(
228
- {"identifier": identifier, "count": count, "index": i}
228
+ entity.request["config"]["fragment"]["fragments_stack"].insert(
229
+ 0, fragmenter_keylist
229
230
  )
231
+ entity.request["nifi_attributes"]["fragments_stack"] = entity.request[
232
+ "config"
233
+ ]["fragment"]["fragments_stack"]
234
+ travel_dict(
235
+ entity.request["config"]["fragment"], fragmenter_keylist.split("."), "w"
236
+ )({"identifier": identifier, "count": count, "index": i})
230
237
 
231
238
  def push_defragment_strategy(fragment, defragmenter_config):
232
239
  pointer = fragment.request["config"]
233
- last_fragmenter_keylist = fragment.request["config"]["fragment"]["fragments_stack"][0]
240
+ last_fragmenter_keylist = fragment.request["config"]["fragment"][
241
+ "fragments_stack"
242
+ ][0]
234
243
  for k in ("fragment." + last_fragmenter_keylist).split("."):
235
244
  if not pointer.get(k):
236
245
  pointer[k] = {}
@@ -249,7 +258,7 @@ class NifiEntityBatch(object):
249
258
 
250
259
  class NifiContextManager(object):
251
260
  HEADLESS_PROCESSOR_NAME = "headless"
252
-
261
+
253
262
  class SyncFlag(Enum):
254
263
  UPSERT_ENTITY_ALL = 0 # bool
255
264
  UPSERT_ENTITY_SPECIFIC_FIELDS = 1 # 'fields': list of record fields
@@ -270,7 +279,9 @@ class NifiContextManager(object):
270
279
  @property
271
280
  def ontology(self):
272
281
  if not self._ontology:
273
- self._ontology = fetch_ontology_data.sync(ontology_name=self.ontology_name, client=self.client)
282
+ self._ontology = fetch_ontology_data.sync(
283
+ ontology_name=self.ontology_name, client=self.client
284
+ )
274
285
  return self._ontology
275
286
 
276
287
  def _config_get(entity, keylist):
@@ -339,7 +350,9 @@ class NifiContextManager(object):
339
350
  return self
340
351
 
341
352
  def get_workspaces_permissions(self, workspace_ids):
342
- permissions_to_fetch = list(set(workspace_ids).difference(set(list(self.permissions.keys()))))
353
+ permissions_to_fetch = list(
354
+ set(workspace_ids).difference(set(list(self.permissions.keys())))
355
+ )
343
356
  if permissions_to_fetch:
344
357
  permissions = get_permissions.sync(permissions_to_fetch, client=self.client)
345
358
  self.permissions.update(permissions)
@@ -369,13 +382,18 @@ class NifiContextManager(object):
369
382
  entities.append(entity)
370
383
  for child_entity in entity.children_entities:
371
384
  if not child_entity.drop_on_output:
372
- if child_entity.output_as_independent or child_entity.output_as_child:
385
+ if (
386
+ child_entity.output_as_independent
387
+ or child_entity.output_as_child
388
+ ):
373
389
  if processor_name != NifiContextManager.HEADLESS_PROCESSOR_NAME:
374
390
  child_entity.request["last_processor_name"] = processor_name
375
391
  if child_entity.output_as_independent:
376
392
  if not child_entity._proxy:
377
393
  child_entity.fetch_proxy()
378
- entities.extend(_process_entity(child_entity._proxy, processor_name))
394
+ entities.extend(
395
+ _process_entity(child_entity._proxy, processor_name)
396
+ )
379
397
  return entities
380
398
 
381
399
  entities = itertools.chain(*[b.entities for b in entity_batches])
@@ -386,7 +404,9 @@ class NifiContextManager(object):
386
404
  all_entities,
387
405
  key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
388
406
  )
389
- self.out_entities = list({e.record["entity_id"]: e for e in all_entities}.values())
407
+ self.out_entities = list(
408
+ {e.record["entity_id"]: e for e in all_entities}.values()
409
+ )
390
410
  self.sync_entities()
391
411
  return [entity for entity in self.jsonify(self.out_entities)["content"]]
392
412
 
@@ -394,19 +414,23 @@ class NifiContextManager(object):
394
414
  error_response = DefaultErrorRoute.format_error(exc)
395
415
  entity.request["exception"]["code"] = error_response.status_code
396
416
  entity.request["exception"]["body"] = json.loads(error_response.body)["message"]
397
- travel_dict(entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w")(
398
- entity.request["exception"]["body"]
399
- )
400
- travel_dict(entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w")(
401
- entity.request["exception"]["code"]
402
- )
417
+ travel_dict(
418
+ entity.request["nifi_attributes"], ["invokehttp", "response", "body"], "w"
419
+ )(entity.request["exception"]["body"])
420
+ travel_dict(
421
+ entity.request["nifi_attributes"], ["invokehttp", "response", "code"], "w"
422
+ )(entity.request["exception"]["code"])
403
423
  entity.request["nifi_attributes"]["raised_exc"] = True
404
424
 
405
425
  def sync_entities(self):
406
426
  if not self.lazy_sync:
407
427
  entities = self.out_entities
408
428
  else:
409
- entities = [e for e in self.out_entities if e.record["entity_id"] in self.nonlazy_sync_ids]
429
+ entities = [
430
+ e
431
+ for e in self.out_entities
432
+ if e.record["entity_id"] in self.nonlazy_sync_ids
433
+ ]
410
434
  if not entities:
411
435
  return
412
436
  reserved_fields = [
@@ -436,16 +460,18 @@ class NifiContextManager(object):
436
460
  for entity in entities:
437
461
  if entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS):
438
462
  concept_name = entity.record["entity_type"]
439
- rels_to_fetch = entity.sync_params.get(NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, [])
463
+ rels_to_fetch = entity.sync_params.get(
464
+ NifiContextManager.SyncFlag.FETCH_RELATIONSHIPS, []
465
+ )
440
466
  for rel in rels_to_fetch:
441
467
  if rel not in fetch_relationships_entities:
442
468
  fetch_relationships_entities[rel] = []
443
469
  fetch_relationships_entities[rel].append(entity)
444
470
  if concept_name not in fetch_concept_relationships:
445
471
  fetch_concept_relationships[concept_name] = set()
446
- fetch_concept_relationships[concept_name] = fetch_concept_relationships[concept_name].union(
447
- set(rels_to_fetch)
448
- )
472
+ fetch_concept_relationships[concept_name] = fetch_concept_relationships[
473
+ concept_name
474
+ ].union(set(rels_to_fetch))
449
475
  for k in fetch_concept_relationships.keys():
450
476
  fetch_concept_relationships[k] = list(fetch_concept_relationships[k])
451
477
  # UPSERT ENTITIES
@@ -491,7 +517,7 @@ class NifiContextManager(object):
491
517
  file.request["is_temporary"] = False
492
518
  file.request["entity_timestamp"] = file.record["os_last_updated_at"]
493
519
  # FETCH RELATIONSHIPS
494
- """
520
+ '''
495
521
  if fetch_relationships_entities:
496
522
  relationship_mappings_info = relationship_mappings.sync_detailed(
497
523
  client=self.client
@@ -547,7 +573,7 @@ class NifiContextManager(object):
547
573
  child_rel.request["entity_timestamp"] = rel.get(
548
574
  "os_last_updated_at"
549
575
  )
550
- """
576
+ '''
551
577
  # CLEAN SYNC PARAMS
552
578
  for entity in entities:
553
579
  entity.sync_params = {}
@@ -556,10 +582,15 @@ class NifiContextManager(object):
556
582
  for entity in entities:
557
583
  fields = set()
558
584
 
559
- if entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL) or entity.request["is_temporary"]:
585
+ if (
586
+ entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_ALL)
587
+ or entity.request["is_temporary"]
588
+ ):
560
589
  fields = fields.union(set(list(entity.record.keys())))
561
590
 
562
- if entity.sync_params.get(NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS):
591
+ if entity.sync_params.get(
592
+ NifiContextManager.SyncFlag.UPSERT_ENTITY_SPECIFIC_FIELDS
593
+ ):
563
594
  fields = fields.union(
564
595
  set(
565
596
  entity.sync_params.get(
@@ -570,7 +601,9 @@ class NifiContextManager(object):
570
601
  )
571
602
  )
572
603
  if fields:
573
- entities_to_upsert.append((entity, [f for f in list(fields) if f not in reserved_fields]))
604
+ entities_to_upsert.append(
605
+ (entity, [f for f in list(fields) if f not in reserved_fields])
606
+ )
574
607
 
575
608
  def __exit__(self, exc_type, exc_val, exc_tb):
576
609
  if exc_val is not None:
@@ -584,7 +617,9 @@ class NifiContextManager(object):
584
617
  for entity in entities:
585
618
  if isinstance(entity, NifiEntityProxy):
586
619
  children.extend(entity.children_entities)
587
- children.extend(_recursive_collect_proxies(entity.children_entities))
620
+ children.extend(
621
+ _recursive_collect_proxies(entity.children_entities)
622
+ )
588
623
  return children
589
624
 
590
625
  all_proxies = _recursive_collect_proxies(entities)
@@ -597,21 +632,15 @@ class NifiContextManager(object):
597
632
 
598
633
 
599
634
  class NifiEntity(object):
600
- def __init__(self, context, request, record, annotations, all_independent_uids, children=[], contents=None):
635
+ def __init__(
636
+ self, context, request, record, annotations, all_independent_uids, children=[], contents=None
637
+ ):
601
638
  self.context = context
602
639
  self.request = request
603
640
  self.record = record
604
641
  self.annotations = annotations
605
- assert (
606
- self.record.get("os_entity_uid")
607
- and self.record.get("entity_id")
608
- and self.record["os_entity_uid"] == self.record["entity_id"]
609
- )
610
- assert (
611
- self.record.get("os_concept")
612
- and self.record.get("entity_type")
613
- and self.record["os_concept"] == self.record["entity_type"]
614
- )
642
+ assert self.record.get("os_entity_uid") and self.record.get("entity_id") and self.record["os_entity_uid"] == self.record["entity_id"]
643
+ assert self.record.get("os_concept") and self.record.get("entity_type") and self.record["os_concept"] == self.record["entity_type"]
615
644
  if "entity_label" not in self.record:
616
645
  self.record["entity_label"] = self.label
617
646
  children = [c for c in children if isinstance(c, (str, dict))]
@@ -631,7 +660,9 @@ class NifiEntity(object):
631
660
  child_types = [c["entity_type"] for c in proxy_entity_children] + [
632
661
  c["record"]["entity_type"] for c in full_entity_children
633
662
  ]
634
- output_as_child = [False] * len(proxy_entity_children) + [True] * len(full_entity_children)
663
+ output_as_child = [False] * len(proxy_entity_children) + [True] * len(
664
+ full_entity_children
665
+ )
635
666
  output_as_independent = [uid in all_independent_uids for uid in child_uids]
636
667
  full_entity_children = [
637
668
  NifiEntity(
@@ -645,7 +676,9 @@ class NifiEntity(object):
645
676
  )
646
677
  for c in full_entity_children
647
678
  ]
648
- proxy_otm_children = [NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children]
679
+ proxy_otm_children = [
680
+ NifiOTMRelationshipProxy(**otm_child) for otm_child in proxy_otm_children
681
+ ]
649
682
  child_proxies = [None] * len(proxy_entity_children) + full_entity_children
650
683
  self.children = [
651
684
  NifiEntityProxy(
@@ -673,18 +706,21 @@ class NifiEntity(object):
673
706
 
674
707
  @property
675
708
  def sync_params(self):
676
- return {NifiContextManager.SyncFlag[k]: v for k, v in (self.request.get("sync_params") or {}).items()}
709
+ return {
710
+ NifiContextManager.SyncFlag[k]: v
711
+ for k, v in (self.request.get("sync_params") or {}).items()
712
+ }
677
713
 
678
714
  @sync_params.setter
679
715
  def sync_params(self, new_params):
680
716
  self.request["sync_params"] = {
681
- (k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v for k, v in new_params.items()
717
+ (k.name if isinstance(k, NifiContextManager.SyncFlag) else k): v
718
+ for k, v in new_params.items()
682
719
  }
683
720
 
684
721
  @property
685
722
  def metadata(self):
686
723
  return self.annotations
687
-
688
724
  @metadata.setter
689
725
  def metadata(self, new_metadata):
690
726
  self.annotations = new_metadata
@@ -707,7 +743,9 @@ class NifiEntity(object):
707
743
  contents_pointer = deepcopy(self.request["contents_pointer"])
708
744
  ptr_location = contents_pointer.get("location")
709
745
  if ptr_location == "attachment" and not contents_pointer.get("pointer"):
710
- contents_pointer["pointer"] = f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
746
+ contents_pointer["pointer"] = (
747
+ f"{self.record['os_workspace']}/{self.record['os_entity_uid']}"
748
+ )
711
749
  return self.request["contents_pointer"]
712
750
 
713
751
  @contents_pointer.setter
@@ -727,7 +765,9 @@ class NifiEntity(object):
727
765
  return list(
728
766
  filter(
729
767
  lambda x: isinstance(x, NifiOTMRelationshipProxy)
730
- or is_child_concept_fn(x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology),
768
+ or is_child_concept_fn(
769
+ x.entity_type, RELATIONSHIP_ENTITY_NAME, self.context.ontology
770
+ ),
731
771
  self.children,
732
772
  )
733
773
  )
@@ -746,12 +786,18 @@ class NifiEntity(object):
746
786
  )
747
787
  if (
748
788
  self.record.get("os_workspace")
749
- and (permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE) >= PermissionLevel.WRITE
789
+ and (
790
+ permissions.get(self.record.get("os_workspace")) or PermissionLevel.NONE
791
+ )
792
+ >= PermissionLevel.WRITE
750
793
  ):
751
794
  return self.record["os_workspace"]
752
795
  elif (
753
796
  self.request.get("fallback_os_workspace")
754
- and (permissions.get(self.request.get("fallback_os_workspace")) or PermissionLevel.NONE)
797
+ and (
798
+ permissions.get(self.request.get("fallback_os_workspace"))
799
+ or PermissionLevel.NONE
800
+ )
755
801
  >= PermissionLevel.WRITE
756
802
  ):
757
803
  return self.request["fallback_os_workspace"]
@@ -785,10 +831,14 @@ class NifiEntity(object):
785
831
  entity_type = None
786
832
  if isinstance(self, NifiEntityProxy):
787
833
  entity_type = self.entity_type
788
- return entity_type == type or is_child_concept_fn(entity_type, type, self.context.ontology)
834
+ return entity_type == type or is_child_concept_fn(
835
+ entity_type, type, self.context.ontology
836
+ )
789
837
  else:
790
838
  entity_type = self.record["entity_type"]
791
- return entity_type == type or type in self.request["ontology_info"]["parents"]
839
+ return (
840
+ entity_type == type or type in self.request["ontology_info"]["parents"]
841
+ )
792
842
 
793
843
  def is_fragmented(self) -> bool:
794
844
  return bool(self.request["config"].get("fragment", {}).get("fragments_stack"))
@@ -804,7 +854,7 @@ class NifiEntity(object):
804
854
  if _is_sub_fragment_recursive(value):
805
855
  return True
806
856
  return False
807
-
857
+
808
858
  if not self.is_fragmented():
809
859
  return True
810
860
  fragment = entity.request.get("config", {}).get("fragment", {})
@@ -831,7 +881,10 @@ class NifiEntity(object):
831
881
  else:
832
882
  proxy_entity_children.append(child)
833
883
  proxy_entity_children = list({c.uid: c for c in proxy_entity_children}.values())
834
- proxy_entity_children = [{"entity_id": c.uid, "entity_type": c.entity_type} for c in proxy_entity_children]
884
+ proxy_entity_children = [
885
+ {"entity_id": c.uid, "entity_type": c.entity_type}
886
+ for c in proxy_entity_children
887
+ ]
835
888
  proxy_otm_children = list(
836
889
  {
837
890
  c.record["os_entity_uid_from"]
@@ -847,7 +900,9 @@ class NifiEntity(object):
847
900
  full_entity_children,
848
901
  key=lambda x: string_to_datetime(x.record.get("os_last_updated_at")),
849
902
  )
850
- full_entity_children = list({c.uid: c.to_json() for c in full_entity_children}.values())
903
+ full_entity_children = list(
904
+ {c.uid: c.to_json() for c in full_entity_children}.values()
905
+ )
851
906
  children = full_entity_children + proxy_entity_children + proxy_otm_children
852
907
  return {
853
908
  "request": self.request,
@@ -931,7 +986,11 @@ class NifiEntity(object):
931
986
  ):
932
987
  return self._add_entity(
933
988
  os_relationship_workspace,
934
- (LOCAL_RELATIONSHIP_ENTITY_NAME if os_relationship_workspace else RELATIONSHIP_ENTITY_NAME),
989
+ (
990
+ LOCAL_RELATIONSHIP_ENTITY_NAME
991
+ if os_relationship_workspace
992
+ else RELATIONSHIP_ENTITY_NAME
993
+ ),
935
994
  {
936
995
  **relationship_fields,
937
996
  "os_entity_uid_from": os_entity_uid_from,
@@ -1013,7 +1072,9 @@ class NifiEntity(object):
1013
1072
  os_relationship_type,
1014
1073
  )
1015
1074
  child_entity._contents = file
1016
- child_entity.request["contents_pointer"] = NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
1075
+ child_entity.request["contents_pointer"] = (
1076
+ NifiEntityModel.RequestModel.ContentsPointerModel(location="local")
1077
+ )
1017
1078
  return child_entity, child_rel
1018
1079
 
1019
1080
  def add_tag(self, os_workspace, name, group, order, color):
@@ -1026,20 +1087,21 @@ class NifiEntity(object):
1026
1087
  )
1027
1088
 
1028
1089
  def add_metadata(
1029
- self,
1030
- json,
1031
- merge_method: Callable[[Any, Any], Any],
1032
- recurse: Union[bool, int] = False,
1090
+ self, json, merge_method: Callable[[Any, Any], Any], recurse: Union[bool, int] = False,
1033
1091
  ):
1034
1092
  if not self.metadata:
1035
1093
  self.metadata = {}
1036
- self.metadata = recursive_update_dict(self.metadata, json, merge_method, recurse)
1094
+ self.metadata = recursive_update_dict(
1095
+ self.metadata, json, merge_method, recurse
1096
+ )
1037
1097
 
1038
1098
  def propagate_metadata(self, to_entity, fields=None, merge_method=lambda _, v2: v2):
1039
1099
  metadata_to_propagate = deepcopy(self.metadata)
1040
1100
  if fields:
1041
1101
  metadata_to_propagate = {k: v for k, v in self.metadata if k in fields}
1042
- to_entity.metadata = recursive_update_dict(to_entity.metadata, metadata_to_propagate, merge_method)
1102
+ to_entity.metadata = recursive_update_dict(
1103
+ to_entity.metadata, metadata_to_propagate, merge_method
1104
+ )
1043
1105
 
1044
1106
 
1045
1107
  def more_recent_than(record_a, record_b):
@@ -1098,10 +1160,19 @@ class NifiRoute(Route):
1098
1160
  query_params = request.query_params
1099
1161
  processor_suffix = query_params["processor_suffix"]
1100
1162
  body = await request.json()
1101
- processor_name = "processor." + self.processor_name + "." + op.replace("-", "_") + "." + processor_suffix
1163
+ processor_name = (
1164
+ "processor."
1165
+ + self.processor_name
1166
+ + "."
1167
+ + op.replace("-", "_")
1168
+ + "."
1169
+ + processor_suffix
1170
+ )
1102
1171
  if op not in self.endpoints.keys():
1103
1172
  raise StarletteHTTPException(401, f"Route {op} is forbidden for NiFi.")
1104
- task_id = await self.celery_executor.send_task(self.endpoints[op], args=[body, processor_name])
1173
+ task_id = await self.celery_executor.send_task(
1174
+ self.endpoints[op], args=[body, processor_name]
1175
+ )
1105
1176
  return task_id
1106
1177
 
1107
1178
  @staticmethod
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  import py3langid as langid
3
- import iso639 as languages
3
+ from iso639 import Lang
4
4
 
5
5
 
6
6
  def detect_language(text, min_confidence=None):
@@ -11,5 +11,11 @@ def detect_language(text, min_confidence=None):
11
11
  if min_confidence and confidence < min_confidence:
12
12
  return None, confidence
13
13
  detected_lang = re.sub("[^A-Za-z]", "", detected_lang).lower()
14
- detected_lang = languages.to_name(detected_lang).lower()
14
+ detected_lang = Lang(detected_lang).name.lower()
15
15
  return detected_lang, confidence
16
+
17
+ def to_name(alpha2):
18
+ return Lang(alpha2).name.lower()
19
+
20
+ def to_alpha2(name):
21
+ return Lang(name).pt1
@@ -12,4 +12,4 @@ def get_label_keys(type, ontology):
12
12
  if not label_key:
13
13
  continue
14
14
  label_keys[label_key] = None
15
- return list(label_keys.keys())
15
+ return list(label_keys.keys())
@@ -0,0 +1,28 @@
1
+ from .inheritance import is_child_concept
2
+
3
+
4
+ def get_relationships_between_concepts(source, target, ontology):
5
+ ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
6
+ from_rels = ontology["concepts"][source]["relationships"]
7
+ from_rels = [ontology_rels[r] for r in from_rels]
8
+ from_rels = [
9
+ r
10
+ for r in from_rels
11
+ if is_child_concept(
12
+ target,
13
+ r["target_concept"],
14
+ {
15
+ "concepts": ontology["concepts"],
16
+ "relationships": list(ontology_rels.values()),
17
+ },
18
+ )
19
+ ]
20
+ return from_rels
21
+
22
+
23
+ def invert_relationships(rels, ontology):
24
+ ontology_rels = {k["relationship_name"]: k for k in ontology["relationships"]}
25
+ inverses = []
26
+ for rel in rels:
27
+ inverses.append(ontology_rels[rel]["inverse_name"])
28
+ return inverses
@@ -1,594 +0,0 @@
1
- from itertools import groupby, chain
2
- import time
3
- import uuid
4
- from octostar.utils.ontology import multiquery_ontology
5
- from octostar.utils.exceptions import StopAsyncIterationWithResult
6
-
7
- from .inheritance import is_child_concept
8
-
9
# Relationship-record columns selected for every expanded relationship row.
REL_FETCHED_FIELDS = [
    "os_relationship_name",
    "os_entity_uid_from",
    "os_entity_uid_to",
    "os_entity_uid",
    "os_entity_type_from",
    "os_entity_type_to",
    "os_workspace",
]
# Columns selected for every target entity reached through a relationship.
TARGETS_FETCHED_FIELDS = ["os_entity_uid", "os_concept", "entity_label", "os_workspace"]
19
-
20
-
21
class ExecutionMetrics:
    """Mutable counters gathered while expanding entities.

    Tracks how many queries were issued/cancelled, how many relationships
    and target entities were fetched, wall-clock timings (overall and per
    expansion phase), and the per-query timeout in effect.
    """

    def __init__(self):
        self.n_queries = 0            # queries submitted
        self.n_relationships = 0      # relationship rows collected
        self.n_target_entities = 0    # target entities collected
        self.exec_time = 0.0          # total wall-clock seconds
        self.exec_times = {}          # per-phase wall-clock seconds
        self.n_cancelled_queries = 0  # queries that returned no result
        self.relationship_names = set()
        self.timeout = 0              # per-query timeout in effect

    def print_metrics(self):
        """Dump all counters to stdout, one labelled line each."""
        report = (
            "Execution Metrics:",
            f" Execution Time: {self.exec_time}",
            f" Per-Type Execution Times: {self.exec_times}",
            f" Number of Queries: {self.n_queries}",
            f" Number of Cancelled Queries: {self.n_cancelled_queries}",
            f" Timeout per Query: {self.timeout}",
            f" Number of Relationships: {self.n_relationships}",
            f" Number of Target Entities: {self.n_target_entities}",
            f" Relationship Names: {self.relationship_names}",
        )
        for line in report:
            print(line)
42
-
43
-
44
async def _get_stream_result(stream):
    """Drain an async iterator and capture its terminal value.

    The streaming helpers signal completion by raising
    StopAsyncIterationWithResult carrying the aggregate result in `.value`;
    if the stream ends without raising it, None is returned.
    """
    result = None
    try:
        async for _ in stream:
            pass  # items are ignored; only the terminal exception matters
    except StopAsyncIterationWithResult as e:
        result = e.value
    return result
52
-
53
-
54
- def _left_join(left_list, right_list, left_keys, right_keys):
55
- left_keys = left_keys if isinstance(left_keys, (list, tuple)) else [left_keys]
56
- right_keys = right_keys if isinstance(right_keys, (list, tuple)) else [right_keys]
57
-
58
- def _make_composite_key(item, keys):
59
- return tuple(item[key] for key in keys)
60
-
61
- left_list.sort(key=lambda item: _make_composite_key(item, left_keys))
62
- right_list.sort(key=lambda item: _make_composite_key(item, right_keys))
63
- left_groups = groupby(
64
- left_list, key=lambda item: _make_composite_key(item, left_keys)
65
- )
66
- right_groups = groupby(
67
- right_list, key=lambda item: _make_composite_key(item, right_keys)
68
- )
69
- right_dict = {key: list(group) for key, group in right_groups}
70
- result = []
71
- for left_key_val, left_items in left_groups:
72
- left_items = list(left_items)
73
- associated_rights = right_dict.get(left_key_val, [])
74
- for left_item in left_items:
75
- result.append((left_item, associated_rights))
76
- return result
77
-
78
-
79
async def expand_otm(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_otm_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand one-to-many (property-join) relationships.

    For each (concept, relationship) pair in `concepts_to_otm_rels`, builds
    a SELECT against the `etimbr` table of the relationship's target concept
    matching the source entities' join-property values via an IN clause,
    runs all queries through `multiquery_ontology.streaming`, then merges
    results into `expanded_entities` (mutated and returned), shaped as
    {source_uid: [entity, {rel_key: [rel_info, {target_uid: target_row}]}]}.
    Updates `metrics` query/cancel counters and exec_times["otm"].
    """
    start_time = time.time()
    otm_queries = []
    target_fields = ",".join(
        ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
    )
    for concept_name, rels in concepts_to_otm_rels.items():
        for rel_name in rels:
            rel = ontology_rels[rel_name]
            source_properties = rel["source_properties"].split(",")
            # Gather each source entity's join-property tuple; rows with any
            # None value cannot participate in the IN clause and are dropped.
            source_properties_values = [
                [entity[p] for p in source_properties]
                for entity in entities_by_concept[concept_name]
            ]
            source_properties_values = [
                values
                for values in source_properties_values
                if all(value is not None for value in values)
            ]
            if source_properties_values:
                target_prop_names = rel["target_properties"].split(",")
                target_prop_names = ",".join(
                    ["`" + prop + "`" for prop in target_prop_names]
                )
                source_properties_values = ",".join(
                    [
                        "(" + ",".join(["'" + value + "'" for value in values]) + ")"
                        for values in source_properties_values
                    ]
                )
                if target_prop_names and source_properties_values:
                    # NOTE(review): values are interpolated into SQL with bare
                    # single quotes — assumes property values are trusted /
                    # pre-sanitized upstream; confirm.
                    query = f"""SELECT {target_fields} FROM `etimbr`.`{rel['target_concept']}` WHERE ({target_prop_names}) IN ({source_properties_values}) LIMIT {limit}"""
                    otm_queries.append(
                        {
                            "concept_name": concept_name,
                            "query": query,
                            "relationship_name": rel_name,
                            "source_properties": rel["source_properties"].split(","),
                            "target_properties": rel["target_properties"].split(","),
                        }
                    )
    otm_queries = {str(i): otm_queries[i] for i in range(len(otm_queries))}
    otm_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in otm_queries.items()
        },
        client=client,
        timeout=timeout,
    )
    metrics.n_queries += len(otm_queries)
    all_results = await _get_stream_result(otm_stream)
    for query_id, data in all_results.items():
        # None marks a query that returned no result (counted as cancelled);
        # NOTE(review): '== None' would idiomatically be 'is None'.
        if data == None:
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        query_data = otm_queries[query_id]
        # Join fetched target rows back onto the source entities of this
        # concept by the relationship's source/target property columns.
        data = _left_join(
            [
                e[0]
                for e in expanded_entities.values()
                if e[0]["os_concept"] == query_data["concept_name"]
            ],
            data,
            query_data["source_properties"],
            query_data["target_properties"],
        )
        # Reshape into the nested expanded_entities structure; the rel_info
        # for an OTM link only carries the relationship name.
        data = {
            e[0]["os_entity_uid"]: (
                e[0],
                {
                    r["os_entity_uid"]: (
                        {"os_relationship_name": query_data["relationship_name"]},
                        {r["os_entity_uid"]: r},
                    )
                    for r in e[1]
                },
            )
            for e in data
        }
        # Merge into the accumulator without clobbering earlier expansions.
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    metrics.exec_times["otm"] = time.time() - start_time
    return expanded_entities
183
-
184
-
185
async def expand_mtm_mixed(
    expanded_entities,
    entities_by_concept,
    ontology,
    concepts_to_mixed_rels,
    ontology_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand many-to-many relationships via `dtimbr` JOIN views.

    Builds one SELECT per (concept, relationship) over the source concept's
    `dtimbr` table, pulling relationship columns (aliased `rel__*`) and
    target-entity columns (aliased `tgt__*`) in a single joined query, then
    merges results into `expanded_entities` (mutated and returned).
    Updates `metrics` counters and exec_times["mtm_mixed"].
    """
    start_time = time.time()
    mixed_queries = []
    match_patterns = {}
    # NOTE(review): rel_fields is computed but never used below — looks like
    # leftover; the per-relationship aliases are built inline instead.
    rel_fields = ",".join(["`" + rel_field + "`" for rel_field in REL_FETCHED_FIELDS])
    for concept_name, rels in concepts_to_mixed_rels.items():
        if concept_name not in match_patterns:
            match_patterns[concept_name] = []
        match_patterns[concept_name].extend(
            [entity["os_entity_uid"] for entity in entities_by_concept[concept_name]]
        )
    for concept_name, entity_ids in match_patterns.items():
        rels = concepts_to_mixed_rels[concept_name]
        entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
        for rel_name in rels:
            rel = ontology_rels[rel_name]
            target_concept = rel["target_concept"]
            # Relationship columns exposed by the dtimbr view use the
            # `rel[target]_prop` naming; alias them to a stable rel__ prefix.
            relationship_fields = ",".join(
                [
                    f"`{rel_name}[{target_concept}]_"
                    + prop
                    + "` AS "
                    + "`rel__"
                    + prop
                    + "`"
                    for prop in REL_FETCHED_FIELDS
                ]
            )
            # Target-entity columns use `rel[target].prop`; alias to tgt__.
            target_fields = ",".join(
                [
                    f"`{rel_name}[{target_concept}]."
                    + prop
                    + "` AS "
                    + "`tgt__"
                    + prop
                    + "`"
                    for prop in TARGETS_FETCHED_FIELDS
                ]
            )
            all_fields = (
                f"{relationship_fields}, {target_fields}, `os_entity_uid`".strip(", ")
            )
            if entity_ids:
                mixed_queries.append(
                    {
                        "query": f"SELECT {all_fields} FROM `dtimbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
                    }
                )
    mixed_queries = {str(i): mixed_queries[i] for i in range(len(mixed_queries))}
    metrics.n_queries += len(mixed_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in mixed_queries.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    for (
        _,
        data,
    ) in (
        all_results.items()
    ): ## TO BE TESTED (data is always empty at the moment due to timbr bug)
        # NOTE(review): '== None' would idiomatically be 'is None'.
        if data == None:
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        data = _left_join(
            [e[0] for e in expanded_entities.values()],
            data,
            "os_entity_uid",
            "os_entity_uid",
        )
        # NOTE(review): _left_join returns (left, rights) tuples, so this
        # item assignment would raise TypeError on a non-empty result — this
        # path is currently unexercised (see timbr bug note above); verify
        # before relying on it.
        for elem in data:
            elem["#rel__os_entity_uid"] = elem["rel__os_entity_uid"] or (
                "temp-" + str(uuid.uuid4())
            )
        # Split each joined row into rel__ (relationship) and tgt__ (target)
        # column groups, keyed by the (possibly synthesized) relationship uid.
        data = {
            e[0]["os_entity_uid"]: (
                e[0],
                {
                    rt["#rel__os_entity_uid"]: (
                        {k[5:]: v for k, v in rt.items() if k.startswith("rel__")},
                        {
                            rt["tgt__os_entity_uid"]: {
                                k[5:]: v for k, v in rt.items() if k.startswith("tgt__")
                            }
                        },
                    )
                    for rt in e[1]
                },
            )
            for e in data
        }
        # NOTE(review): this second comprehension iterates the dict built
        # above (i.e. its string keys), so e[0]["os_entity_uid"] cannot work
        # as written — likely dead/leftover code; confirm before reviving.
        data = {
            e[0]["os_entity_uid"]: (
                e[0],
                {r["os_entity_uid"]: ({}, {r["os_entity_uid"]: r}) for r in e[1]},
            )
            for e in data
        }
        # Merge into the accumulator without clobbering earlier expansions.
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    metrics.exec_times["mtm_mixed"] = time.time() - start_time
    return expanded_entities
312
-
313
-
314
async def expand_mtm_local(
    expanded_entities,
    entities_by_concept,
    ontology_rels,
    concepts_to_local_rels,
    metrics,
    client,
    timeout,
    limit,
):
    """Expand many-to-many relationships stored locally in the workspace.

    Phase 1: query `timbr`.`os_workspace_relationship` in both directions
    (forward names on os_entity_uid_from, inverse names on os_entity_uid_to)
    and attach the relationship rows to `expanded_entities`.
    Phase 2: fetch the entities on the far side of those relationships,
    grouped by concept, and fill them into the already-attached rows.
    Mutates and returns `expanded_entities`; updates `metrics` counters and
    exec_times["mtm_local"].
    """
    start_time = time.time()
    local_queries = {"from": [], "to": []}
    match_patterns = {}
    target_fields = ",".join(
        ["`" + rel_field + "`" for rel_field in TARGETS_FETCHED_FIELDS]
    )
    rel_fields = ",".join(["`" + rel_field + "`" for rel_field in REL_FETCHED_FIELDS])
    for concept_name in concepts_to_local_rels.keys():
        if concept_name not in match_patterns:
            match_patterns[concept_name] = []
        match_patterns[concept_name].extend(
            [entity["os_entity_uid"] for entity in entities_by_concept[concept_name]]
        )
    for concept_name, entity_ids in match_patterns.items():
        rel_names = concepts_to_local_rels[concept_name]
        inverse_names = [
            ontology_rels[rel_name]["inverse_name"] for rel_name in rel_names
        ]
        rel_names = ",".join(["'" + rel_name + "'" for rel_name in rel_names])
        inverse_names = ",".join(["'" + rel_name + "'" for rel_name in inverse_names])
        entity_ids = ",".join(["'" + uid + "'" for uid in entity_ids])
        if rel_names and inverse_names and entity_ids:
            # One query per direction: forward rels anchored on _from,
            # inverse rels anchored on _to.
            local_queries["from"].append(
                {
                    "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({rel_names}) AND `os_entity_uid_from` IN ({entity_ids}) LIMIT {limit}"
                }
            )
            local_queries["to"].append(
                {
                    "query": f"SELECT {rel_fields} FROM `timbr`.`os_workspace_relationship` WHERE `os_relationship_name` IN ({inverse_names}) AND `os_entity_uid_to` IN ({entity_ids}) LIMIT {limit}"
                }
            )
    # Flatten to ids like "from_0"/"to_0" so the direction survives the
    # round-trip through the streaming API.
    local_queries = {
        **{
            "from_" + str(i): local_queries["from"][i]
            for i in range(len(local_queries["from"]))
        },
        **{
            "to_" + str(i): local_queries["to"][i]
            for i in range(len(local_queries["to"]))
        },
    }
    metrics.n_queries += len(local_queries)
    local_stream = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in local_queries.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream)
    middle_entities = []
    for query_id, data in all_results.items():
        # NOTE(review): '== None' would idiomatically be 'is None'.
        if data == None:
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        query_id = query_id.split("_")  # -> [direction, index]
        data = _left_join(
            [e[0] for e in expanded_entities.values()],
            data,
            "os_entity_uid",
            "os_entity_uid_" + query_id[0],
        )
        # The far-side entity sits on the opposite end of the matched column.
        inverse_direction = "to" if query_id[0] == "from" else "from"
        data = {
            e[0]["os_entity_uid"]: (e[0], {r["os_entity_uid"]: (r, {}) for r in e[1]})
            for e in data
        }
        # Remember every relationship row plus which side still needs its
        # entity fetched in phase 2.
        middle_entities.extend(
            [(inverse_direction, r) for e in data.values() for r in e[1].values()]
        )
        for entity_id, entities in data.items():
            if entity_id not in expanded_entities:
                expanded_entities[entity_id] = [entities[0], {}]
            for sub_entity_id, sub_entities in entities[1].items():
                if sub_entity_id not in expanded_entities[entity_id][1]:
                    expanded_entities[entity_id][1][sub_entity_id] = [
                        sub_entities[0],
                        {},
                    ]
                expanded_entities[entity_id][1][sub_entity_id][1].update(
                    sub_entities[1]
                )
    # Phase 2: resolve the far-side entities, one query per concept.
    middle_entities = [
        {
            "entity_id": r[1][0]["os_entity_uid_" + r[0]],
            "concept_name": r[1][0]["os_entity_type_" + r[0]],
            "direction": r[0],
            "relationship": r[1],
        }
        for r in middle_entities
    ]
    middle_entities = sorted(middle_entities, key=lambda x: x["concept_name"])
    middle_entities = groupby(middle_entities, key=lambda x: x["concept_name"])
    middle_entities = {e[0]: list(e[1]) for e in middle_entities}
    local_queries_2 = []
    for concept_name, entities in middle_entities.items():
        entity_ids = ",".join(["'" + e["entity_id"] + "'" for e in entities])
        if entity_ids:
            local_queries_2.append(
                {
                    "query": f"SELECT {target_fields} FROM `timbr`.`{concept_name}` WHERE `os_entity_uid` IN ({entity_ids}) LIMIT {limit}"
                }
            )
    local_queries_2 = {str(i): local_queries_2[i] for i in range(len(local_queries_2))}
    metrics.n_queries += len(local_queries_2)
    local_stream_2 = multiquery_ontology.streaming(
        sql_queries={
            query_id: query_data["query"]
            for query_id, query_data in local_queries_2.items()
        },
        client=client,
        timeout=timeout / 2.0,
    )
    all_results = await _get_stream_result(local_stream_2)
    middle_entities = list(chain(*middle_entities.values()))
    for query_id, data in all_results.items():
        # NOTE(review): '== None' would idiomatically be 'is None'.
        if data == None:
            metrics.n_cancelled_queries += 1
        if not data:
            continue
        data = _left_join(middle_entities, data, "entity_id", "os_entity_uid")
        # Fill fetched entities into the relationship's target slot, then
        # propagate them to every source entity holding that relationship.
        for entry in data:
            relationship_targets = entry[0]["relationship"]
            for target_entity in entry[1]:
                relationship_targets[1][target_entity["os_entity_uid"]] = target_entity
        if data:
            for rel in data:
                targets = rel[0]["relationship"][1]
                rel_uid = rel[0]["relationship"][0]["os_entity_uid"]
                for entity in expanded_entities.values():
                    entity_rels = entity[1]
                    if rel_uid in entity_rels:
                        entity_rels[rel_uid][1].update(targets)
    metrics.exec_times["mtm_local"] = time.time() - start_time
    return expanded_entities
462
-
463
-
464
async def expand_entities(
    entities,
    ontology,
    relationship_mappings_info,
    client,
    relationship_names_by_entity_type=None,
    batch_size=10000,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand relationships for `entities` in batches.

    Deduplicates entities by os_entity_uid, splits them into batches of
    `batch_size`, runs expand_entities_batch on each, and collects metrics.

    Returns:
        (expanded, metrics) where expanded is a list of
        (entity, [(rel_info, first_target_entity), ...]) pairs and metrics
        is the populated ExecutionMetrics instance.
    """
    metrics = ExecutionMetrics()
    metrics.timeout = timeout
    start_time = time.time()
    # NOTE(review): this sorted result is immediately overwritten by the
    # dedup below (which iterates the original `entities`) — the sort has
    # no effect as written; confirm intent.
    ordered_entities = sorted(entities, key=lambda x: x["entity_type"])
    ordered_entities = list({e["os_entity_uid"]: e for e in entities}.values())
    entity_batches = [
        ordered_entities[i : i + batch_size]
        for i in range(0, len(ordered_entities), batch_size)
    ]
    expanded_entities = {}
    for batch in entity_batches:
        new_entities = await expand_entities_batch(
            batch,
            ontology,
            relationship_mappings_info,
            relationship_names_by_entity_type,
            metrics,
            client,
            avg_limit_per_entity,
            timeout,
        )
        expanded_entities.update(new_entities)
    metrics.exec_time = time.time() - start_time
    # Tally relationship/target counts; rel is [rel_info, {uid: target}].
    for _, entity_data in expanded_entities.items():
        for rel in entity_data[1].values():
            if rel[0]:
                metrics.n_relationships += 1
                metrics.relationship_names.add(rel[0]["os_relationship_name"])
                metrics.n_target_entities += len(rel[1])
    metrics.relationship_names = list(metrics.relationship_names)
    # Flatten to the public shape; only the first target of each
    # relationship is kept.
    expanded_entities = [
        (e[0], [(r[0], list(r[1].values())[0]) for r in e[1].values()])
        for e in expanded_entities.values()
    ]
    return expanded_entities, metrics
509
-
510
-
511
async def expand_entities_batch(
    entities,
    ontology,
    relationship_mappings,
    relationship_names_by_entity_type,
    metrics,
    client,
    avg_limit_per_entity=20,
    timeout=10.0,
):
    """Expand one batch of entities through all three relationship kinds.

    Classifies each concept's relationships into one-to-many, locally-stored
    many-to-many ("local_only" mapping), and mixed many-to-many (skipping
    "unmapped" ones), then delegates to expand_otm, expand_mtm_local and
    expand_mtm_mixed in turn, threading the shared accumulator through.

    Returns:
        dict {os_entity_uid: [entity, {rel_key: [rel_info, {uid: target}]}]}
    """
    limit = avg_limit_per_entity * len(entities)
    expanded_entities = {e["os_entity_uid"]: [e, {}] for e in entities}
    # Group the batch by entity_type (groupby needs the pre-sort).
    entities_by_concept = sorted(entities, key=lambda x: x["entity_type"])
    entities_by_concept = groupby(entities_by_concept, key=lambda x: x["entity_type"])
    entities_by_concept = {e[0]: list(e[1]) for e in entities_by_concept}
    ontology_rels = {r["relationship_name"]: r for r in ontology["relationships"]}
    concepts_to_rels = {
        cn: c["relationships"] for cn, c in ontology["concepts"].items()
    }
    concepts_to_mixed_rels = {}
    concepts_to_otm_rels = {}
    concepts_to_local_rels = {}
    for concept_name in entities_by_concept.keys():
        # Optional per-type whitelist: collect names declared for this
        # concept or any ancestor it is a child of.
        relationship_names = None
        if relationship_names_by_entity_type:
            relationship_names = set()
            for cn_name in relationship_names_by_entity_type.keys():
                if is_child_concept(concept_name, cn_name, ontology):
                    relationship_names = relationship_names.union(
                        set(relationship_names_by_entity_type[cn_name])
                    )
        rels = concepts_to_rels[concept_name]
        if relationship_names:
            rels = [r for r in rels if r in relationship_names]
        filtered_mtm_rels = []
        filtered_otm_rels = []
        filtered_local_mtm_rels = []
        for rel in rels:
            if not ontology_rels[rel]["is_mtm"]:
                filtered_otm_rels.append(rel)
            elif rel in relationship_mappings["unmapped"]:
                continue  # unmapped MTM rels cannot be resolved; skip
            elif rel in relationship_mappings["local_only"]:
                filtered_local_mtm_rels.append(rel)
            else:
                filtered_mtm_rels.append(rel)
        concepts_to_mixed_rels[concept_name] = filtered_mtm_rels
        concepts_to_otm_rels[concept_name] = filtered_otm_rels
        concepts_to_local_rels[concept_name] = filtered_local_mtm_rels
    # OTM QUERIES: fetch target entities directly from the target tables with an IN statement
    expanded_entities = await expand_otm(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_otm_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # LOCAL MTM QUERIES: fetch relationships directly from the os_workspace_relationship table, then fetch target entities
    expanded_entities = await expand_mtm_local(
        expanded_entities,
        entities_by_concept,
        ontology_rels,
        concepts_to_local_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    # MIXED MTM QUERIES: query via timbr to make the most of JOINs
    expanded_entities = await expand_mtm_mixed(
        expanded_entities,
        entities_by_concept,
        ontology,
        concepts_to_mixed_rels,
        ontology_rels,
        metrics,
        client,
        timeout,
        limit,
    )
    return expanded_entities