streamlit-octostar-utils 0.5.0.dev1__tar.gz → 0.5.0.dev3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/nifi.py +271 -17
  4. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/LICENSE +0 -0
  5. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/README.md +0 -0
  6. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/__init__.py +0 -0
  7. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  8. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/celery.py +0 -0
  9. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
  10. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  11. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parallelism.py +0 -0
  12. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  13. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  14. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  15. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  16. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  17. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  18. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  19. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  20. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  21. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  22. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/__init__.py +0 -0
  23. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/dict.py +0 -0
  24. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/filetypes.py +0 -0
  25. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  26. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  27. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/core/timestamp.py +0 -0
  28. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  29. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  30. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/nlp/language.py +0 -0
  31. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/nlp/ner.py +0 -0
  32. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  33. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/octostar/client.py +0 -0
  34. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/octostar/context.py +0 -0
  35. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  36. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  37. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  38. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  39. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/ontology/validation.py +0 -0
  40. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/style/__init__.py +0 -0
  41. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/style/common.py +0 -0
  42. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/threading/__init__.py +0 -0
  43. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  44. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  45. {streamlit_octostar_utils-0.5.0.dev1 → streamlit_octostar_utils-0.5.0.dev3}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.5.0.dev1
3
+ Version: 0.5.0.dev3
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.5.0-dev.1"
8
+ version = "0.5.0-dev.3"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -1,4 +1,5 @@
1
1
  from copy import deepcopy
2
+ import hashlib
2
3
  import uuid
3
4
  from functools import wraps
4
5
  from contextlib import contextmanager
@@ -238,6 +239,216 @@ class NifiFragmenter(object):
238
239
  pointer.get("merge_params") or {}, defragmenter_config, lambda _, v2: v2
239
240
  )
240
241
 
242
+ _REQUIRED_FRAGMENT_FIELDS = ("index", "count", "identifier")
243
+
244
+ @staticmethod
245
+ def get_fragment_info(entity, fragmenter_keylist):
246
+ """Read fragment metadata (identifier, count, index, root_uid, merge_params)
247
+ for a given fragmenter level. Read-only -- does not mutate the entity.
248
+
249
+ Args:
250
+ entity: A NifiEntity or NifiEntityProxy.
251
+ fragmenter_keylist: Dot-separated key path into the fragment config
252
+ (e.g. "document_pages" or "audio_split").
253
+
254
+ Returns:
255
+ dict with keys like identifier, count, index, root_uid, merge_params.
256
+ Empty dict if fragmenter_keylist is empty or intermediate keys are
257
+ missing (entity not fragmented at this level).
258
+
259
+ Raises:
260
+ KeyError: If the final fragment key is missing from the config.
261
+ RuntimeError: If the fragment info exists but lacks required fields
262
+ (index, count, identifier).
263
+ """
264
+ if not fragmenter_keylist:
265
+ return {}
266
+ pointer = entity.request["config"]["fragment"]
267
+ for k in fragmenter_keylist.split(".")[:-1]:
268
+ if not pointer.get(k):
269
+ return {}
270
+ pointer = pointer[k]
271
+ info = pointer[fragmenter_keylist.split(".")[-1]]
272
+ missing = [f for f in NifiFragmenter._REQUIRED_FRAGMENT_FIELDS if f not in info]
273
+ if missing:
274
+ raise RuntimeError(
275
+ f"Fragment info for '{fragmenter_keylist}' is missing required "
276
+ f"field(s): {', '.join(missing)}"
277
+ )
278
+ return info
279
+
280
+ @staticmethod
281
+ def identify_fragment_groups(nifi_batches):
282
+ """Find all fragmented entities grouped by their active fragmenter level.
283
+
284
+ Args:
285
+ nifi_batches: List of NifiEntityBatch objects.
286
+
287
+ Returns:
288
+ dict mapping fragmenter_keylist to list of entities at that level.
289
+ Empty dict if no fragments found. Callers use get_fragment_info()
290
+ to fetch metadata per entity, and filter by index==0 to find roots.
291
+ """
292
+ all_entities = list(itertools.chain(*[b.entities for b in nifi_batches]))
293
+ groups = {}
294
+ for e in all_entities:
295
+ stack = e.request["config"].get("fragment", {}).get("fragments_stack", [])
296
+ if stack:
297
+ groups.setdefault(stack[0], []).append(e)
298
+ return groups
299
+
300
+ @staticmethod
301
+ def build_fragment_tree_from_children_entities(root_entity, fragmenter_keylist):
302
+ """Recursively build a tree from a root fragment entity by walking
303
+ its children_entities.
304
+
305
+ Args:
306
+ root_entity: The root entity (index 0) to start from.
307
+ fragmenter_keylist: The fragmenter level to build for.
308
+
309
+ Returns:
310
+ Nested dict with keys:
311
+ "entity": NifiEntity/NifiEntityProxy
312
+ "index": int
313
+ "merge_params": dict or None
314
+ "children": list of child trees
315
+ """
316
+ info = NifiFragmenter.get_fragment_info(root_entity, fragmenter_keylist)
317
+ child_fragments = []
318
+ for e in root_entity.children_entities:
319
+ try:
320
+ child_info = NifiFragmenter.get_fragment_info(e, fragmenter_keylist)
321
+ if child_info:
322
+ child_fragments.append(e)
323
+ except (AttributeError, KeyError):
324
+ pass
325
+ return {
326
+ "entity": root_entity,
327
+ "index": info.get("index"),
328
+ "merge_params": info.get("merge_params"),
329
+ "children": [
330
+ NifiFragmenter.build_fragment_tree_from_children_entities(child, fragmenter_keylist)
331
+ for child in child_fragments
332
+ ],
333
+ }
334
+
335
+ @staticmethod
336
+ def extract_tree_entities(tree):
337
+ """Flatten a fragment tree into a list of all entities (pre-order).
338
+
339
+ Args:
340
+ tree: Fragment tree node (from build_fragment_tree_from_children_entities).
341
+
342
+ Returns:
343
+ List of entities in pre-order traversal.
344
+ """
345
+ entities = [tree["entity"]]
346
+ for child in tree.get("children", []):
347
+ entities.extend(NifiFragmenter.extract_tree_entities(child))
348
+ return entities
349
+
350
+ @staticmethod
351
+ def iterate_fragments_tree(tree, order="post"):
352
+ """Yield tree nodes in traversal order.
353
+
354
+ Args:
355
+ tree: Fragment tree node (from build_fragment_tree_from_children_entities).
356
+ order: "post" (children first, default) or "pre" (parent first). Any other value yields nothing.
357
+
358
+ Yields:
359
+ dict nodes with "entity", "index", "merge_params", "children" keys.
360
+ """
361
+ children = sorted(tree.get("children", []), key=lambda x: x["index"])
362
+ if order == "pre":
363
+ yield tree
364
+ for child in children:
365
+ yield from NifiFragmenter.iterate_fragments_tree(child, order)
366
+ if order == "post":
367
+ yield tree
368
+
369
+ @staticmethod
370
+ def reduce_fragments_tree(tree, leaf_fn, parent_fn):
371
+ """Bottom-up tree reduction. Processes leaves first, then folds results up.
372
+
373
+ Args:
374
+ tree: Fragment tree node (from build_fragment_tree_from_children_entities).
375
+ leaf_fn: Callable(node) -> result, called on nodes with no children.
376
+ parent_fn: Callable(node, child_results) -> result, called on
377
+ nodes with children. child_results is a list of results from
378
+ child nodes, sorted by index.
379
+
380
+ Returns:
381
+ The result from the root node.
382
+ """
383
+ children = sorted(tree.get("children", []), key=lambda x: x["index"])
384
+ if not children:
385
+ return leaf_fn(tree)
386
+ child_results = [
387
+ NifiFragmenter.reduce_fragments_tree(child, leaf_fn, parent_fn)
388
+ for child in children
389
+ ]
390
+ return parent_fn(tree, child_results)
391
+
392
+ _FRAGMENTS_NAMESPACE = b"octostar.pipeline.fragments"
393
+
394
+ @staticmethod
395
+ def _create_deterministic_uuid(namespace: bytes, data: bytes):
396
+ return str(
397
+ uuid.uuid5(
398
+ uuid.UUID(bytes=hashlib.md5(namespace).digest()),
399
+ hashlib.md5(data).hexdigest(),
400
+ )
401
+ )
402
+
403
+ @staticmethod
404
+ def create_fragment_uuid(os_entity_uid, stable_fragment_identifier, processor_name, os_workspace):
405
+ """Create a deterministic UUID for a fragment.
406
+
407
+ The UUID is stable across re-runs for the same entity, fragment
408
+ identifier, processor, and workspace -- enabling idempotent fragment
409
+ creation while ensuring uniqueness across workspaces.
410
+
411
+ Args:
412
+ os_entity_uid: The parent entity UID.
413
+ stable_fragment_identifier: A unique and stable identifier within
414
+ the fragmentation (e.g. page range, keyframe number, face index).
415
+ processor_name: Name of the NiFi processor.
416
+ os_workspace: The workspace UID the fragment will be written to.
417
+
418
+ Returns:
419
+ A deterministic UUID string.
420
+ """
421
+ return NifiFragmenter._create_deterministic_uuid(
422
+ NifiFragmenter._FRAGMENTS_NAMESPACE,
423
+ (processor_name + "::" + os_workspace + "::" + os_entity_uid + "::" + stable_fragment_identifier).encode("utf-8"),
424
+ )
425
+
426
+ @staticmethod
427
+ def resolve_source_entity_uid(entity, fragment_root_source=None) -> str:
428
+ """Resolve the source_entity_uid to use for child fragments.
429
+
430
+ When fragment_root_source is set (a fragment name or stack index),
431
+ the UID is looked up via entity.get_fragment_root_uid() -- this is
432
+ necessary when the current entity is a clone that may not be persisted.
433
+ Otherwise falls back to the entity's own UID.
434
+
435
+ Args:
436
+ entity: A NifiEntity or NifiEntityProxy.
437
+ fragment_root_source: None to use the entity's own UID, an int (or any value int() accepts, e.g. "2")
438
+ to index into the fragments stack, or a string fragmenter
439
+ keylist name.
440
+
441
+ Returns:
442
+ The resolved source entity UID string.
443
+ """
444
+ if fragment_root_source is None:
445
+ return entity.record["os_entity_uid"]
446
+ try:
447
+ idx = int(fragment_root_source)
448
+ return entity.get_fragment_root_uid(idx)
449
+ except (ValueError, TypeError):
450
+ return entity.get_fragment_root_uid(fragment_root_source)
451
+
241
452
 
242
453
  class NifiEntityBatch(object):
243
454
  def __init__(self, entities, config, config_key):
@@ -777,25 +988,68 @@ class NifiEntity(object):
777
988
  entity_type = self.record["entity_type"]
778
989
  return entity_type == type or type in self.request["ontology_info"]["parents"]
779
990
 
780
- def is_fragmented(self) -> bool:
781
- return bool(self.request["config"].get("fragment", {}).get("fragments_stack"))
782
-
783
- def is_root_fragment(self, entity) -> bool:
784
- def _is_sub_fragment_recursive(fragment: dict) -> bool:
785
- if not isinstance(fragment, dict):
786
- return False
787
- if all(k in fragment for k in ["index", "count", "identifier"]):
788
- return fragment.get("index", 0) != 0
789
- for value in fragment.values():
790
- if isinstance(value, dict):
791
- if _is_sub_fragment_recursive(value):
792
- return True
793
- return False
794
-
991
+ def is_fragmented(self, fragment_name_or_idx=None) -> bool:
992
+ """Check whether this entity is part of a fragmentation.
993
+
994
+ Args:
995
+ fragment_name_or_idx: If None (default), returns True if the entity
996
+ belongs to any fragmentation level. If an int, checks whether the
997
+ fragments_stack has an entry at that index. If a string, checks
998
+ whether that fragmenter keylist is present in the stack.
999
+
1000
+ Returns:
1001
+ True if the entity is fragmented (at the specified level, if given).
1002
+ """
1003
+ stack = self.request["config"].get("fragment", {}).get("fragments_stack", [])
1004
+ if fragment_name_or_idx is None:
1005
+ return bool(stack)
1006
+ if isinstance(fragment_name_or_idx, int):
1007
+ return abs(fragment_name_or_idx) <= len(stack)
1008
+ return fragment_name_or_idx in stack
1009
+
1010
+ def is_root_fragment(self, fragment_name_or_idx=-1, recurse=True) -> bool:
1011
+ """Check whether this entity is a root fragment (index == 0).
1012
+
1013
+ Args:
1014
+ fragment_name_or_idx: Which fragmentation level to check. An int
1015
+ indexes into fragments_stack (default -1 = oldest level),
1016
+ a string matches by fragmenter keylist name.
1017
+ recurse: If True (default), check from the starting level towards
1018
+ index 0 (most recent) and return True only if the entity is
1019
+ root at every checked level. If False, only check the single
1020
+ specified level.
1021
+
1022
+ Returns:
1023
+ True if the entity is a root fragment at the specified level(s),
1024
+ or if the entity is not fragmented at all.
1025
+ """
795
1026
  if not self.is_fragmented():
796
1027
  return True
797
- fragment = entity.request.get("config", {}).get("fragment", {})
798
- return not _is_sub_fragment_recursive(fragment)
1028
+ fragments_stack = self.request["config"]["fragment"]["fragments_stack"]
1029
+
1030
+ if isinstance(fragment_name_or_idx, int):
1031
+ try:
1032
+ resolved = (
1033
+ fragment_name_or_idx
1034
+ if fragment_name_or_idx >= 0
1035
+ else len(fragments_stack) + fragment_name_or_idx
1036
+ )
1037
+ key = fragments_stack[resolved]
1038
+ except (IndexError, ValueError):
1039
+ return True
1040
+ else:
1041
+ key = fragment_name_or_idx
1042
+ if key not in fragments_stack:
1043
+ return True
1044
+ resolved = fragments_stack.index(key)
1045
+
1046
+ if recurse:
1047
+ keys_to_check = fragments_stack[:resolved + 1]
1048
+ return all(
1049
+ NifiFragmenter.get_fragment_info(self, k).get("index", 0) == 0
1050
+ for k in keys_to_check
1051
+ )
1052
+ return NifiFragmenter.get_fragment_info(self, key).get("index", 0) == 0
799
1053
 
800
1054
  def get_fragment_root_uid(self, fragment_name_or_idx) -> str:
801
1055
  fragment_config = self.request.get("config", {}).get("fragment", {})