streamlit-octostar-utils 0.6.0__tar.gz → 0.6.3.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/PKG-INFO +1 -1
  2. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/pyproject.toml +1 -1
  3. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/celery.py +60 -5
  4. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/nifi.py +4 -3
  5. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parallelism.py +66 -46
  6. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/LICENSE +0 -0
  7. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/README.md +0 -0
  8. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/__init__.py +0 -0
  9. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/__init__.py +0 -0
  10. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/contents.py +0 -0
  11. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/fastapi.py +0 -0
  12. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/__init__.py +0 -0
  13. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/combine_fields.py +0 -0
  14. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/entities_parser.py +0 -0
  15. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/generics.py +0 -0
  16. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/info.py +0 -0
  17. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/linkchart_functions.py +0 -0
  18. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/matches.py +0 -0
  19. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/parameters.py +0 -0
  20. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/rules.py +0 -0
  21. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/api_crafter/parser/signals.py +0 -0
  22. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/__init__.py +0 -0
  23. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/dict.py +0 -0
  24. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/filetypes.py +0 -0
  25. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/opensearch_conversion.py +0 -0
  26. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/threading/__init__.py +0 -0
  27. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/threading/key_queue.py +0 -0
  28. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/core/timestamp.py +0 -0
  29. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/nlp/__init__.py +0 -0
  30. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/nlp/custom_recognizers.py +0 -0
  31. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/nlp/language.py +0 -0
  32. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/nlp/ner.py +0 -0
  33. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/octostar/__init__.py +0 -0
  34. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/octostar/client.py +0 -0
  35. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/octostar/context.py +0 -0
  36. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/octostar/permissions.py +0 -0
  37. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/ontology/__init__.py +0 -0
  38. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/ontology/inheritance.py +0 -0
  39. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/ontology/relationships.py +0 -0
  40. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/ontology/validation.py +0 -0
  41. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/style/__init__.py +0 -0
  42. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/style/common.py +0 -0
  43. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/threading/__init__.py +0 -0
  44. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/threading/async_task_manager.py +0 -0
  45. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/threading/session_callback_manager.py +0 -0
  46. {streamlit_octostar_utils-0.6.0 → streamlit_octostar_utils-0.6.3.dev1}/streamlit_octostar_utils/threading/session_state_hot_swapper.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: streamlit-octostar-utils
3
- Version: 0.6.0
3
+ Version: 0.6.3.dev1
4
4
  Summary:
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -5,7 +5,7 @@ include = '\.pyi?$'
5
5
 
6
6
  [tool.poetry]
7
7
  name = "streamlit-octostar-utils"
8
- version = "0.6.0"
8
+ version = "0.6.3-dev.1"
9
9
  description = ""
10
10
  license = "MIT"
11
11
  authors = ["Octostar"]
@@ -590,12 +590,64 @@ class CeleryExecutor(object):
590
590
  }
591
591
  logger.info(f"All resources preloaded for queue {queue}")
592
592
 
593
- def set_awaiting_state(self, sender=None, headers=None, **kwargs):
594
- task_id = headers.get("id") if headers else None
593
+ def _redis_result_ttl_while_awaiting(self, queue_name, hard_time_limit):
594
+ """TTL (seconds) for result-backend keys while a task may sit AWAITING in the broker.
595
+
596
+ Uses ``ceil(T * N / M) + result_expires`` when a positive hard time limit and queue
597
+ config exist; otherwise falls back to ``task_expires + result_expires`` so broker
598
+ message lifetime does not outlive the result key.
599
+ """
600
+ result_exp = int(self.app.conf.result_expires)
601
+ task_exp = int(self.app.conf.task_expires)
602
+ tl = int(hard_time_limit or 0)
603
+ qc = self.queue_config.get(queue_name)
604
+ if qc and tl > 0:
605
+ m_workers = max(int(qc.n_workers), 1)
606
+ n_cap = qc.max_tasks_in_queue
607
+ if n_cap is None:
608
+ n_cap = m_workers
609
+ n_cap = max(int(n_cap), 1)
610
+ queue_wait = (tl * n_cap + m_workers - 1) // m_workers
611
+ return queue_wait + result_exp
612
+ return max(result_exp, task_exp + result_exp)
613
+
614
+ @staticmethod
615
+ def _hard_time_limit_from_publish(body, headers, sender_task_name, app):
616
+ """Resolve hard time limit from AMQP publish (v1 body dict or v2 headers list)."""
617
+ h = headers or {}
618
+ raw = h.get("timelimit")
619
+ if not raw and isinstance(body, dict):
620
+ raw = body.get("timelimit")
621
+ if raw:
622
+ hard = raw[0] if isinstance(raw, (list, tuple)) else None
623
+ if hard is not None:
624
+ return int(hard)
625
+ if sender_task_name and sender_task_name in app.tasks:
626
+ t = getattr(app.tasks[sender_task_name], "time_limit", None)
627
+ if t is not None:
628
+ return int(t)
629
+ return 0
630
+
631
+ def set_awaiting_state(
632
+ self, sender=None, body=None, headers=None, routing_key=None, **kwargs
633
+ ):
634
+ headers = headers or {}
635
+ task_id = headers.get("id")
636
+ if not task_id and isinstance(body, dict):
637
+ task_id = body.get("id")
595
638
  if not task_id:
596
639
  return
597
640
  result = AsyncResult(task_id, app=self.app)
598
641
  result.backend.store_result(task_id, result=None, state=CeleryExecutor.AWAITING)
642
+ queue_name = routing_key or self.app.conf.task_default_routing_key
643
+ hard_tl = self._hard_time_limit_from_publish(body, headers, sender, self.app)
644
+ await_ttl = self._redis_result_ttl_while_awaiting(queue_name, hard_tl)
645
+ meta_key = f"{CeleryExecutor.CELERY_BROKER_PREFIX}{task_id}"
646
+ try:
647
+ if await_ttl > 0 and self.redis_client.exists(meta_key):
648
+ self.redis_client.expire(meta_key, await_ttl)
649
+ except Exception:
650
+ logger.debug("Could not extend AWAITING result TTL for %s", task_id, exc_info=True)
599
651
 
600
652
  def set_started_state(self, task_id, task, *args, **kwargs):
601
653
  result = AsyncResult(task_id, app=self.app)
@@ -1059,9 +1111,9 @@ class CeleryExecutor(object):
1059
1111
  def _send_task(task_fn, task_id, options):
1060
1112
  task_fn.apply_async(task_id=task_id, **options)
1061
1113
 
1062
- def _store_task_queue_mapping(task_id, queue_name):
1114
+ def _store_task_queue_mapping(task_id, queue_name, redis_await_ttl):
1063
1115
  pipe = self.redis_client.pipeline()
1064
- pipe.set(f"task:queue:{task_id}", queue_name, ex=self.app.conf.result_expires)
1116
+ pipe.set(f"task:queue:{task_id}", queue_name, ex=redis_await_ttl)
1065
1117
  pipe.set(f"queue:first_enqueued:{queue_name}", str(time.time()), nx=True)
1066
1118
  pipe.execute()
1067
1119
 
@@ -1075,6 +1127,9 @@ class CeleryExecutor(object):
1075
1127
  f"Queue '{queue_name}' is stalled. Service temporarily unavailable."
1076
1128
  )
1077
1129
 
1130
+ hard_tl = options.get("time_limit") or getattr(task_fn, "time_limit", None) or 0
1131
+ redis_await_ttl = self._redis_result_ttl_while_awaiting(queue_name, hard_tl)
1132
+
1078
1133
  try:
1079
1134
  if part is not None:
1080
1135
  await self._write_task_data_with_part(
@@ -1094,7 +1149,7 @@ class CeleryExecutor(object):
1094
1149
  self.set_thread_pool, _send_task, task_fn, task_id, options
1095
1150
  )
1096
1151
  await asyncio.get_running_loop().run_in_executor(
1097
- self.set_thread_pool, _store_task_queue_mapping, task_id, queue_name
1152
+ self.set_thread_pool, _store_task_queue_mapping, task_id, queue_name, redis_await_ttl
1098
1153
  )
1099
1154
  except asyncio.CancelledError:
1100
1155
  logger.info(f"Cancelling task {task_id} due to disconnect!")
@@ -670,6 +670,7 @@ class NifiContextManager(object):
670
670
  self.lazy_sync = lazy_sync
671
671
  self.client, self.ontology_name = self.get_client(json_data)
672
672
  self._ontology = None
673
+ self._input_loader = input_loader
673
674
 
674
675
  @property
675
676
  def ontology(self):
@@ -863,20 +864,20 @@ class NifiContextManager(object):
863
864
  revert it separately by including it in ``entities`` if needed.
864
865
 
865
866
  """
866
- if self._input_loader is None:
867
+ loader = getattr(self, "_input_loader", None)
868
+ if loader is None:
867
869
  raise RuntimeError(
868
870
  "revert is unavailable: this NifiContextManager was not "
869
871
  "constructed with an input_loader"
870
872
  )
871
873
  if not entities:
872
874
  return
873
- metadata, parts = self._input_loader.load()
875
+ metadata, parts = loader.load()
874
876
  if not parts:
875
877
  raise RuntimeError(
876
878
  "revert is unavailable: input loader returned no parts"
877
879
  )
878
880
  body = json.loads(parts[0])
879
- # Drop wrapper-side refs to the raw bytes/parts immediately.
880
881
  metadata = None
881
882
  parts = None
882
883
  try:
@@ -46,23 +46,34 @@ R = TypeVar('R')
46
46
  class ParallelismConfig:
47
47
  """
48
48
  Configuration for controlling parallelism and throughput.
49
-
49
+
50
+ `max_parallel` is the single user-facing concurrency knob; its concrete
51
+ meaning depends on the executor it is paired with:
52
+
53
+ * ``ThreadExecutor`` -- maximum number of items processed simultaneously
54
+ (semaphore + thread-pool size).
55
+ * ``BatchExecutor`` -- default number of items per ``batch_fn`` call when
56
+ the caller does not pass an explicit ``batch_size``. The batch dimension
57
+ is the natural parallelism for a batched-inference model.
58
+ * ``LoopExecutor`` -- ignored; the executor is intrinsically sequential
59
+ and only honors ``throttle``.
60
+
50
61
  Attributes:
51
- max_parallel: Maximum number of entities being processed simultaneously.
52
- Acts as a concurrency limit (semaphore).
53
- None = unlimited concurrency
54
-
62
+ max_parallel: Maximum number of items processed simultaneously, or
63
+ (for ``BatchExecutor``) the default batch dimension.
64
+ None = unlimited concurrency / batch_size defaults to 1.
65
+
55
66
  throttle: Maximum rate of entities starting processing per second.
56
67
  Acts as a rate limiter.
57
68
  None = unlimited rate
58
-
69
+
59
70
  Examples:
60
71
  >>> # At most 4 entities processed at once, no rate limit
61
72
  >>> config = ParallelismConfig(max_parallel=4, throttle=None)
62
-
73
+
63
74
  >>> # Unlimited concurrency, but max 10 entities/second
64
75
  >>> config = ParallelismConfig(max_parallel=None, throttle=10)
65
-
76
+
66
77
  >>> # Max 2 concurrent, max 5/second rate
67
78
  >>> config = ParallelismConfig(max_parallel=2, throttle=5)
68
79
  """
@@ -603,23 +614,29 @@ class LoopExecutor(Executor):
603
614
  class BatchExecutor(Executor):
604
615
  """
605
616
  Process items in batches with parallelism control.
606
-
617
+
607
618
  Useful for model inference where batching improves throughput
608
619
  (e.g., neural network inference, OCR, NER).
609
-
620
+
621
+ The user-facing knob is ``ParallelismConfig.max_parallel``, which controls
622
+ the default batch dimension: each ``batch_fn`` call receives up to
623
+ ``max_parallel`` items. Callers that need to pin a different batch size
624
+ (e.g. "one bin per call" when the items are themselves pre-batched, or
625
+ "all items in a single mega-batch") may pass an explicit ``batch_size``
626
+ to ``process()`` to override the default.
627
+
610
628
  Example:
611
- >>> config = ParallelismConfig(max_parallel=None, throttle=10)
629
+ >>> # Default: batch_size derived from max_parallel
630
+ >>> config = ParallelismConfig(max_parallel=8)
612
631
  >>> processor = BatchExecutor(config)
613
- >>>
632
+ >>>
614
633
  >>> def process_batch(texts):
615
- ... # Process batch of texts together
616
634
  ... return model.predict(texts)
617
- >>>
618
- >>> results = processor.process(
619
- ... all_texts,
620
- ... process_batch,
621
- ... batch_size=10
622
- ... )
635
+ >>>
636
+ >>> results = processor.process(all_texts, process_batch)
637
+ >>>
638
+ >>> # Or override explicitly:
639
+ >>> results = processor.process(all_texts, process_batch, batch_size=10)
623
640
  """
624
641
 
625
642
  def __init__(self, config: ParallelismConfig):
@@ -633,50 +650,53 @@ class BatchExecutor(Executor):
633
650
  self.controller = ParallelismController(config)
634
651
 
635
652
  def process(
636
- self,
637
- items: List[T],
653
+ self,
654
+ items: List[T],
638
655
  batch_fn: Callable[[List[T]], List[R]],
639
- batch_size: int = 1
656
+ batch_size: Optional[int] = None
640
657
  ) -> List[ProcessResult]:
641
658
  """
642
659
  Process items in batches.
643
-
660
+
644
661
  Args:
645
662
  items: List of items to process
646
- batch_fn: Function that takes a batch (list) and returns a list of results.
647
- The function should return results in the same order as input.
648
- Length of output must match length of input batch.
649
- batch_size: Size of each batch. Should be tuned based on model capacity
650
- and memory constraints.
651
-
663
+ batch_fn: Function that takes a batch (list) and returns a list of
664
+ results. The function should return results in the same
665
+ order as input. Length of output must match length of
666
+ input batch.
667
+ batch_size: Optional in-code override for the batch dimension. When
668
+ ``None`` (the default), the batch dimension is taken from
669
+ ``ParallelismConfig.max_parallel`` (or ``1`` when
670
+ ``max_parallel`` is also ``None``). Pass an explicit value
671
+ only when the caller needs a fixed batch size that does
672
+ not track ``max_parallel`` -- e.g. ``batch_size=1`` when
673
+ the items are themselves pre-batched units, or
674
+ ``batch_size=len(items)`` for "all-in-one-call" semantics.
675
+
652
676
  Returns:
653
677
  List of ProcessResult objects in same order as items.
654
678
  Call .result() on each to get value or raise stored exception.
655
-
679
+
656
680
  Note:
657
681
  If a batch fails, all items in that batch will have the same exception.
658
682
  The entire batch is processed atomically (acquires permits once per batch,
659
683
  not once per item), since batching assumes the batch is processed together.
660
-
684
+
661
685
  Example:
662
- >>> # Process texts in batches of 10
663
- >>> def ner_batch(texts):
664
- ... return ner_model.process_batch(texts)
665
- >>>
666
- >>> results = processor.process(
667
- ... texts,
668
- ... ner_batch,
669
- ... batch_size=10
670
- ... )
671
- >>> for r in results:
672
- ... try:
673
- ... entities = r.result()
674
- ... except Exception as e:
675
- ... print(f"Failed: {e}")
686
+ >>> # Default batch_size = max_parallel
687
+ >>> config = ParallelismConfig(max_parallel=10)
688
+ >>> processor = BatchExecutor(config)
689
+ >>> results = processor.process(texts, ner_batch)
690
+ >>>
691
+ >>> # Explicit override
692
+ >>> results = processor.process(texts, ner_batch, batch_size=10)
676
693
  """
677
694
  if not items:
678
695
  return []
679
-
696
+
697
+ if batch_size is None:
698
+ batch_size = self.config.max_parallel or 1
699
+
680
700
  if batch_size < 1:
681
701
  raise ValueError(f"batch_size must be >= 1, got {batch_size}")
682
702