torchrl-nightly 2025.8.10__cp312-cp312-manylinux1_x86_64.whl → 2025.8.11__cp312-cp312-manylinux1_x86_64.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
torchrl/modules/llm/policies/common.py

@@ -1300,11 +1300,11 @@ def _extract_responses_from_full_histories(
 def _batching(func):
     @wraps(func)
     def _batched_func(self, td_input: TensorDictBase, **kwargs):
-        # -- 0. skip if batching is disabled
+        # -- 0. Bypass if batching disabled
         if not self.batching:
             return func(self, td_input, **kwargs)

-        # ── 1. Normalise input ──────────────────────────────────────────────────
+        # -- 1. Normalise --------------------------------------------------------
         if td_input.batch_dims > 1:
             raise ValueError(
                 f"Batching not supported for batch_dims > 1: {td_input.batch_dims}"
@@ -1313,52 +1313,59 @@ def _batching(func):
         single = td_input.batch_dims == 0
         inputs = [td_input] if single else list(td_input.unbind(0))
         futures = [Future() for _ in inputs]
+        pending = set(futures)  # ← track our own Futures

-        # ── 2. Enqueue work and, if first in, do the draining ───────────────────
+        # -- 2. Enqueue ----------------------------------------------------------
         self._batch_queue.extend(inputs)
         self._futures.extend(futures)

         min_bs = getattr(self, "_min_batch_size", 1)
         max_bs = getattr(self, "_max_batch_size", None)

+        # -- 3. Drain while holding the lock ------------------------------------
         with self._batching_lock:
-            # Only the thread that managed to grab the lock will run the loop
-            while len(self._batch_queue) >= min_bs:
-                # Determine slice
-                slice_size = (
-                    len(self._batch_queue)
-                    if max_bs is None
-                    else min(max_bs, len(self._batch_queue))
-                )
-                batch = self._batch_queue[:slice_size]
-                fut_slice = self._futures[:slice_size]
+            if all(f.done() for f in futures):
+                # Our items were already processed by another thread.
+                # Skip draining; other workers will handle the rest of the queue.
+                pass
+            else:
+                while len(self._batch_queue) >= min_bs:
+                    slice_size = (
+                        len(self._batch_queue)
+                        if max_bs is None
+                        else min(max_bs, len(self._batch_queue))
+                    )
+                    batch = self._batch_queue[:slice_size]
+                    fut_slice = self._futures[:slice_size]
+
+                    try:
+                        results = func(self, lazy_stack(batch), **kwargs).unbind(0)
+                        if len(results) != slice_size:
+                            raise RuntimeError(
+                                f"Expected {slice_size} results, got {len(results)}"
+                            )
+                        for fut, res in zip(fut_slice, results):
+                            fut.set_result(res)
+                            pending.discard(fut)  # ← mark as done
+                    except Exception as exc:
+                        for fut in fut_slice:
+                            fut.set_exception(exc)
+                            pending.discard(fut)
+                        raise

-                # Execute model
-                try:
-                    results = func(self, lazy_stack(batch), **kwargs).unbind(0)
-                    if len(results) != slice_size:  # sanity
-                        raise RuntimeError(
-                            f"Expected {slice_size} results, got {len(results)}"
-                        )
-                    # Fulfil the corresponding futures
-                    for fut, res in zip(fut_slice, results):
-                        fut.set_result(res)
-                except Exception as exc:
-                    for fut in fut_slice:
-                        fut.set_exception(exc)
-                    # Propagate to caller; other waiters will read the exception from their future
-                    raise
-
-                # Pop processed work
-                del self._batch_queue[:slice_size]
-                del self._futures[:slice_size]
-
-        # ── 3. Outside the lock: wait only for OUR futures (they may already be done) ──
-        wait(
-            futures
-        )  # no timeout → immediate return if set_result()/set_exception() already called
-        result = [f.result() for f in futures]
-
-        return result[0] if single else lazy_stack(result)
+                    # Pop processed work
+                    del self._batch_queue[:slice_size]
+                    del self._futures[:slice_size]
+
+                    # ---- Early-exit: all *our* Futures are done -------------------
+                    if not pending:
+                        break
+
+        # -- 4. Outside the lock: wait only on remaining (rare) -----------------
+        if pending:  # usually empty; safety for min_bs > queue size
+            wait(pending)
+        results = [f.result() for f in futures]
+
+        return results[0] if single else lazy_stack(results)

     return _batched_func
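For readers who want to see the pattern this hunk rewrites in isolation, here is a minimal, self-contained sketch of the same enqueue/drain batching scheme, using plain lists and a stand-in process method instead of TensorDicts and the wrapped model call. Batcher and all its names are illustrative, not torchrl API.

import threading
from concurrent.futures import Future, wait

class Batcher:
    def __init__(self, min_batch_size=1, max_batch_size=None):
        self._queue = []                 # shared work queue
        self._futures = []               # one Future per queued item
        self._lock = threading.Lock()
        self._min_bs = min_batch_size
        self._max_bs = max_batch_size

    def process(self, batch):
        # Stand-in for the wrapped model call: square each item.
        return [x * x for x in batch]

    def __call__(self, items):
        futures = [Future() for _ in items]
        pending = set(futures)           # track only *our* futures
        self._queue.extend(items)
        self._futures.extend(futures)
        with self._lock:
            # Another thread may already have drained our items.
            if not all(f.done() for f in futures):
                while len(self._queue) >= self._min_bs:
                    n = len(self._queue)
                    if self._max_bs is not None:
                        n = min(self._max_bs, n)
                    batch, futs = self._queue[:n], self._futures[:n]
                    try:
                        for fut, res in zip(futs, self.process(batch)):
                            fut.set_result(res)
                            pending.discard(fut)
                    except Exception as exc:
                        for fut in futs:
                            fut.set_exception(exc)
                            pending.discard(fut)
                        raise
                    del self._queue[:n]
                    del self._futures[:n]
                    if not pending:      # our items are done; stop draining
                        break
        if pending:                      # rare: queue never reached min_bs
            wait(pending)
        return [f.result() for f in futures]

b = Batcher(max_batch_size=4)
print(b([1, 2, 3]))  # [1, 4, 9]

The pending set and the early-exit break mirror what this release adds: a caller whose items were already drained by another thread now stops draining the shared queue instead of continuing to process other callers' work.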
torchrl/modules/llm/policies/transformers_wrapper.py

@@ -23,7 +23,7 @@ from tensordict import (
 from tensordict.utils import _zip_strict, NestedKey
 from torch import distributions as D
 from torch.nn.utils.rnn import pad_sequence
-
+from torchrl import logger as torchrl_logger
 from torchrl.modules.llm.policies.common import (
     _batching,
     _extract_responses_from_full_histories,
@@ -2443,7 +2443,12 @@ class RemoteTransformersWrapper:
     """

     def __init__(
-        self, model, max_concurrency: int = 16, validate_model: bool = True, **kwargs
+        self,
+        model,
+        max_concurrency: int = 16,
+        validate_model: bool = True,
+        actor_name: str = None,
+        **kwargs,
     ):
         import ray

@@ -2458,10 +2463,23 @@ class RemoteTransformersWrapper:

         if not ray.is_initialized():
             ray.init()
-        # Create the remote actor
+
+        if actor_name is not None:
+            # Check if an actor with this name already exists
+            try:
+                existing_actor = ray.get_actor(actor_name)
+                # If we can get the actor, assume it's alive and use it
+                self._remote_wrapper = existing_actor
+                torchrl_logger.info(f"Using existing actor {actor_name}")
+                return
+            except ValueError:
+                # Actor doesn't exist, create a new one
+                torchrl_logger.info(f"Creating new actor {actor_name}")
+
+        # Create the remote actor with the unique name
         self._remote_wrapper = (
             ray.remote(TransformersWrapper)
-            .options(max_concurrency=max_concurrency)
+            .options(max_concurrency=max_concurrency, name=actor_name)
             .remote(model, **kwargs)
         )

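Both transformers_wrapper.py (above) and vllm_wrapper.py (below) now use Ray's get-or-create pattern for named actors. A minimal standalone sketch of that pattern; the Counter actor is illustrative, not part of this diff:

import ray

@ray.remote
class Counter:
    def __init__(self):
        self.n = 0

    def incr(self):
        self.n += 1
        return self.n

def get_or_create(name):
    try:
        # ray.get_actor raises ValueError if no live actor has this name
        return ray.get_actor(name)
    except ValueError:
        return Counter.options(name=name).remote()

ray.init()
a = get_or_create("shared-counter")
b = get_or_create("shared-counter")  # same handle: the actor already exists
assert ray.get(a.incr.remote()) == 1
assert ray.get(b.incr.remote()) == 2

Note that check-then-create is not atomic: if two processes race through the ValueError branch, the second .options(name=...) call fails because the name is already taken. Recent Ray releases also offer .options(name=..., get_if_exists=True) to perform the lookup-or-create atomically.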
torchrl/modules/llm/policies/vllm_wrapper.py

@@ -23,6 +23,7 @@ from tensordict.tensorclass import from_dataclass, TensorClass
 from tensordict.utils import _zip_strict, NestedKey
 from torch import distributions as D
 from torch.nn.utils.rnn import pad_sequence
+from torchrl import logger as torchrl_logger

 from torchrl.envs.utils import _classproperty
 from torchrl.modules.llm.policies.common import (
@@ -2101,7 +2102,12 @@ class RemotevLLMWrapper:
     """

     def __init__(
-        self, model, max_concurrency: int = 16, validate_model: bool = True, **kwargs
+        self,
+        model,
+        max_concurrency: int = 16,
+        validate_model: bool = True,
+        actor_name: str = None,
+        **kwargs,
     ):
         import ray

@@ -2141,10 +2147,22 @@ class RemotevLLMWrapper:
         if not ray.is_initialized():
             ray.init()

-        # Create the remote actor
+        if actor_name is not None:
+            # Check if an actor with this name already exists
+            try:
+                existing_actor = ray.get_actor(actor_name)
+                torchrl_logger.info(f"Using existing actor {actor_name}")
+                # If we can get the actor, assume it's alive and use it
+                self._remote_wrapper = existing_actor
+                return
+            except ValueError:
+                # Actor doesn't exist, create a new one
+                torchrl_logger.info(f"Creating new actor {actor_name}")
+
+        # Create the remote actor with the unique name
         self._remote_wrapper = (
             ray.remote(vLLMWrapper)
-            .options(max_concurrency=max_concurrency)
+            .options(max_concurrency=max_concurrency, name=actor_name)
             .remote(model, **kwargs)
         )

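A hypothetical usage sketch of the new actor_name parameter; the import path and model string are assumptions, not taken from this diff:

from torchrl.modules.llm.policies import RemotevLLMWrapper  # import path assumed

# The first construction creates the named actor ("Creating new actor ...");
# the second finds it via ray.get_actor and reuses it ("Using existing actor ..."),
# so a second copy of the model is never spawned.
w1 = RemotevLLMWrapper("Qwen/Qwen2.5-3B", actor_name="vllm-policy")  # model string illustrative
w2 = RemotevLLMWrapper("Qwen/Qwen2.5-3B", actor_name="vllm-policy")

Because __init__ returns early when the named actor already exists, the model argument and any extra **kwargs passed on the second construction are ignored.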
METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torchrl-nightly
-Version: 2025.8.10
+Version: 2025.8.11
 Summary: A modular, primitive-first, python-first PyTorch library for Reinforcement Learning
 Author-email: torchrl contributors <vmoens@fb.com>
 Maintainer-email: torchrl contributors <vmoens@fb.com>
RECORD

@@ -245,9 +245,9 @@ torchrl/modules/llm/utils.py,sha256=gf_F-4bEMwkcI3jLQM7ifB7nsjRctGebB5E2c-AznO0,
 torchrl/modules/llm/backends/__init__.py,sha256=WdVy9EdiAfk8i5zFa49TEkRvcUd0L4Un4v6wqWBy8l8,438
 torchrl/modules/llm/backends/vllm.py,sha256=x57Xop1xd5ZShicsh47ZFmz4VpfZ3eCzVx7k0COvpqQ,9387
 torchrl/modules/llm/policies/__init__.py,sha256=VYAiblw6ETlo4q1vSvaKaybuxwxuPXfC-QCFzZJk4PA,649
-torchrl/modules/llm/policies/common.py,sha256=LSJrDy9NW2xyqTttCrcurJHIqHVmk4F4jjYamjibfZs,56489
-torchrl/modules/llm/policies/transformers_wrapper.py,sha256=rh8Us_95U-NL-aM_AVYXQWfxneRl-z74ovHPTwTW12M,110340
-torchrl/modules/llm/policies/vllm_wrapper.py,sha256=SAh2cgmDmc4qiaQi6yHoaISqM3flLbrgriIbx9zQpIs,99125
+torchrl/modules/llm/policies/common.py,sha256=hjDx09kt-IG81AiwD_8SRwZU7Zf530nWJMpdvECilrE,56733
+torchrl/modules/llm/policies/transformers_wrapper.py,sha256=GzaJBoEDIO7NpLXzNWySiqjRNXelOapUISAHSn4dyx8,111044
+torchrl/modules/llm/policies/vllm_wrapper.py,sha256=_cXdE5HjYtJE0cuGSxSynhFu8cShWQuu0R0rxBnD4jc,99829
 torchrl/modules/models/__init__.py,sha256=DrOG-7hynjjUh_tc2EqysiUiNMRiDR0WLtZql9TPNcI,1743
 torchrl/modules/models/batchrenorm.py,sha256=TojpTUluIcFdTSemIVRLGtB2O5q54mRHy3vJP6DuI5I,4750
 torchrl/modules/models/decision_transformer.py,sha256=Lttf_wZMNqXbB_vpxMYgEp18gEzOvm3NvMnxQkHkH4M,6604
@@ -322,8 +322,8 @@ torchrl/trainers/helpers/losses.py,sha256=sHlJqjh02t8cKN73X35Azd_OoWGurohLuviB8Y
 torchrl/trainers/helpers/models.py,sha256=ihTERG2c96E8cS3Tnul6a_ys6iDEEJmHh05p9blQTW8,21807
 torchrl/trainers/helpers/replay_buffer.py,sha256=ZUZHOa0TILyeWJ3iahzTJ6UvMl_0FdxuZfJEja94Bn8,2001
 torchrl/trainers/helpers/trainers.py,sha256=j6B5XA7_FFHMQeOIQwjNcO0CGE_4mZKUC9_jH_iqqh4,12071
-torchrl_nightly-2025.8.10.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
-torchrl_nightly-2025.8.10.dist-info/METADATA,sha256=paA13xYvsRhQHdq4kbORTI9utZkTu1QAIDElif9MAqw,41412
-torchrl_nightly-2025.8.10.dist-info/WHEEL,sha256=ziAMZrFEBAMOBaTDNVVFwf5i-WiFj1yXRFZ4MRxHC0g,104
-torchrl_nightly-2025.8.10.dist-info/top_level.txt,sha256=-5FcSdmJ9DwdHF8aOIaofsPbz4Gm8G1eo7r7Sc2CHgE,59
-torchrl_nightly-2025.8.10.dist-info/RECORD,,
+torchrl_nightly-2025.8.11.dist-info/licenses/LICENSE,sha256=xdjS4_xk-IwnLuIFCvTYTl9Y8aXRejqpmke3dGam_nI,1098
+torchrl_nightly-2025.8.11.dist-info/METADATA,sha256=yho9bHnrIfIvRsr_JI-SLCVVViu_hSb2H94L8t8HDZ4,41412
+torchrl_nightly-2025.8.11.dist-info/WHEEL,sha256=ziAMZrFEBAMOBaTDNVVFwf5i-WiFj1yXRFZ4MRxHC0g,104
+torchrl_nightly-2025.8.11.dist-info/top_level.txt,sha256=-5FcSdmJ9DwdHF8aOIaofsPbz4Gm8G1eo7r7Sc2CHgE,59
+torchrl_nightly-2025.8.11.dist-info/RECORD,,