weco 0.2.14__tar.gz → 0.2.15__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {weco-0.2.14 → weco-0.2.15}/PKG-INFO +1 -1
- {weco-0.2.14 → weco-0.2.15}/examples/cuda/evaluate.py +21 -17
- {weco-0.2.14 → weco-0.2.15}/examples/hello-kernel-world/evaluate.py +8 -9
- {weco-0.2.14 → weco-0.2.15}/examples/metal/evaluate.py +2 -2
- {weco-0.2.14 → weco-0.2.15}/examples/triton/evaluate.py +11 -8
- {weco-0.2.14 → weco-0.2.15}/pyproject.toml +1 -1
- {weco-0.2.14 → weco-0.2.15}/weco/__init__.py +1 -1
- {weco-0.2.14 → weco-0.2.15}/weco/cli.py +7 -9
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/PKG-INFO +1 -1
- {weco-0.2.14 → weco-0.2.15}/.github/workflows/lint.yml +0 -0
- {weco-0.2.14 → weco-0.2.15}/.github/workflows/release.yml +0 -0
- {weco-0.2.14 → weco-0.2.15}/.gitignore +0 -0
- {weco-0.2.14 → weco-0.2.15}/.repomixignore +0 -0
- {weco-0.2.14 → weco-0.2.15}/LICENSE +0 -0
- {weco-0.2.14 → weco-0.2.15}/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/cuda/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/cuda/guide.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/cuda/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/hello-kernel-world/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/metal/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/metal/examples.rst +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/metal/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/prompt/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/prompt/eval.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/prompt/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/prompt/prompt_guide.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/baseline.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/evaluate.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/requirements-test.txt +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/utils.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/triton/README.md +0 -0
- {weco-0.2.14 → weco-0.2.15}/examples/triton/optimize.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/setup.cfg +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco/api.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco/auth.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco/panels.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco/utils.py +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/SOURCES.txt +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/dependency_links.txt +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/entry_points.txt +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/requires.txt +0 -0
- {weco-0.2.14 → weco-0.2.15}/weco.egg-info/top_level.txt +0 -0
examples/cuda/evaluate.py
@@ -1,4 +1,3 @@
-import time
 import sys
 import os
 import pathlib
@@ -78,22 +77,27 @@ def get_inputs(batch_size, seq_len, n_embd, device):
     return torch.randn(batch_size, seq_len, n_embd, device=device, dtype=torch.float32)
 
 
+@torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
-    ...  # 15 removed lines (previous time.time()-based bench body; content not preserved)
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+
+    # warmup
+    for _ in range(n_warmup):
+        f(inputs)  # noqa
+    torch.cuda.synchronize()
+
+    # benchmark
+    t_avg_ms = 0.0
+    for _ in range(n_rep):
+        # time the forward pass
+        start_event.record()
+        f(inputs)
+        end_event.record()
+        # wait for all computations to complete
+        torch.cuda.synchronize()
+        t_avg_ms += start_event.elapsed_time(end_event)
+    return t_avg_ms / n_rep
 
 
 if __name__ == "__main__":
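The CUDA example switches from wall-clock timing to CUDA events. Kernel launches are asynchronous, so `time.time()` around `f(inputs)` mostly measures launch overhead; `torch.cuda.Event` timestamps on the GPU stream itself. Below is a minimal standalone sketch of the pattern the new `bench()` uses; the function name and default arguments are illustrative, not part of the package:

```python
import torch

@torch.no_grad()
def bench_cuda_events(f, inputs, n_warmup=10, n_rep=100):
    """Return the average latency of f(inputs) in milliseconds."""
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    # Warmup: trigger lazy initialization, autotuning, and cache population
    for _ in range(n_warmup):
        f(inputs)
    torch.cuda.synchronize()

    total_ms = 0.0
    for _ in range(n_rep):
        start.record()                # enqueue a start timestamp on the stream
        f(inputs)
        end.record()                  # enqueue an end timestamp after the kernels
        torch.cuda.synchronize()      # block until both events have occurred
        total_ms += start.elapsed_time(end)  # GPU-side elapsed time, in ms
    return total_ms / n_rep
```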
examples/cuda/evaluate.py (continued; the removed and added comment lines below appear to differ only in whitespace)
@@ -113,7 +117,7 @@ if __name__ == "__main__":
     seq_len = 256
     n_embd = 768
     n_head = 8
-    # turn off dropout to measure correctness
+    # turn off dropout to measure correctness
     attn_pdrop = 0.0
     resid_pdrop = 0.0
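Whatever the whitespace change, the comment marks a real requirement: a correctness check against a reference implementation needs a deterministic forward pass, which is why the example zeroes `attn_pdrop` and `resid_pdrop`. A tiny sketch of the same idea in isolation:

```python
import torch
import torch.nn as nn

x = torch.randn(2, 8)
drop = nn.Dropout(p=0.0)  # p=0 turns the dropout layer into a no-op
# With dropout disabled, repeated forward passes agree exactly,
# so outputs can be compared bitwise against a baseline.
assert torch.equal(drop(x), drop(x))
```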
examples/hello-kernel-world/evaluate.py
@@ -62,20 +62,19 @@ def get_inputs(B, N, device):
 
 @torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
-
+    device_type = inputs.device.type
+
+    # warm up
     for _ in range(n_warmup):
         f(inputs)  # noqa
+    if device_type == "cuda":
+        torch.cuda.synchronize()
+    elif device_type == "mps":
+        torch.mps.synchronize()
 
-    #
-    device_type = inputs.device.type
+    # benchmark
     t_avg = 0.0
     for _ in range(n_rep):
-        # Clear cache before timing
-        if device_type == "cuda":
-            torch.cuda.empty_cache()
-        elif device_type == "mps":
-            torch.mps.empty_cache()
-
         # time forward pass
         start_time = time.time()
         f(inputs)
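This benchmark keeps wall-clock timing but now synchronizes the device after warmup, and drops the per-iteration `empty_cache()` calls, which put allocator work inside the timed loop. On backends PyTorch drives asynchronously, a wall clock is only meaningful between synchronization points. A sketch of the per-backend fence; `sync` and `timed_ms` are hypothetical helper names, not functions in the package:

```python
import time
import torch

def sync(device_type: str) -> None:
    # Block until all queued kernels on the device have finished.
    # CPU execution is synchronous, so no fence is needed there.
    if device_type == "cuda":
        torch.cuda.synchronize()
    elif device_type == "mps":
        torch.mps.synchronize()

def timed_ms(f, x):
    sync(x.device.type)   # don't start the clock with work still in flight
    t0 = time.time()
    f(x)
    sync(x.device.type)   # make sure the work finished before stopping the clock
    return (time.time() - t0) * 1e3
```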
examples/metal/evaluate.py
@@ -55,15 +55,15 @@ def load_module_from_path(module_path: str, add_to_sys_modules: bool = False):
 # Benchmark
 ########################################################
 def get_inputs(batch_size, img_height, img_width, img_channels):
-    # MLX doesn't use device parameter like PyTorch, as it automatically uses Metal
     return mx.random.normal(shape=(batch_size, img_height, img_width, img_channels), dtype=mx.float32)
 
 
 def bench(f, inputs, n_warmup, n_rep):
-    #
+    # warm up
     for _ in range(n_warmup):
         result = f(inputs)
         mx.eval(result)  # Force computation due to lazy evaluation
+    mx.synchronize()  # Wait for all computations to complete
 
     t_avg = 0.0
     for _ in range(n_rep):
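MLX evaluates lazily: operations only record nodes in a graph, and nothing runs until forced. `mx.eval(result)` materializes the warmup outputs, and the added `mx.synchronize()` drains the Metal stream before the timed section starts. A minimal sketch of why both calls matter; the shapes are illustrative:

```python
import time
import mlx.core as mx

x = mx.random.normal(shape=(1024, 1024))

t0 = time.time()
y = x @ x            # lazy: only records the matmul in the graph
graph_ms = (time.time() - t0) * 1e3

mx.eval(y)           # forces the computation to actually run
mx.synchronize()     # waits until the GPU stream has drained
total_ms = (time.time() - t0) * 1e3
print(f"graph build: {graph_ms:.3f} ms, evaluated: {total_ms:.3f} ms")
```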
examples/triton/evaluate.py
@@ -1,4 +1,3 @@
-import time
 import sys
 import pathlib
 import importlib
@@ -76,20 +75,24 @@ def get_inputs(batch_size, seq_len, n_embd, device):
 
 @torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+
     # warmup
     for _ in range(n_warmup):
         f(inputs)  # noqa
+    torch.cuda.synchronize()
 
     # benchmark
-    ...
+    t_avg_ms = 0.0
     for _ in range(n_rep):
-    ...
-        start_time = time.time()
+        start_event.record()
         f(inputs)
-    ...  # 4 removed lines (content not preserved)
+        end_event.record()
+        # wait for all computations to complete
+        torch.cuda.synchronize()
+        t_avg_ms += start_event.elapsed_time(end_event)
+    return t_avg_ms / n_rep
 
 
 if __name__ == "__main__":
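The triton example gets the same event-based rewrite, which is why `import time` disappears from both files. The pitfall being removed is easy to reproduce: since launches return immediately, a clock stopped without a fence records almost none of the GPU work. A hedged sketch of the failure mode, for a CUDA machine:

```python
import time
import torch

x = torch.randn(4096, 4096, device="cuda")

# Misleading: the launch returns immediately, so this mostly times Python overhead
t0 = time.time()
y = x @ x
launch_ms = (time.time() - t0) * 1e3

# Honest wall-clock variant: fence before starting and after finishing
torch.cuda.synchronize()
t0 = time.time()
y = x @ x
torch.cuda.synchronize()
true_ms = (time.time() - t0) * 1e3
print(f"unsynchronized: {launch_ms:.3f} ms  synchronized: {true_ms:.3f} ms")
```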
pyproject.toml
@@ -10,7 +10,7 @@ authors = [
 ]
 description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
 readme = "README.md"
-version = "0.2.14"
+version = "0.2.15"
 license = {text = "MIT"}
 requires-python = ">=3.8"
 dependencies = ["requests", "rich"]
weco/cli.py
@@ -265,14 +265,14 @@ def main() -> None:
         "debug_prob": 0.5,
         "max_debug_depth": max(1, math.ceil(0.1 * steps)),
     }
+    # API request timeout
+    timeout = 800
     # Read additional instructions
     additional_instructions = read_additional_instructions(additional_instructions=args.additional_instructions)
     # Read source code path
     source_fp = pathlib.Path(args.source)
     # Read source code content
     source_code = read_from_path(fp=source_fp, is_json=False)
-    # API request timeout
-    timeout = 800
 
     # --- Panel Initialization ---
     summary_panel = SummaryPanel(
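The 800-second request timeout is now defined alongside the other request parameters, before any file I/O; the value itself is unchanged. `weco/api.py` is untouched in this release, so as a loose illustration only (hypothetical endpoint and payload, not weco's actual API), a timeout like this is typically threaded into `requests` (a declared dependency) as:

```python
import requests

timeout = 800  # seconds; a single optimization step can take a long time

resp = requests.post(
    "https://api.example.com/v1/suggest",  # hypothetical endpoint
    json={"source": "...", "steps": 10},   # hypothetical payload
    timeout=timeout,  # connect/read timeout; raises requests.exceptions.Timeout
)
resp.raise_for_status()
```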
weco/cli.py (continued)
@@ -310,9 +310,8 @@ def main() -> None:
     runs_dir = pathlib.Path(args.log_dir) / session_id
     runs_dir.mkdir(parents=True, exist_ok=True)
 
-    #
-
-    write_to_path(fp=runs_copy_source_fp, content=source_code)
+    # Write the initial code string to the logs
+    write_to_path(fp=runs_dir / f"step_0{source_fp.suffix}", content=session_response["code"])
 
     # Write the initial code string to the source file path (if not preserving)
     if not args.preserve_source:
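This logging refactor replaces the separate source copy (`runs_copy_source_fp`) with a numbered scheme inside the run directory: the baseline the session returns is saved as step 0, keeping the source file's extension. A small sketch of the naming convention; the paths here are illustrative:

```python
import pathlib

source_fp = pathlib.Path("optimize.py")            # the file being optimized
runs_dir = pathlib.Path(".weco_runs") / "session"  # hypothetical log directory
runs_dir.mkdir(parents=True, exist_ok=True)

# Baseline is step 0; later candidates would follow the same pattern.
baseline_fp = runs_dir / f"step_0{source_fp.suffix}"  # .weco_runs/session/step_0.py
baseline_fp.write_text("# baseline code returned by the session\n")
```

The same `step_0` path is reconstructed in the final hunk below when no better solution is found, which is what lets the separate `runs_copy_source_fp` variable be dropped.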
weco/cli.py (continued)
@@ -380,7 +379,8 @@ def main() -> None:
         transition_delay=0.1,
     )
 
-
+    # Starting from step 1 to steps (inclusive) because the baseline solution is step 0, so we want to optimize for steps worth of steps
+    for step in range(1, steps + 1):
         # Re-read instructions from the original source (file path or string) BEFORE each suggest call
         current_additional_instructions = read_additional_instructions(
             additional_instructions=args.additional_instructions
@@ -553,9 +553,7 @@ def main() -> None:
         best_solution_score = None
 
     if best_solution_code is None or best_solution_score is None:
-        best_solution_content = (
-            f"# Weco could not find a better solution\n\n{read_from_path(fp=runs_copy_source_fp, is_json=False)}"
-        )
+        best_solution_content = f"# Weco could not find a better solution\n\n{read_from_path(fp=runs_dir / f'step_0{source_fp.suffix}', is_json=False)}"
     else:
         # Format score for the comment
         best_score_str = (