weco 0.2.14__tar.gz → 0.2.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {weco-0.2.14 → weco-0.2.15}/PKG-INFO +1 -1
  2. {weco-0.2.14 → weco-0.2.15}/examples/cuda/evaluate.py +21 -17
  3. {weco-0.2.14 → weco-0.2.15}/examples/hello-kernel-world/evaluate.py +8 -9
  4. {weco-0.2.14 → weco-0.2.15}/examples/metal/evaluate.py +2 -2
  5. {weco-0.2.14 → weco-0.2.15}/examples/triton/evaluate.py +11 -8
  6. {weco-0.2.14 → weco-0.2.15}/pyproject.toml +1 -1
  7. {weco-0.2.14 → weco-0.2.15}/weco/__init__.py +1 -1
  8. {weco-0.2.14 → weco-0.2.15}/weco/cli.py +7 -9
  9. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/PKG-INFO +1 -1
  10. {weco-0.2.14 → weco-0.2.15}/.github/workflows/lint.yml +0 -0
  11. {weco-0.2.14 → weco-0.2.15}/.github/workflows/release.yml +0 -0
  12. {weco-0.2.14 → weco-0.2.15}/.gitignore +0 -0
  13. {weco-0.2.14 → weco-0.2.15}/.repomixignore +0 -0
  14. {weco-0.2.14 → weco-0.2.15}/LICENSE +0 -0
  15. {weco-0.2.14 → weco-0.2.15}/README.md +0 -0
  16. {weco-0.2.14 → weco-0.2.15}/examples/cuda/README.md +0 -0
  17. {weco-0.2.14 → weco-0.2.15}/examples/cuda/guide.md +0 -0
  18. {weco-0.2.14 → weco-0.2.15}/examples/cuda/optimize.py +0 -0
  19. {weco-0.2.14 → weco-0.2.15}/examples/hello-kernel-world/optimize.py +0 -0
  20. {weco-0.2.14 → weco-0.2.15}/examples/metal/README.md +0 -0
  21. {weco-0.2.14 → weco-0.2.15}/examples/metal/examples.rst +0 -0
  22. {weco-0.2.14 → weco-0.2.15}/examples/metal/optimize.py +0 -0
  23. {weco-0.2.14 → weco-0.2.15}/examples/prompt/README.md +0 -0
  24. {weco-0.2.14 → weco-0.2.15}/examples/prompt/eval.py +0 -0
  25. {weco-0.2.14 → weco-0.2.15}/examples/prompt/optimize.py +0 -0
  26. {weco-0.2.14 → weco-0.2.15}/examples/prompt/prompt_guide.md +0 -0
  27. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/README.md +0 -0
  28. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/baseline.py +0 -0
  29. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/evaluate.py +0 -0
  30. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/optimize.py +0 -0
  31. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/requirements-test.txt +0 -0
  32. {weco-0.2.14 → weco-0.2.15}/examples/spaceship-titanic/utils.py +0 -0
  33. {weco-0.2.14 → weco-0.2.15}/examples/triton/README.md +0 -0
  34. {weco-0.2.14 → weco-0.2.15}/examples/triton/optimize.py +0 -0
  35. {weco-0.2.14 → weco-0.2.15}/setup.cfg +0 -0
  36. {weco-0.2.14 → weco-0.2.15}/weco/api.py +0 -0
  37. {weco-0.2.14 → weco-0.2.15}/weco/auth.py +0 -0
  38. {weco-0.2.14 → weco-0.2.15}/weco/panels.py +0 -0
  39. {weco-0.2.14 → weco-0.2.15}/weco/utils.py +0 -0
  40. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/SOURCES.txt +0 -0
  41. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/dependency_links.txt +0 -0
  42. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/entry_points.txt +0 -0
  43. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/requires.txt +0 -0
  44. {weco-0.2.14 → weco-0.2.15}/weco.egg-info/top_level.txt +0 -0

{weco-0.2.14 → weco-0.2.15}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: weco
-Version: 0.2.14
+Version: 0.2.15
 Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
 Author-email: Weco AI Team <contact@weco.ai>
 License: MIT

{weco-0.2.14 → weco-0.2.15}/examples/cuda/evaluate.py
@@ -1,4 +1,3 @@
-import time
 import sys
 import os
 import pathlib
@@ -78,22 +77,27 @@ def get_inputs(batch_size, seq_len, n_embd, device):
     return torch.randn(batch_size, seq_len, n_embd, device=device, dtype=torch.float32)
 
 
+@torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
-    with torch.no_grad():
-        # warmup
-        for _ in range(n_warmup):
-            f(inputs)  # noqa
-
-        # benchmark
-        t_avg = 0.0
-        for _ in range(n_rep):
-            torch.cuda.empty_cache()  # Clear cache before timing
-            start_time = time.time()
-            f(inputs)
-            torch.cuda.synchronize()  # Wait for all computations to complete
-            t_avg += time.time() - start_time
-        t_avg /= n_rep * 1e-3
-        return t_avg
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+
+    # warmup
+    for _ in range(n_warmup):
+        f(inputs)  # noqa
+    torch.cuda.synchronize()
+
+    # benchmark
+    t_avg_ms = 0.0
+    for _ in range(n_rep):
+        # time the forward pass
+        start_event.record()
+        f(inputs)
+        end_event.record()
+        # wait for all computations to complete
+        torch.cuda.synchronize()
+        t_avg_ms += start_event.elapsed_time(end_event)
+    return t_avg_ms / n_rep
 
 
 if __name__ == "__main__":
@@ -113,7 +117,7 @@ if __name__ == "__main__":
     seq_len = 256
     n_embd = 768
     n_head = 8
-    # turn off dropout to measure correctness well
+    # turn off dropout to measure correctness
     attn_pdrop = 0.0
     resid_pdrop = 0.0
 
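
The cuda example above replaces host-side wall-clock timing (time.time() plus empty_cache) with torch.cuda.Event timestamps recorded on the GPU stream. As a point of reference, a minimal self-contained sketch of that event-based pattern; the helper name time_cuda_ms and its defaults are illustrative, not part of the package:

    import torch

    def time_cuda_ms(fn, n_warmup=10, n_rep=50):
        # Events record timestamps on the CUDA stream, so elapsed_time()
        # measures device execution in milliseconds, not host overhead.
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        for _ in range(n_warmup):  # warm up kernels and allocator
            fn()
        torch.cuda.synchronize()  # drain warmup work before timing
        total_ms = 0.0
        for _ in range(n_rep):
            start.record()
            fn()
            end.record()
            torch.cuda.synchronize()  # both events must complete first
            total_ms += start.elapsed_time(end)
        return total_ms / n_rep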

{weco-0.2.14 → weco-0.2.15}/examples/hello-kernel-world/evaluate.py
@@ -62,20 +62,19 @@ def get_inputs(B, N, device):
 
 @torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
-    # Warm up
+    device_type = inputs.device.type
+
+    # warm up
     for _ in range(n_warmup):
         f(inputs)  # noqa
+    if device_type == "cuda":
+        torch.cuda.synchronize()
+    elif device_type == "mps":
+        torch.mps.synchronize()
 
-    # Benchmark
-    device_type = inputs.device.type
+    # benchmark
     t_avg = 0.0
     for _ in range(n_rep):
-        # Clear cache before timing
-        if device_type == "cuda":
-            torch.cuda.empty_cache()
-        elif device_type == "mps":
-            torch.mps.empty_cache()
-
         # time forward pass
         start_time = time.time()
         f(inputs)
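
The hello-kernel-world variant keeps wall-clock timing but synchronizes once after warmup on whichever backend the inputs live on, instead of clearing allocator caches inside the timed loop. A small sketch of that device dispatch, with the helper name sync being our own choice:

    import torch

    def sync(device_type: str) -> None:
        # Block until all queued work on the device finishes; CPU needs no sync.
        if device_type == "cuda":
            torch.cuda.synchronize()
        elif device_type == "mps":
            torch.mps.synchronize()

    # usage: sync(inputs.device.type) once after the warmup loop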

{weco-0.2.14 → weco-0.2.15}/examples/metal/evaluate.py
@@ -55,15 +55,15 @@ def load_module_from_path(module_path: str, add_to_sys_modules: bool = False):
 # Benchmark
 ########################################################
 def get_inputs(batch_size, img_height, img_width, img_channels):
-    # MLX doesn't use device parameter like PyTorch, as it automatically uses Metal
     return mx.random.normal(shape=(batch_size, img_height, img_width, img_channels), dtype=mx.float32)
 
 
 def bench(f, inputs, n_warmup, n_rep):
-    # Warm up
+    # warm up
     for _ in range(n_warmup):
         result = f(inputs)
         mx.eval(result)  # Force computation due to lazy evaluation
+    mx.synchronize()  # Wait for all computations to complete
 
     t_avg = 0.0
     for _ in range(n_rep):
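
MLX evaluates lazily, so mx.eval() is what actually runs the computation, and the added mx.synchronize() waits for the Metal stream to drain before the timed section starts. A rough sketch of that eval-then-synchronize pattern, with bench_mlx as an illustrative name and the timing simplified to a single wall-clock window:

    import time
    import mlx.core as mx

    def bench_mlx(f, x, n_rep=50):
        # mx.eval forces the lazy graph; mx.synchronize waits for Metal to finish
        t0 = time.time()
        for _ in range(n_rep):
            mx.eval(f(x))
        mx.synchronize()
        return (time.time() - t0) / n_rep * 1e3  # ms per call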

{weco-0.2.14 → weco-0.2.15}/examples/triton/evaluate.py
@@ -1,4 +1,3 @@
-import time
 import sys
 import pathlib
 import importlib
@@ -76,20 +75,24 @@ def get_inputs(batch_size, seq_len, n_embd, device):
 
 @torch.no_grad()
 def bench(f, inputs, n_warmup, n_rep):
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+
     # warmup
     for _ in range(n_warmup):
         f(inputs)  # noqa
+    torch.cuda.synchronize()
 
     # benchmark
-    t_avg = 0.0
+    t_avg_ms = 0.0
     for _ in range(n_rep):
-        torch.cuda.empty_cache()  # Clear cache before timing
-        start_time = time.time()
+        start_event.record()
         f(inputs)
-        torch.cuda.synchronize()  # Wait for all computations to complete
-        t_avg += time.time() - start_time
-    t_avg /= n_rep * 1e-3
-    return t_avg
+        end_event.record()
+        # wait for all computations to complete
+        torch.cuda.synchronize()
+        t_avg_ms += start_event.elapsed_time(end_event)
+    return t_avg_ms / n_rep
 
 
 if __name__ == "__main__":

{weco-0.2.14 → weco-0.2.15}/pyproject.toml
@@ -10,7 +10,7 @@ authors = [
 ]
 description = "Documentation for `weco`, a CLI for using Weco AI's code optimizer."
 readme = "README.md"
-version = "0.2.14"
+version = "0.2.15"
 license = {text = "MIT"}
 requires-python = ">=3.8"
 dependencies = ["requests", "rich"]

{weco-0.2.14 → weco-0.2.15}/weco/__init__.py
@@ -1,7 +1,7 @@
 import os
 
 # DO NOT EDIT
-__pkg_version__ = "0.2.14"
+__pkg_version__ = "0.2.15"
 __api_version__ = "v1"
 
 __base_url__ = f"https://api.weco.ai/{__api_version__}"

{weco-0.2.14 → weco-0.2.15}/weco/cli.py
@@ -265,14 +265,14 @@ def main() -> None:
         "debug_prob": 0.5,
         "max_debug_depth": max(1, math.ceil(0.1 * steps)),
     }
+    # API request timeout
+    timeout = 800
     # Read additional instructions
     additional_instructions = read_additional_instructions(additional_instructions=args.additional_instructions)
     # Read source code path
     source_fp = pathlib.Path(args.source)
     # Read source code content
     source_code = read_from_path(fp=source_fp, is_json=False)
-    # API request timeout
-    timeout = 800
 
     # --- Panel Initialization ---
     summary_panel = SummaryPanel(
@@ -310,9 +310,8 @@ def main() -> None:
     runs_dir = pathlib.Path(args.log_dir) / session_id
     runs_dir.mkdir(parents=True, exist_ok=True)
 
-    # Save the original code (.runs/<session-id>/original.<extension>)
-    runs_copy_source_fp = runs_dir / f"original{source_fp.suffix}"  # Use correct suffix
-    write_to_path(fp=runs_copy_source_fp, content=source_code)
+    # Write the initial code string to the logs
+    write_to_path(fp=runs_dir / f"step_0{source_fp.suffix}", content=session_response["code"])
 
     # Write the initial code string to the source file path (if not preserving)
     if not args.preserve_source:
@@ -380,7 +379,8 @@ def main() -> None:
         transition_delay=0.1,
     )
 
-    for step in range(1, steps):
+    # Starting from step 1 to steps (inclusive) because the baseline solution is step 0, so we want to optimize for steps worth of steps
+    for step in range(1, steps + 1):
         # Re-read instructions from the original source (file path or string) BEFORE each suggest call
         current_additional_instructions = read_additional_instructions(
             additional_instructions=args.additional_instructions
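
This loop change fixes an off-by-one: with the baseline counted as step 0, range(1, steps) performed only steps - 1 optimization steps. A quick illustration with a hypothetical steps value:

    steps = 5
    list(range(1, steps))      # [1, 2, 3, 4]    -> one step short
    list(range(1, steps + 1))  # [1, 2, 3, 4, 5] -> the full 5 steps after baseline step 0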
@@ -553,9 +553,7 @@ def main() -> None:
         best_solution_score = None
 
     if best_solution_code is None or best_solution_score is None:
-        best_solution_content = (
-            f"# Weco could not find a better solution\n\n{read_from_path(fp=runs_copy_source_fp, is_json=False)}"
-        )
+        best_solution_content = f"# Weco could not find a better solution\n\n{read_from_path(fp=runs_dir / f'step_0{source_fp.suffix}', is_json=False)}"
     else:
         # Format score for the comment
         best_score_str = (

{weco-0.2.14 → weco-0.2.15}/weco.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: weco
-Version: 0.2.14
+Version: 0.2.15
 Summary: Documentation for `weco`, a CLI for using Weco AI's code optimizer.
 Author-email: Weco AI Team <contact@weco.ai>
 License: MIT