ziya-0.1.49-py3-none-any.whl → ziya-0.1.51-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ziya might be problematic. Click here for more details.

Files changed (49)
  1. app/agents/.agent.py.swp +0 -0
  2. app/agents/agent.py +315 -113
  3. app/agents/models.py +439 -0
  4. app/agents/prompts.py +32 -4
  5. app/main.py +70 -7
  6. app/server.py +403 -14
  7. app/utils/code_util.py +641 -215
  8. pyproject.toml +3 -3
  9. templates/asset-manifest.json +18 -20
  10. templates/index.html +1 -1
  11. templates/static/css/{main.87f30840.css → main.2bddf34e.css} +2 -2
  12. templates/static/css/main.2bddf34e.css.map +1 -0
  13. templates/static/js/46907.90c6a4f3.chunk.js +2 -0
  14. templates/static/js/46907.90c6a4f3.chunk.js.map +1 -0
  15. templates/static/js/56122.1d6a5c10.chunk.js +3 -0
  16. templates/static/js/56122.1d6a5c10.chunk.js.LICENSE.txt +9 -0
  17. templates/static/js/56122.1d6a5c10.chunk.js.map +1 -0
  18. templates/static/js/83953.61a908f4.chunk.js +3 -0
  19. templates/static/js/83953.61a908f4.chunk.js.map +1 -0
  20. templates/static/js/88261.1e90079d.chunk.js +3 -0
  21. templates/static/js/88261.1e90079d.chunk.js.map +1 -0
  22. templates/static/js/{96603.863a8f96.chunk.js → 96603.18c5d644.chunk.js} +2 -2
  23. templates/static/js/{96603.863a8f96.chunk.js.map → 96603.18c5d644.chunk.js.map} +1 -1
  24. templates/static/js/{97902.75670155.chunk.js → 97902.d1e262d6.chunk.js} +3 -3
  25. templates/static/js/{97902.75670155.chunk.js.map → 97902.d1e262d6.chunk.js.map} +1 -1
  26. templates/static/js/main.9b2b2b57.js +3 -0
  27. templates/static/js/{main.ee8b3c96.js.LICENSE.txt → main.9b2b2b57.js.LICENSE.txt} +8 -2
  28. templates/static/js/main.9b2b2b57.js.map +1 -0
  29. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/METADATA +5 -5
  30. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/RECORD +36 -35
  31. templates/static/css/main.87f30840.css.map +0 -1
  32. templates/static/js/23416.c33f07ab.chunk.js +0 -3
  33. templates/static/js/23416.c33f07ab.chunk.js.map +0 -1
  34. templates/static/js/3799.fedb612f.chunk.js +0 -2
  35. templates/static/js/3799.fedb612f.chunk.js.map +0 -1
  36. templates/static/js/46907.4a730107.chunk.js +0 -2
  37. templates/static/js/46907.4a730107.chunk.js.map +0 -1
  38. templates/static/js/64754.cf383335.chunk.js +0 -2
  39. templates/static/js/64754.cf383335.chunk.js.map +0 -1
  40. templates/static/js/88261.33450351.chunk.js +0 -3
  41. templates/static/js/88261.33450351.chunk.js.map +0 -1
  42. templates/static/js/main.ee8b3c96.js +0 -3
  43. templates/static/js/main.ee8b3c96.js.map +0 -1
  44. templates/static/js/{23416.c33f07ab.chunk.js.LICENSE.txt → 83953.61a908f4.chunk.js.LICENSE.txt} +0 -0
  45. templates/static/js/{88261.33450351.chunk.js.LICENSE.txt → 88261.1e90079d.chunk.js.LICENSE.txt} +0 -0
  46. templates/static/js/{97902.75670155.chunk.js.LICENSE.txt → 97902.d1e262d6.chunk.js.LICENSE.txt} +0 -0
  47. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/LICENSE +0 -0
  48. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/WHEEL +0 -0
  49. {ziya-0.1.49.dist-info → ziya-0.1.51.dist-info}/entry_points.txt +0 -0
app/utils/code_util.py CHANGED
@@ -1,6 +1,9 @@
1
1
  import os
2
2
  import subprocess
3
3
  import json
4
+ import tempfile
5
+ import glob
6
+ from itertools import zip_longest
4
7
  from io import StringIO
5
8
  import time
6
9
  from typing import Dict, Optional, Union, List, Tuple, Any
@@ -9,7 +12,8 @@ import re
9
12
  from app.utils.logging_utils import logger
10
13
  import difflib
11
14
 
12
- MIN_CONFIDENCE = 0.75 # what confidence level we cut off forced diff apply after fuzzy match
15
+ MIN_CONFIDENCE = 0.72 # what confidence level we cut off forced diff apply after fuzzy match
16
+ MAX_OFFSET = 5 # max allowed line offset before considering a hunk apply failed
13
17
 
14
18
  class PatchApplicationError(Exception):
15
19
  """Custom exception for patch application failures"""
@@ -62,8 +66,6 @@ def clean_input_diff(diff_content: str) -> str:
62
66
  minus_seen = 0
63
67
  plus_seen = 0
64
68
 
65
- import re
66
-
67
69
  for line in lines:
68
70
  # Reset skip flag on new file header
69
71
  if line.startswith('diff --git'):
@@ -193,12 +195,12 @@ def is_new_file_creation(diff_lines: List[str]) -> bool:
193
195
  if line.startswith('@@ -0,0'):
194
196
  logger.debug("Detected new file from zero hunk marker")
195
197
  return True
196
-
198
+
197
199
  # Case 2: Empty source file indicator
198
200
  if line == '--- /dev/null':
199
201
  logger.debug("Detected new file from /dev/null source")
200
202
  return True
201
-
203
+
202
204
  # Case 3: New file mode
203
205
  if 'new file mode' in line:
204
206
  logger.debug("Detected new file from mode marker")
@@ -209,7 +211,7 @@ def is_new_file_creation(diff_lines: List[str]) -> bool:
209
211
  def create_new_file(git_diff: str, base_dir: str) -> None:
210
212
  """Create a new file from a git diff."""
211
213
  logger.info(f"Processing new file diff with length: {len(git_diff)} bytes")
212
-
214
+
213
215
  try:
214
216
  # Parse the diff content
215
217
  diff_lines = git_diff.splitlines()
@@ -389,7 +391,7 @@ def normalize_whitespace_in_diff(diff_lines: List[str]) -> List[str]:
389
391
  if line.startswith(('+', '-', ' ')):
390
392
  prefix = line[0] # Save the diff marker (+, -, or space)
391
393
  content = line[1:] # Get the actual content
392
-
394
+
393
395
  # Normalize the content while preserving essential indentation
394
396
  normalized = content.rstrip() # Remove trailing whitespace
395
397
  if normalized:
@@ -406,7 +408,7 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
406
408
  Maintains compatibility with existing function signature.
407
409
  """
408
410
  logger.info(f"Processing diff for {original_file_path}")
409
-
411
+
410
412
  try:
411
413
 
412
414
  # Clean up the diff content first
@@ -445,7 +447,7 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
445
447
 
446
448
  # Reconstruct normalized diff
447
449
  result = headers # start with original headers
448
-
450
+
449
451
  # Extract original hunks
450
452
  original_hunks = []
451
453
  current_hunk = []
@@ -479,49 +481,11 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
479
481
  except Exception as e:
480
482
  logger.error(f"Error normalizing diff: {str(e)}")
481
483
  raise
482
-
484
+
483
485
  except Exception as e:
484
486
  logger.error(f"Error correcting diff: {str(e)}")
485
487
  raise
486
488
 
487
- def apply_system_patch(diff_content: str, target_dir: str) -> bool:
488
- """
489
- Apply patch using system patch command.
490
- Returns True if successful, False otherwise.
491
- """
492
- logger.info("Attempting to apply with system patch command...")
493
- try:
494
- # Debug: Log the exact content we're sending to patch
495
- logger.info("Patch input content:")
496
- logger.info(diff_content)
497
- # Ensure we have string input and encode it just once
498
- if isinstance(diff_content, bytes):
499
- diff_content = diff_content.decode('utf-8')
500
- result = subprocess.run(
501
- ['patch', '-p1', '--forward', '--ignore-whitespace', '--verbose'],
502
- input=diff_content,
503
- cwd=target_dir,
504
- capture_output=True,
505
- text=True,
506
- timeout=10
507
- )
508
-
509
- logger.debug(f"Patch command output: stdout={result.stdout}, stderr={result.stderr}")
510
-
511
- # If any hunks were successfully applied, we need to modify the diff
512
- if result and 'Hunk #1 succeeded' in result.stderr:
513
- logger.debug("Some hunks succeeded, extracting remaining hunks")
514
- git_diff = extract_remaining_hunks(git_diff, patch_result.stderr)
515
- logger.info(f"Patch stdout: {result.stdout}")
516
- logger.info(f"Patch stderr: {result.stderr}")
517
- success = result.returncode == 0
518
- logger.info(f"Patch {'succeeded' if success else 'failed'} with return code {result.returncode}")
519
- return success, result
520
- except Exception as e:
521
- logger.error(f"System patch error output: {str(e)}")
522
- logger.error(f"System patch failed: {str(e)}")
523
- return False
524
-
525
489
  def validate_and_fix_diff(diff_content: str) -> str:
526
490
  """
527
491
  Validate diff format and ensure it has all required components.
@@ -663,95 +627,237 @@ def apply_diff_with_difflib(file_path: str, diff_content: str) -> None:
663
627
  # 2) apply forced-hybrid logic with error throwing
664
628
  final_lines = apply_diff_with_difflib_hybrid_forced(file_path, diff_content, original_lines)
665
629
 
666
- # 3) write result
630
+ # 3) write result back to file
667
631
  with open(file_path, 'w', encoding='utf-8') as f:
668
632
  f.writelines(final_lines)
633
+ logger.info(
634
+ f"Successfully applied forced-hybrid diff (with exceptions on mismatch) to {file_path}. "
635
+ f"Wrote {len(final_lines)} lines."
636
+ )
637
+
638
+ def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: int) -> bool:
639
+ """
640
+ Check if a hunk has already been applied at the given position.
641
+ Returns True only if ALL changes in the hunk are already present.
642
+ Checks if the target state matches exactly.
643
+ """
644
+ if pos >= len(file_lines):
645
+ logger.debug(f"Position {pos} beyond file length {len(file_lines)}")
646
+ return False
647
+
648
+ # Get the lines we're working with
649
+ window_size = max(len(hunk['old_block']), len(hunk['new_lines']))
650
+ available_lines = file_lines[pos:pos + window_size]
651
+
652
+ # Count actual changes needed (excluding context lines)
653
+ changes_needed = 0
654
+ changes_found = 0
655
+
656
+ # Map of line positions to their expected states
657
+ expected_states = {}
658
+
659
+ for old_line, new_line in zip_longest(hunk['old_block'], hunk['new_lines'], fillvalue=None):
660
+ if old_line != new_line:
661
+ changes_needed += 1
662
+
663
+ # Check each line in the window
664
+ for i, actual_line in enumerate(available_lines):
665
+ if i < len(hunk['new_lines']):
666
+ new_line = hunk['new_lines'][i]
667
+ old_line = hunk['old_block'][i] if i < len(hunk['old_block']) else None
668
+
669
+ # Line matches target state
670
+ if actual_line.rstrip() == new_line.rstrip():
671
+ changes_found += 1
672
+ continue
673
+
674
+ # Line matches original state and needs change
675
+ if old_line and actual_line.rstrip() == old_line.rstrip():
676
+ # This is a line that still needs changing
677
+ continue
678
+
679
+ # Line doesn't match either state
680
+ return False
681
+
682
+ # Calculate what percentage of changes are already applied
683
+ if changes_needed > 0 and changes_found > 0:
684
+ applied_ratio = changes_found / changes_needed
685
+ logger.debug(f"Hunk changes: needed={changes_needed}, found={changes_found}, ratio={applied_ratio:.2f}")
669
686
 
670
- logger.info(f"Successfully applied forced-hybrid diff (with exceptions on mismatch) to {file_path}.")
687
+ # Consider it applied if we found all changes
688
+ if applied_ratio >= 1.0: # Must match exactly, or have all needed changes+
689
+ logger.debug(f"All changes already present at pos {pos}")
690
+ return True
691
+ elif applied_ratio > 0:
692
+ logger.debug(f"Partial changes found ({applied_ratio:.2f}) - will apply remaining changes")
693
+ return False
694
+
695
+ # If we get here, no changes were found
696
+ if changes_needed > 0:
697
+ return False
671
698
 
699
+ # Default case - nothing to apply
700
+ logger.debug("No changes needed")
701
+ return True
672
702
 
673
703
  def apply_diff_with_difflib_hybrid_forced(file_path: str, diff_content: str, original_lines: list[str]) -> list[str]:
674
704
  # parse hunks
675
- hunks = parse_unified_diff_exact_plus(diff_content, file_path)
705
+ hunks = list(parse_unified_diff_exact_plus(diff_content, file_path))
706
+ logger.debug(f"Parsed hunks for difflib: {json.dumps([{'old_start': h['old_start'], 'old_count': len(h['old_block']), 'new_start': h['new_start'], 'new_count': len(h['new_lines'])} for h in hunks], indent=2)}")
707
+ already_applied_hunks = set()
676
708
  stripped_original = [ln.rstrip('\n') for ln in original_lines]
677
709
 
710
+ final_lines = stripped_original.copy()
678
711
  offset = 0
712
+ applied_content = set()
679
713
  for hunk_idx, h in enumerate(hunks, start=1):
680
- old_start = h['old_start']
681
- old_count = h['old_count']
682
- new_lines = h['new_lines']
683
- old_block = h['old_block']
684
-
685
- logger.debug(f"\n--- Hunk #{hunk_idx} => -{old_start},{old_count} +{h['new_start']},{h['new_count']}, new_lines={len(new_lines)}")
686
-
687
- # Phase A: strict check
688
- remove_pos = (old_start - 1) + offset
689
- remove_pos = clamp(remove_pos, 0, len(stripped_original))
690
- strict_ok = False
691
-
692
- # see if we have enough lines
693
- if remove_pos + old_count <= len(stripped_original):
694
- file_slice = stripped_original[remove_pos : remove_pos + old_count]
695
- # Compare to the first old_count lines from old_block
696
- if len(old_block) >= old_count:
697
- old_block_minus = old_block[:old_count] # The lines we think are removed
698
- if file_slice == old_block_minus:
699
- strict_ok = True
700
- logger.debug(f"Hunk #{hunk_idx}: strict match at pos={remove_pos}")
701
- else:
714
+ def calculate_initial_positions():
715
+ """Calculate initial positions and counts for the hunk."""
716
+ old_start = h['old_start'] - 1
717
+ old_count = h['old_count']
718
+ initial_remove_pos = clamp(old_start + offset, 0, len(final_lines))
719
+
720
+ # Adjust counts based on available lines
721
+ available_lines = len(final_lines) - initial_remove_pos
722
+ actual_old_count = min(old_count, available_lines)
723
+ end_remove = initial_remove_pos + actual_old_count
724
+
725
+ # Final position adjustment
726
+ remove_pos = clamp(initial_remove_pos, 0, len(stripped_original) - 1)
727
+
728
+ return {
729
+ 'remove_pos': remove_pos,
730
+ 'old_count': old_count,
731
+ 'actual_old_count': actual_old_count,
732
+ 'end_remove': end_remove
733
+ }
734
+
735
+ def try_strict_match(positions):
736
+ """Attempt a strict match of the hunk content."""
737
+ remove_pos = positions['remove_pos']
738
+
739
+ if remove_pos + len(h['old_block']) <= len(final_lines):
740
+ file_slice = final_lines[remove_pos : remove_pos + positions['old_count']]
741
+ if h['old_block'] and len(h['old_block']) >= positions['actual_old_count']:
742
+ old_block_minus = h['old_block'][:positions['old_count']]
743
+ if file_slice == old_block_minus:
744
+ logger.debug(f"Hunk #{hunk_idx}: strict match at pos={remove_pos}")
745
+ return True, remove_pos
702
746
  logger.debug(f"Hunk #{hunk_idx}: strict match failed at pos={remove_pos}")
703
- else:
704
- logger.debug(f"Hunk #{hunk_idx}: old_block is smaller than old_count => strict match not possible")
747
+ else:
748
+ logger.debug(f"Hunk #{hunk_idx}: old_block is smaller than old_count => strict match not possible")
749
+ return False, remove_pos
705
750
 
706
- if not strict_ok:
707
- # Phase B: fuzzy
751
+ def try_fuzzy_match(positions):
752
+ """Attempt a fuzzy match if strict match fails."""
753
+ remove_pos = positions['remove_pos']
708
754
  logger.debug(f"Hunk #{hunk_idx}: Attempting fuzzy near line {remove_pos}")
709
- best_pos, best_ratio = find_best_chunk_position(stripped_original, old_block, remove_pos)
710
- if best_ratio < MIN_CONFIDENCE:
711
- # Raise error if ratio is too low
755
+
756
+ best_pos, best_ratio = find_best_chunk_position(stripped_original, h['old_block'], remove_pos)
757
+
758
+ # First check if changes are already applied (with high confidence threshold)
759
+ if any(new_line in stripped_original for new_line in h['new_lines']):
760
+ already_applied = sum(1 for line in h['new_lines'] if line in stripped_original)
761
+ if already_applied / len(h['new_lines']) >= 0.98: # Require near-exact match
762
+ logger.info(f"Hunk #{hunk_idx} appears to be already applied")
763
+ return None, remove_pos # Signal skip to next hunk
764
+
765
+ # Then check if we have enough confidence in our match position
766
+ if best_ratio <= MIN_CONFIDENCE:
712
767
  msg = (f"Hunk #{hunk_idx} => low confidence match (ratio={best_ratio:.2f}) near {remove_pos}, "
713
768
  f"can't safely apply chunk. Failing.")
714
769
  logger.error(msg)
715
- raise PatchApplicationError(msg)
770
+ raise PatchApplicationError(msg, {
771
+ "status": "error",
772
+ "type": "low_confidence",
773
+ "hunk": hunk_idx,
774
+ "confidence": best_ratio
775
+ })
776
+
716
777
  logger.debug(f"Hunk #{hunk_idx}: fuzzy best pos={best_pos}, ratio={best_ratio:.2f}")
717
- remove_pos = best_pos
718
-
719
- # forcibly remove old_count lines at remove_pos
720
- remove_pos = clamp(remove_pos, 0, len(stripped_original))
721
- end_remove = remove_pos + old_count
722
- total_lines = len(stripped_original)
723
- if end_remove > total_lines:
724
- # Adjust old_count if we're near the end of file
725
- old_count = total_lines - remove_pos
726
- msg = (f"Hunk #{hunk_idx} => not enough lines to remove. "
727
- f"Wanted to remove {old_count} at pos={remove_pos}, but file len={len(stripped_original)}. Failing.")
728
- logger.error(msg)
729
- raise PatchApplicationError(msg)
730
-
731
- logger.debug(f"Hunk #{hunk_idx}: Removing lines {remove_pos}:{end_remove} from file")
732
- for i in range(remove_pos, end_remove):
733
- logger.debug(f" - {stripped_original[i]!r}")
734
- del stripped_original[remove_pos:end_remove]
735
-
736
- # Insert new_lines
737
- logger.debug(f"Hunk #{hunk_idx}: Inserting {len(new_lines)} lines at pos={remove_pos}")
738
- for i, ln in enumerate(new_lines):
739
- logger.debug(f" + {ln!r}")
740
- stripped_original.insert(remove_pos + i, ln)
741
-
742
- net_change = len(new_lines) - old_count
778
+ return (best_pos + offset if best_pos is not None else None), remove_pos
779
+
780
+ logger.debug(f"Processing hunk #{hunk_idx} with offset {offset}")
781
+
782
+ # Create a unique key for this hunk based on its content
783
+ already_found = False
784
+ hunk_key = (
785
+ tuple(h['old_block']),
786
+ tuple(h['new_lines'])
787
+ )
788
+ if hunk_key in already_applied_hunks:
789
+ continue
790
+
791
+ # First check if this hunk is already applied anywhere in the file
792
+ for pos in range(len(stripped_original)):
793
+ if is_hunk_already_applied(stripped_original, h, pos):
794
+ # Verify we have the exact new content, not just similar content
795
+ window = stripped_original[pos:pos+len(h['new_lines'])]
796
+ if all(line.rstrip() == new_line.rstrip() for line, new_line in zip(window, h['new_lines'])):
797
+ logger.info(f"Hunk #{hunk_idx} already present at position {pos}")
798
+ already_applied_hunks.add(hunk_key)
799
+ logger.debug(f"Verified hunk #{hunk_idx} is already applied")
800
+ already_found = True
801
+ break
802
+ # Content doesn't match exactly, continue looking
803
+ continue
804
+
805
+ if already_found:
806
+ continue
807
+
808
+ # Calculate initial positions
809
+ positions = calculate_initial_positions()
810
+
811
+ # Try strict match first
812
+ strict_ok, remove_pos = try_strict_match(positions)
813
+
814
+ # If strict match fails, try fuzzy match
815
+ if not strict_ok:
816
+ result = try_fuzzy_match(positions)
817
+ if result is None:
818
+ # Skip this hunk as it's already applied
819
+ continue # Skip this hunk (already applied)
820
+ new_pos, old_pos = result
821
+ if new_pos is not None: # Only update position if we got a valid match
822
+ remove_pos = new_pos
823
+
824
+ # Use actual line counts from the blocks
825
+ old_count = len(h['old_block'])
826
+ logger.debug(f"Replacing {old_count} lines with {len(h['new_lines'])} lines at pos={remove_pos}")
827
+
828
+ # Replace exactly the number of lines we counted
829
+ final_lines[remove_pos:remove_pos + old_count] = h['new_lines']
830
+ logger.debug(f" final_lines after insertion: {final_lines}")
831
+
832
+ # Calculate net change based on actual lines removed and added
833
+ actual_removed = min(positions['old_count'], len(h['old_block']))
834
+ logger.debug(f"Removal calculation: min({len(h['old_block'])}, {len(final_lines)} - {remove_pos})")
835
+ logger.debug(f"Old block lines: {h['old_block']}")
836
+ logger.debug(f"New lines: {h['new_lines']}")
837
+ logger.debug(f"Remove position: {remove_pos}")
838
+ logger.debug(f"Final lines length: {len(final_lines)}")
839
+ net_change = len(h['new_lines']) - positions['actual_old_count']
743
840
  offset += net_change
744
841
 
745
- # done all hunks
746
- final_lines = [ln + '\n' for ln in stripped_original]
747
- return final_lines
842
+ # Remove trailing empty line if present
843
+ while final_lines and final_lines[-1] == '':
844
+ final_lines.pop()
845
+
846
+ # Add newlines to all lines
847
+ result_lines = [
848
+ ln + '\n' if not ln.endswith('\n') else ln
849
+ for ln in final_lines
850
+ ]
851
+ logger.debug(f"Final result lines: {result_lines}")
852
+
853
+ return result_lines
748
854
 
749
855
  def strip_leading_dotslash(rel_path: str) -> str:
750
856
  """
751
857
  Remove leading '../' or './' segments from the relative path
752
858
  so it matches patch lines that are always 'frontend/...', not '../frontend/...'.
753
859
  """
754
- import re
860
+
755
861
  # Repeatedly strip leading '../' or './'
756
862
  pattern = re.compile(r'^\.\.?/')
757
863
  while pattern.match(rel_path):
@@ -763,24 +869,19 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> list[d
763
869
  Same logic: we gather old_block and new_lines. If we can't parse anything, we return an empty list.
764
870
  The calling code might handle that or raise an error if no hunks are found.
765
871
  """
766
- import re
872
+
767
873
  lines = diff_content.splitlines()
874
+ logger.debug(f"Parsing diff with {len(lines)} lines:\n{diff_content}")
768
875
  hunks = []
769
876
  current_hunk = None
770
877
  in_hunk = False
771
878
  skip_file = True
879
+ seen_hunks = set()
772
880
 
773
881
  # fixme: import ziya project directory if specified on invocation cli
774
882
  rel_path = os.path.relpath(target_file, os.getcwd())
775
883
  rel_path = strip_leading_dotslash(rel_path)
776
884
 
777
- def close_hunk():
778
- nonlocal current_hunk, in_hunk
779
- if current_hunk:
780
- hunks.append(current_hunk)
781
- current_hunk = None
782
- in_hunk = False
783
-
784
885
  i = 0
785
886
  while i < len(lines):
786
887
  line = lines[i]
@@ -794,68 +895,107 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> list[d
794
895
  i += 1
795
896
  continue
796
897
 
898
+ # Handle index lines and other git metadata
899
+ if line.startswith('index ') or line.startswith('new file mode ') or line.startswith('deleted file mode '):
900
+ i += 1
901
+ continue
902
+
797
903
  if line.startswith('@@ '):
798
- close_hunk()
799
- match = re.match(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@.*$', line)
904
+ match = re.match(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?', line)
905
+ hunk_num = int(match.group(5)) if match and match.group(5) else len(hunks) + 1
800
906
  if match:
801
907
  old_start = int(match.group(1))
802
908
  # Validate line numbers
803
909
  if old_start < 1:
804
910
  logger.warning(f"Invalid hunk header - old_start ({old_start}) < 1")
805
911
  old_start = 1
806
-
912
+
807
913
  # Use default of 1 for count if not specified
808
914
  old_count = int(match.group(2)) if match.group(2) else 1
809
-
915
+
810
916
  new_start = int(match.group(3))
811
917
  new_count = int(match.group(4)) if match.group(4) else 1
812
- current_hunk = {
918
+
919
+ # Use original hunk number if present in header
920
+ if match.group(5):
921
+ hunk_num = int(match.group(5))
922
+
923
+ hunk = {
813
924
  'old_start': old_start,
814
925
  'old_count': old_count,
815
926
  'new_start': new_start,
816
927
  'new_count': new_count,
928
+ 'number': hunk_num,
817
929
  'old_block': [],
930
+ 'original_hunk': hunk_num, # Store original hunk number
818
931
  'new_lines': []
819
932
  }
933
+
934
+ # Start collecting content for this hunk
935
+ current_lines = []
820
936
  in_hunk = True
821
- hunks.append(current_hunk)
937
+ hunks.append(hunk)
938
+ current_hunk = hunk
939
+
822
940
  i += 1
823
941
  continue
824
942
 
825
- if in_hunk and current_hunk:
826
- if line.startswith('-'):
827
- text = line[1:].rstrip('\n')
828
- current_hunk['old_block'].append(text)
829
- elif line.startswith('+'):
830
- text = line[1:].rstrip('\n')
831
- current_hunk['new_lines'].append(text)
832
- else:
833
- # context => belongs to both old_block & new_lines
834
- text = line[1:].rstrip('\n') if line.startswith(' ') else line.rstrip('\n')
835
- current_hunk['old_block'].append(text)
836
- current_hunk['new_lines'].append(text)
837
- i += 1
943
+ seen_hunks = set()
944
+ if in_hunk:
945
+ # End of hunk reached if we see a line that doesn't start with ' ', '+', '-', or '\'
946
+ if not line.startswith((' ', '+', '-', '\\')):
947
+ in_hunk = False
948
+ if current_hunk:
949
+ # Check if this hunk is complete and unique
950
+ if len(current_hunk['old_block']) == current_hunk['old_count'] and \
951
+ len(current_hunk['new_lines']) == current_hunk['new_count']:
952
+ hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
953
+ if hunk_key not in seen_hunks:
954
+ seen_hunks.add(hunk_key)
955
+ hunks.append(current_hunk)
956
+ current_hunk = None
957
+ i += 1
958
+ continue
959
+ if current_hunk:
960
+ if line.startswith('-'):
961
+ text = line[1:]
962
+ current_hunk['old_block'].append(text)
963
+ current_hunk['old_count'] = len(current_hunk['old_block'])
964
+ elif line.startswith('+'):
965
+ text = line[1:]
966
+ current_hunk['new_lines'].append(text)
967
+ current_hunk['new_count'] = len(current_hunk['new_lines'])
968
+ elif line.startswith(' '):
969
+ text = line[1:]
970
+ if (not current_hunk['old_block'] or
971
+ current_hunk['old_block'][-1] != text):
972
+ current_hunk['old_block'].append(text)
973
+ if (not current_hunk['new_lines'] or
974
+ current_hunk['new_lines'][-1] != text):
975
+ current_hunk['new_lines'].append(text)
838
976
 
839
- close_hunk()
840
- if len(hunks) == 0:
841
- raise PatchApplicationError(f"No hunks found in diff for {target_file}", {
842
- 'status': 'no_hunks_found',
843
- 'details': f"Target file path: {target_file}\nDiff content:\n{diff_content[:500]}..."
844
- })
977
+ i += 1
845
978
  return hunks
846
979
 
847
-
848
980
  def find_best_chunk_position(file_lines: list[str], old_block: list[str], approximate_line: int) -> tuple[int, float]:
981
+ # Adjust approximate_line if it's outside file bounds
982
+ if approximate_line >= len(file_lines):
983
+ approximate_line = len(file_lines) - 1
984
+ elif approximate_line < 0:
985
+ approximate_line = 0
986
+
987
+ # Look for exact context matches first
988
+ context_lines = [line for line in old_block if line.startswith(' ')]
989
+
849
990
  """
850
991
  Return (best_pos, best_ratio). If best_ratio < MIN_CONFIDENCE, we raise or handle outside.
851
992
  """
852
993
  block_str = '\n'.join(old_block)
853
994
  file_len = len(file_lines)
854
995
  block_len = len(old_block)
855
-
856
- # search +/- 20 lines
857
- search_start = max(0, approximate_line - 20)
858
- search_end = min(file_len - block_len + 1, approximate_line + 20)
996
+
997
+ search_start = 0
998
+ search_end = file_len - block_len + 1
859
999
  if search_end < search_start:
860
1000
  search_start = 0
861
1001
  search_end = max(0, file_len - block_len + 1)
@@ -865,6 +1005,22 @@ def find_best_chunk_position(file_lines: list[str], old_block: list[str], approx
865
1005
  import difflib
866
1006
  matcher = difflib.SequenceMatcher(None)
867
1007
 
1008
+ # First try exact matches with context
1009
+ for pos in range(search_start, search_end + 1):
1010
+ if pos + block_len > file_len:
1011
+ continue
1012
+
1013
+ # Check if we have an exact match of the first and last lines
1014
+ if (old_block[0] == file_lines[pos] and
1015
+ old_block[-1] == file_lines[pos + len(old_block) - 1]):
1016
+ window = file_lines[pos:pos+block_len]
1017
+ window_str = '\n'.join(window)
1018
+ matcher.set_seqs(block_str, window_str)
1019
+ ratio = matcher.ratio()
1020
+ if ratio > 0.9: # High confidence exact match
1021
+ return pos, ratio
1022
+
1023
+ # If no high-confidence exact match, try fuzzy matching
868
1024
  for pos in range(search_start, search_end + 1):
869
1025
  window = file_lines[pos:pos+block_len]
870
1026
  window_str = '\n'.join(window)
@@ -873,7 +1029,7 @@ def find_best_chunk_position(file_lines: list[str], old_block: list[str], approx
873
1029
  if ratio > best_ratio:
874
1030
  best_ratio = ratio
875
1031
  best_pos = pos
876
- if best_ratio > 0.98:
1032
+ if best_ratio >= 0.98:
877
1033
  break
878
1034
 
879
1035
  logger.debug(f"find_best_chunk_position => best ratio={best_ratio:.2f} at pos={best_pos}, approximate_line={approximate_line}")
@@ -1006,14 +1162,47 @@ def extract_function_name(line: str) -> str:
1006
1162
  after_def = line[4:].split('(')[0]
1007
1163
  return after_def.strip()
1008
1164
 
1165
+ def cleanup_patch_artifacts(base_dir: str, file_path: str) -> None:
1166
+ """
1167
+ Clean up .rej and .orig files that might be left behind by patch application.
1168
+
1169
+ Args:
1170
+ base_dir: The base directory where the codebase is located
1171
+ file_path: The path to the file that was patched
1172
+ """
1173
+ try:
1174
+ # Get the directory containing the file
1175
+ file_dir = os.path.dirname(os.path.join(base_dir, file_path))
1176
+
1177
+ # Find and remove .rej and .orig files
1178
+ for pattern in ['*.rej', '*.orig']:
1179
+ for artifact in glob.glob(os.path.join(file_dir, pattern)):
1180
+ logger.info(f"Removing patch artifact: {artifact}")
1181
+ os.remove(artifact)
1182
+ except Exception as e:
1183
+ logger.warning(f"Error cleaning up patch artifacts: {str(e)}")
1184
+
1009
1185
  def use_git_to_apply_code_diff(git_diff: str, file_path: str) -> None:
1010
1186
  """
1011
1187
  Apply a git diff to the user's codebase.
1012
1188
  Main entry point for patch application.
1189
+
1190
+ If ZIYA_FORCE_DIFFLIB environment variable is set, bypasses system patch
1191
+ and uses difflib directly.
1192
+
1193
+ Args:
1194
+ git_diff (str): The git diff to apply
1195
+ file_path (str): Path to the target file
1013
1196
  """
1014
1197
  logger.info("Starting diff application process...")
1015
1198
  logger.debug("Original diff content:")
1016
1199
  logger.debug(git_diff)
1200
+ changes_written = False
1201
+ results = {
1202
+ "succeeded": [],
1203
+ "failed": [],
1204
+ "already_applied": []
1205
+ }
1017
1206
 
1018
1207
  # Correct the diff using existing functionality
1019
1208
  if file_path:
@@ -1039,107 +1228,344 @@ def use_git_to_apply_code_diff(git_diff: str, file_path: str) -> None:
1039
1228
  # Handle new file creation
1040
1229
  if is_new_file_creation(diff_lines):
1041
1230
  create_new_file(git_diff, user_codebase_dir)
1231
+ cleanup_patch_artifacts(user_codebase_dir, file_path)
1042
1232
  return
1233
+
1234
+ # If force difflib flag is set, skip system patch entirely
1235
+ if os.environ.get('ZIYA_FORCE_DIFFLIB'):
1236
+ logger.info("Force difflib mode enabled, bypassing system patch")
1237
+ try:
1238
+ apply_diff_with_difflib(file_path, git_diff)
1239
+ return
1240
+ except Exception as e:
1241
+ raise PatchApplicationError(str(e), {"status": "error", "type": "difflib_error"})
1242
+
1243
+ results = {"succeeded": [], "already_applied": [], "failed": []}
1244
+
1245
+ # Read original content before any modifications
1246
+ try:
1247
+ with open(file_path, 'r', encoding='utf-8') as f:
1248
+ original_content = f.read()
1249
+ except FileNotFoundError:
1250
+ original_content = ""
1043
1251
 
1044
1252
  try:
1045
- # Try system patch first
1253
+ # Check if file exists before attempting patch
1254
+ if not os.path.exists(file_path) and not is_new_file_creation(diff_lines):
1255
+ raise PatchApplicationError(f"Target file does not exist: {file_path}", {
1256
+ "status": "error",
1257
+ "type": "missing_file",
1258
+ "file": file_path
1259
+ })
1260
+ logger.info("Starting patch application pipeline...")
1046
1261
  logger.debug("About to run patch command with:")
1047
1262
  logger.debug(f"CWD: {user_codebase_dir}")
1048
1263
  logger.debug(f"Input length: {len(git_diff)} bytes")
1264
+ changes_written = False
1265
+ # Do a dry run to see what we're up against on first pass
1049
1266
  patch_result = subprocess.run(
1050
- ['patch', '-p1', '--forward', '--ignore-whitespace', '-i', '-'],
1267
+ ['patch', '-p1', '--forward', '--no-backup-if-mismatch', '--reject-file=-', '--batch', '--ignore-whitespace', '--verbose', '--dry-run', '-i', '-'],
1051
1268
  input=git_diff,
1269
+ encoding='utf-8',
1052
1270
  cwd=user_codebase_dir,
1053
1271
  capture_output=True,
1054
1272
  text=True,
1055
1273
  timeout=10
1056
1274
  )
1057
- logger.debug("Patch command completed with:")
1058
1275
  logger.debug(f"stdout: {patch_result.stdout}")
1059
1276
  logger.debug(f"stderr: {patch_result.stderr}")
1060
-
1061
- if patch_result.returncode == 0:
1062
- logger.info("System patch succeeded")
1063
- return
1064
- elif patch_result.returncode == 2: # Patch failed but gave output
1065
- logger.warning("System patch failed but provided output")
1066
-
1067
- # If patch fails, try git apply
1068
- logger.warning("System patch failed, trying git apply...")
1069
- timestamp = int(time.time() * 1000)
1070
- temp_file = os.path.join(user_codebase_dir, f'temp_{timestamp}.diff')
1071
-
1072
- try:
1073
- with open(temp_file, 'w', newline='\n') as f:
1074
- f.write(git_diff)
1075
-
1076
- git_result = subprocess.run(
1077
- ['git', 'apply', '--verbose', '--ignore-whitespace',
1078
- '--ignore-space-change', '--whitespace=nowarn',
1079
- '--reject', temp_file],
1277
+ logger.debug(f"Return code: {patch_result.returncode}")
1278
+
1279
+ hunk_status = {}
1280
+ patch_output = ""
1281
+ file_was_modified = False
1282
+ has_line_mismatch = False
1283
+ has_large_offset = False
1284
+ has_fuzz = False
1285
+ patch_reports_success = False
1286
+
1287
+ # Parse the dry run output
1288
+ dry_run_status = parse_patch_output(patch_result.stdout)
1289
+ hunk_status = dry_run_status
1290
+ already_applied = (not "No file to patch" in patch_result.stdout and "Reversed (or previously applied)" in patch_result.stdout and
1291
+ "failed" not in patch_result.stdout.lower())
1292
+ logger.debug("Returned from dry run, processing results...")
1293
+ logger.debug(f"Dry run status: {dry_run_status}")
1294
+
1295
+ # If patch indicates changes are already applied, return success
1296
+ if already_applied:
1297
+ logger.info("All changes are already applied")
1298
+ return {"status": "success", "details": {
1299
+ "succeeded": [],
1300
+ "failed": [],
1301
+ "failed": [],
1302
+ "already_applied": list(dry_run_status.keys())
1303
+ }}
1304
+
1305
+ # Apply successful hunks with system patch if any
1306
+ # fixme: we should probably be iterating success only, but this will also hit already applied cases
1307
+ if any(success for success in dry_run_status.values()):
1308
+ logger.info(f"Applying successful hunks ({sum(1 for v in dry_run_status.values() if v)}/{len(dry_run_status)}) with system patch...")
1309
+ patch_result = subprocess.run(
1310
+ ['patch', '-p1', '--forward', '--no-backup-if-mismatch', '--reject-file=-', '--batch', '--ignore-whitespace', '--verbose', '-i', '-'],
1311
+ input=git_diff,
1312
+ encoding='utf-8',
1080
1313
  cwd=user_codebase_dir,
1081
1314
  capture_output=True,
1082
- text=True
1315
+ text=True,
1316
+ timeout=10
1083
1317
  )
1084
1318
 
1085
- if git_result.returncode == 0:
1086
- logger.info("Git apply succeeded")
1319
+ # Actually write the successful changes
1320
+ if "misordered hunks" in patch_result.stderr:
1321
+ logger.warning("Patch reported misordered hunks - falling back to difflib")
1322
+ # Skip to difflib application
1323
+ apply_diff_with_difflib(file_path, git_diff)
1087
1324
  return
1325
+ elif patch_result.returncode == 0:
1326
+ logger.info("Successfully applied some hunks with patch, writing changes")
1327
+ # Verify changes were actually written
1328
+ changes_written = True
1088
1329
 
1089
- if 'patch does not apply' not in git_result.stderr:
1090
- git_diff = extract_remaining_hunks(git_diff, git_result.stderr)
1091
-
1092
- # If both patch and git apply fail, try difflib
1093
- logger.warning("Git apply failed, trying difflib...")
1330
+ else:
1331
+ logger.warning("Patch application had mixed results")
1332
+
1333
+ patch_output = patch_result.stdout
1334
+ logger.debug(f"Raw (system) patch stdout:\n{patch_output}")
1335
+ logger.debug(f"Raw (system) patch stdout:\n{patch_result.stderr}")
1336
+ hunk_status = parse_patch_output(patch_output)
1337
+
1338
+ # Record results from patch stage
1339
+ for hunk_num, success in dry_run_status.items():
1340
+ if success:
1341
+ if "Reversed (or previously applied)" in patch_output and f"Hunk #{hunk_num}" in patch_output:
1342
+ logger.info(f"Hunk #{hunk_num} was already applied")
1343
+ results["already_applied"].append(hunk_num)
1344
+ else:
1345
+ logger.info(f"Hunk #{hunk_num} applied successfully")
1346
+ results["succeeded"].append(hunk_num)
1347
+ changes_written = True
1348
+ else:
1349
+ logger.info(f"Hunk #{hunk_num} failed to apply")
1350
+ results["failed"].append(hunk_num)
1351
+
1352
+ if results["succeeded"] or results["already_applied"]:
1353
+ logger.info(f"Successfully applied {len(results['succeeded'])} hunks, "
1354
+ f"{len(results['already_applied'])} were already applied")
1355
+ changes_written = True
1356
+
1357
+ # If any hunks failed, extract them to pass onto next pipeline stage
1358
+ if results["failed"]:
1359
+ logger.info(f"Extracting {len(results['failed'])} failed hunks for next stage")
1360
+ git_diff = extract_remaining_hunks(git_diff, {h: False for h in results["failed"]})
1361
+ else:
1362
+ logger.info("Exiting pipeline die to full success condition.")
1363
+ return {"status": "success", "details": results}
1364
+
1365
+ # Proceed with git apply if we have any failed hunks
1366
+ if results["failed"]:
1367
+ logger.debug("Some failed hunks reported, processing..")
1368
+ if not git_diff.strip():
1369
+ logger.warning("No valid hunks remaining to process")
1370
+ return {"status": "partial", "details": results}
1371
+ temp_path = None
1372
+ logger.info("Proceeding with git apply for remaining hunks")
1094
1373
  try:
1095
- apply_diff_with_difflib(file_path, git_diff)
1096
- except PatchApplicationError as e:
1097
- if 'available_lines' in e.details:
1098
- logger.warning(
1099
- f"Not enough lines in file to apply patch. "
1100
- f"Requested {e.details['requested_lines']} lines at position {e.details['position']}, "
1101
- f"but only {e.details['available_lines']} lines available after that position."
1102
- )
1103
- elif e.details.get('status') == 'partial':
1104
- logger.warning(f"Partial success: {e.details.get('summary', '')}")
1105
- # Re-raise to let the endpoint handle the partial success
1106
- raise
1374
+ with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', suffix='.diff', delete=False) as temp_file:
1375
+ temp_file.write(git_diff)
1376
+ temp_path = temp_file.name
1377
+
1378
+ git_result = subprocess.run(
1379
+ ['git', 'apply', '--verbose', '--ignore-whitespace',
1380
+ '--ignore-space-change', '--whitespace=nowarn',
1381
+ '--check', temp_path],
1382
+ cwd=user_codebase_dir,
1383
+ capture_output=True,
1384
+ text=True
1385
+ )
1386
+
1387
+ if "patch does not apply" not in git_result.stderr:
1388
+ logger.info("Changes already applied according to git apply --check")
1389
+ return {"status": "success", "details": {
1390
+ "succeeded": [],
1391
+ "failed": [],
1392
+ "already_applied": results["failed"]
1393
+ }}
1394
+
1395
+ git_result = subprocess.run(
1396
+ ['git', 'apply', '--verbose', '--ignore-whitespace',
1397
+ '--ignore-space-change', '--whitespace=nowarn',
1398
+ '--reject', temp_path],
1399
+ cwd=user_codebase_dir,
1400
+ capture_output=True,
1401
+ text=True
1402
+ )
1403
+
1404
+ logger.debug(f"Git apply stdout:\n{git_result.stdout}")
1405
+ logger.debug(f"Git apply stderr:\n{git_result.stderr}")
1406
+
1407
+ if git_result.returncode == 0:
1408
+ logger.info("Git apply succeeded")
1409
+ # Move hunks from failed to succeeded
1410
+ for hunk_num in results["failed"][:]:
1411
+ results["failed"].remove(hunk_num)
1412
+ results["succeeded"].append(hunk_num)
1413
+ changes_written = True
1414
+ return {"status": "success", "details": results}
1415
+ elif "already applied" in git_result.stderr:
1416
+ # Move hunks from failed to already_applied
1417
+ for hunk_num in results["failed"][:]:
1418
+ results["failed"].remove(hunk_num)
1419
+ results["already_applied"].append(hunk_num)
1420
+ logger.info(f"Marking hunk {hunk_num} as already applied and continuing")
1107
1421
  else:
1108
- logger.error(f"Failed to apply changes: {str(e)}")
1109
- raise
1110
- logger.info("Difflib apply succeeded")
1111
- return
1422
+ logger.info("Git apply failed, moving to difflib stage...")
1423
+ # Continue to difflib
1424
+ finally:
1425
+ if os.path.exists(temp_path):
1426
+ os.unlink(temp_path)
1427
+
1428
+ # If git apply failed, try difflib with the same hunks we just tried
1429
+ logger.info("Attempting to apply changes with difflib")
1430
+ try:
1431
+ logger.info("Starting difflib application...")
1432
+ # Parse the remaining hunks for difflib
1433
+ if git_diff:
1434
+ logger.debug(f"Passing to difflib:\n{git_diff}")
1435
+ try:
1436
+ apply_diff_with_difflib(file_path, git_diff)
1437
+ # If difflib succeeds, move remaining failed hunks to succeeded
1438
+ for hunk_num in results["failed"][:]:
1439
+ results["failed"].remove(hunk_num)
1440
+ results["succeeded"].append(hunk_num)
1441
+ changes_written = True
1442
+ return {"status": "success", "details": results}
1443
+ except Exception as e:
1444
+ if isinstance(e, PatchApplicationError) and e.details.get("type") == "already_applied":
1445
+ # Move failed hunks to already_applied
1446
+ for hunk_num in results["failed"][:]:
1447
+ results["failed"].remove(hunk_num)
1448
+ results["already_applied"].append(hunk_num)
1449
+ return {"status": "success", "details": results}
1450
+ logger.error(f"Difflib application failed: {str(e)}")
1451
+ raise
1452
+ except PatchApplicationError as e:
1453
+ logger.error(f"Difflib application failed: {str(e)}")
1454
+ if e.details.get("type") == "already_applied":
1455
+ return {"status": "success", "details": results}
1456
+ if changes_written:
1457
+ return {"status": "partial", "details": results}
1458
+ raise
1459
+ else:
1460
+ logger.debug("Unreachable? No hunks reported failure, exiting pipeline after system patch stage.")
1112
1461
 
1113
- finally:
1114
- if os.path.exists(temp_file):
1115
- os.remove(temp_file)
1116
1462
  except Exception as e:
1117
1463
  logger.error(f"Error applying patch: {str(e)}")
1118
1464
  raise
1465
+ finally:
1466
+ cleanup_patch_artifacts(user_codebase_dir, file_path)
1467
+
1468
+ # Return final status
1469
+ if len(results["failed"]) == 0:
1470
+ return {"status": "success", "details": results}
1471
+ elif changes_written:
1472
+ return {"status": "partial", "details": results}
1473
+ return {"status": "error", "details": results}
1474
+
1475
+ def parse_patch_output(patch_output: str) -> Dict[int, bool]:
1476
+ """Parse patch command output to determine which hunks succeeded/failed.
1477
+ Returns a dict mapping hunk number to success status."""
1478
+ hunk_status = {}
1479
+ logger.debug(f"Parsing patch output:\n{patch_output}")
1480
+
1481
+ in_patch_output = False
1482
+ current_hunk = None
1483
+ for line in patch_output.splitlines():
1484
+ if "Patching file" in line:
1485
+ in_patch_output = True
1486
+ continue
1487
+ if not in_patch_output:
1488
+ continue
1119
1489
 
1120
- def extract_remaining_hunks(git_diff: str, patch_output: str) -> str:
1490
+ # Track the current hunk number
1491
+ hunk_match = re.search(r'Hunk #(\d+)', line)
1492
+ if hunk_match:
1493
+ current_hunk = int(hunk_match.group(1))
1494
+
1495
+ # Check for significant adjustments that should invalidate "success"
1496
+ if current_hunk is not None:
1497
+ if "succeeded at" in line:
1498
+ hunk_status[current_hunk] = True
1499
+ logger.debug(f"Hunk {current_hunk} succeeded")
1500
+ elif "failed" in line:
1501
+ logger.debug(f"Hunk {current_hunk} failed")
1502
+
1503
+ # Match lines like "Hunk #1 succeeded at 6."
1504
+ match = re.search(r'Hunk #(\d+) (succeeded at \d+(?:\s+with fuzz \d+)?|failed)', line)
1505
+ if match:
1506
+ hunk_num = int(match.group(1))
1507
+ # Consider both clean success and fuzzy matches as successful
1508
+ success = 'succeeded' in match.group(2)
1509
+ hunk_status[hunk_num] = success
1510
+ logger.debug(f"Found hunk {hunk_num}: {'succeeded' if success else 'failed'}")
1511
+
1512
+ logger.debug(f"Final hunk status: {hunk_status}")
1513
+ return hunk_status
1514
+
1515
+ def extract_remaining_hunks(git_diff: str, hunk_status: Dict[int,bool]) -> str:
1121
1516
  """Extract hunks that weren't successfully applied."""
1122
1517
  logger.debug("Extracting remaining hunks from diff")
1123
-
1518
+
1519
+ logger.debug(f"Hunk status before extraction: {json.dumps(hunk_status, indent=2)}")
1520
+
1124
1521
  # Parse the original diff into hunks
1125
1522
  lines = git_diff.splitlines()
1126
1523
  hunks = []
1127
1524
  current_hunk = []
1128
-
1525
+ headers = []
1526
+ hunk_count = 0
1527
+ in_hunk = False
1528
+
1129
1529
  for line in lines:
1130
- if line.startswith('@@'):
1530
+ if line.startswith(('diff --git', '--- ', '+++ ')):
1531
+ headers.append(line)
1532
+ elif line.startswith('@@'):
1533
+ hunk_count += 1
1131
1534
  if current_hunk:
1132
- hunks.append(current_hunk)
1133
- current_hunk = [line]
1134
- elif current_hunk is not None:
1535
+ if current_hunk:
1536
+ hunks.append((hunk_count - 1, current_hunk))
1537
+
1538
+ # Only start collecting if this hunk failed
1539
+ if hunk_count in hunk_status and not hunk_status[hunk_count]:
1540
+ logger.debug(f"Including failed hunk #{hunk_count}")
1541
+ current_hunk = [f"{line} Hunk #{hunk_count}"]
1542
+ in_hunk = True
1543
+ else:
1544
+ logger.debug(f"Skipping successful hunk #{hunk_count}")
1545
+ current_hunk = []
1546
+ in_hunk = False
1547
+ elif in_hunk:
1135
1548
  current_hunk.append(line)
1136
-
1549
+ if not line.startswith((' ', '+', '-', '\\')):
1550
+ # End of hunk reached
1551
+ if current_hunk:
1552
+ hunks.append(current_hunk)
1553
+ current_hunk = []
1554
+ in_hunk = False
1555
+
1137
1556
  if current_hunk:
1138
- hunks.append(current_hunk)
1139
-
1140
- # Filter out successfully applied hunks
1141
- remaining_hunks = [hunk for i, hunk in enumerate(hunks, 1)
1142
- if f'Hunk #{i} succeeded' not in patch_output]
1143
-
1144
- return '\n'.join(sum(remaining_hunks, []))
1557
+ hunks.append((hunk_count, current_hunk))
1558
+
1559
+ # Build final result with proper spacing
1560
+ result = []
1561
+ result.extend(headers)
1562
+ for _, hunk_lines in hunks:
1563
+ result.extend(hunk_lines)
1564
+
1565
+ if not result:
1566
+ logger.warning("No hunks to extract")
1567
+ return ''
1145
1568
 
1569
+ final_diff = '\n'.join(result) + '\n'
1570
+ logger.debug(f"Extracted diff for remaining hunks:\n{final_diff}")
1571
+ return final_diff