ziya 0.1.49__py3-none-any.whl → 0.1.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ziya might be problematic. Click here for more details.
- app/agents/.agent.py.swp +0 -0
- app/agents/agent.py +315 -113
- app/agents/models.py +439 -0
- app/agents/prompts.py +32 -4
- app/main.py +70 -7
- app/server.py +403 -14
- app/utils/code_util.py +641 -215
- pyproject.toml +2 -3
- templates/asset-manifest.json +18 -20
- templates/index.html +1 -1
- templates/static/css/{main.87f30840.css → main.2bddf34e.css} +2 -2
- templates/static/css/main.2bddf34e.css.map +1 -0
- templates/static/js/46907.90c6a4f3.chunk.js +2 -0
- templates/static/js/46907.90c6a4f3.chunk.js.map +1 -0
- templates/static/js/56122.1d6a5c10.chunk.js +3 -0
- templates/static/js/56122.1d6a5c10.chunk.js.LICENSE.txt +9 -0
- templates/static/js/56122.1d6a5c10.chunk.js.map +1 -0
- templates/static/js/83953.61a908f4.chunk.js +3 -0
- templates/static/js/83953.61a908f4.chunk.js.map +1 -0
- templates/static/js/88261.1e90079d.chunk.js +3 -0
- templates/static/js/88261.1e90079d.chunk.js.map +1 -0
- templates/static/js/{96603.863a8f96.chunk.js → 96603.18c5d644.chunk.js} +2 -2
- templates/static/js/{96603.863a8f96.chunk.js.map → 96603.18c5d644.chunk.js.map} +1 -1
- templates/static/js/{97902.75670155.chunk.js → 97902.d1e262d6.chunk.js} +3 -3
- templates/static/js/{97902.75670155.chunk.js.map → 97902.d1e262d6.chunk.js.map} +1 -1
- templates/static/js/main.9b2b2b57.js +3 -0
- templates/static/js/{main.ee8b3c96.js.LICENSE.txt → main.9b2b2b57.js.LICENSE.txt} +8 -2
- templates/static/js/main.9b2b2b57.js.map +1 -0
- {ziya-0.1.49.dist-info → ziya-0.1.50.dist-info}/METADATA +4 -5
- {ziya-0.1.49.dist-info → ziya-0.1.50.dist-info}/RECORD +36 -35
- templates/static/css/main.87f30840.css.map +0 -1
- templates/static/js/23416.c33f07ab.chunk.js +0 -3
- templates/static/js/23416.c33f07ab.chunk.js.map +0 -1
- templates/static/js/3799.fedb612f.chunk.js +0 -2
- templates/static/js/3799.fedb612f.chunk.js.map +0 -1
- templates/static/js/46907.4a730107.chunk.js +0 -2
- templates/static/js/46907.4a730107.chunk.js.map +0 -1
- templates/static/js/64754.cf383335.chunk.js +0 -2
- templates/static/js/64754.cf383335.chunk.js.map +0 -1
- templates/static/js/88261.33450351.chunk.js +0 -3
- templates/static/js/88261.33450351.chunk.js.map +0 -1
- templates/static/js/main.ee8b3c96.js +0 -3
- templates/static/js/main.ee8b3c96.js.map +0 -1
- /templates/static/js/{23416.c33f07ab.chunk.js.LICENSE.txt → 83953.61a908f4.chunk.js.LICENSE.txt} +0 -0
- /templates/static/js/{88261.33450351.chunk.js.LICENSE.txt → 88261.1e90079d.chunk.js.LICENSE.txt} +0 -0
- /templates/static/js/{97902.75670155.chunk.js.LICENSE.txt → 97902.d1e262d6.chunk.js.LICENSE.txt} +0 -0
- {ziya-0.1.49.dist-info → ziya-0.1.50.dist-info}/LICENSE +0 -0
- {ziya-0.1.49.dist-info → ziya-0.1.50.dist-info}/WHEEL +0 -0
- {ziya-0.1.49.dist-info → ziya-0.1.50.dist-info}/entry_points.txt +0 -0
app/utils/code_util.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import subprocess
|
|
3
3
|
import json
|
|
4
|
+
import tempfile
|
|
5
|
+
import glob
|
|
6
|
+
from itertools import zip_longest
|
|
4
7
|
from io import StringIO
|
|
5
8
|
import time
|
|
6
9
|
from typing import Dict, Optional, Union, List, Tuple, Any
|
|
@@ -9,7 +12,8 @@ import re
|
|
|
9
12
|
from app.utils.logging_utils import logger
|
|
10
13
|
import difflib
|
|
11
14
|
|
|
12
|
-
MIN_CONFIDENCE = 0.
|
|
15
|
+
MIN_CONFIDENCE = 0.72 # what confidence level we cut off forced diff apply after fuzzy match
|
|
16
|
+
MAX_OFFSET = 5 # max allowed line offset before considering a hunk apply failed
|
|
13
17
|
|
|
14
18
|
class PatchApplicationError(Exception):
|
|
15
19
|
"""Custom exception for patch application failures"""
|
|
@@ -62,8 +66,6 @@ def clean_input_diff(diff_content: str) -> str:
|
|
|
62
66
|
minus_seen = 0
|
|
63
67
|
plus_seen = 0
|
|
64
68
|
|
|
65
|
-
import re
|
|
66
|
-
|
|
67
69
|
for line in lines:
|
|
68
70
|
# Reset skip flag on new file header
|
|
69
71
|
if line.startswith('diff --git'):
|
|
@@ -193,12 +195,12 @@ def is_new_file_creation(diff_lines: List[str]) -> bool:
|
|
|
193
195
|
if line.startswith('@@ -0,0'):
|
|
194
196
|
logger.debug("Detected new file from zero hunk marker")
|
|
195
197
|
return True
|
|
196
|
-
|
|
198
|
+
|
|
197
199
|
# Case 2: Empty source file indicator
|
|
198
200
|
if line == '--- /dev/null':
|
|
199
201
|
logger.debug("Detected new file from /dev/null source")
|
|
200
202
|
return True
|
|
201
|
-
|
|
203
|
+
|
|
202
204
|
# Case 3: New file mode
|
|
203
205
|
if 'new file mode' in line:
|
|
204
206
|
logger.debug("Detected new file from mode marker")
|
|
@@ -209,7 +211,7 @@ def is_new_file_creation(diff_lines: List[str]) -> bool:
|
|
|
209
211
|
def create_new_file(git_diff: str, base_dir: str) -> None:
|
|
210
212
|
"""Create a new file from a git diff."""
|
|
211
213
|
logger.info(f"Processing new file diff with length: {len(git_diff)} bytes")
|
|
212
|
-
|
|
214
|
+
|
|
213
215
|
try:
|
|
214
216
|
# Parse the diff content
|
|
215
217
|
diff_lines = git_diff.splitlines()
|
|
@@ -389,7 +391,7 @@ def normalize_whitespace_in_diff(diff_lines: List[str]) -> List[str]:
|
|
|
389
391
|
if line.startswith(('+', '-', ' ')):
|
|
390
392
|
prefix = line[0] # Save the diff marker (+, -, or space)
|
|
391
393
|
content = line[1:] # Get the actual content
|
|
392
|
-
|
|
394
|
+
|
|
393
395
|
# Normalize the content while preserving essential indentation
|
|
394
396
|
normalized = content.rstrip() # Remove trailing whitespace
|
|
395
397
|
if normalized:
|
|
@@ -406,7 +408,7 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
|
|
|
406
408
|
Maintains compatibility with existing function signature.
|
|
407
409
|
"""
|
|
408
410
|
logger.info(f"Processing diff for {original_file_path}")
|
|
409
|
-
|
|
411
|
+
|
|
410
412
|
try:
|
|
411
413
|
|
|
412
414
|
# Clean up the diff content first
|
|
@@ -445,7 +447,7 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
|
|
|
445
447
|
|
|
446
448
|
# Reconstruct normalized diff
|
|
447
449
|
result = headers # start with original headers
|
|
448
|
-
|
|
450
|
+
|
|
449
451
|
# Extract original hunks
|
|
450
452
|
original_hunks = []
|
|
451
453
|
current_hunk = []
|
|
@@ -479,49 +481,11 @@ def correct_git_diff(git_diff: str, original_file_path: str) -> str:
|
|
|
479
481
|
except Exception as e:
|
|
480
482
|
logger.error(f"Error normalizing diff: {str(e)}")
|
|
481
483
|
raise
|
|
482
|
-
|
|
484
|
+
|
|
483
485
|
except Exception as e:
|
|
484
486
|
logger.error(f"Error correcting diff: {str(e)}")
|
|
485
487
|
raise
|
|
486
488
|
|
|
487
|
-
def apply_system_patch(diff_content: str, target_dir: str) -> bool:
|
|
488
|
-
"""
|
|
489
|
-
Apply patch using system patch command.
|
|
490
|
-
Returns True if successful, False otherwise.
|
|
491
|
-
"""
|
|
492
|
-
logger.info("Attempting to apply with system patch command...")
|
|
493
|
-
try:
|
|
494
|
-
# Debug: Log the exact content we're sending to patch
|
|
495
|
-
logger.info("Patch input content:")
|
|
496
|
-
logger.info(diff_content)
|
|
497
|
-
# Ensure we have string input and encode it just once
|
|
498
|
-
if isinstance(diff_content, bytes):
|
|
499
|
-
diff_content = diff_content.decode('utf-8')
|
|
500
|
-
result = subprocess.run(
|
|
501
|
-
['patch', '-p1', '--forward', '--ignore-whitespace', '--verbose'],
|
|
502
|
-
input=diff_content,
|
|
503
|
-
cwd=target_dir,
|
|
504
|
-
capture_output=True,
|
|
505
|
-
text=True,
|
|
506
|
-
timeout=10
|
|
507
|
-
)
|
|
508
|
-
|
|
509
|
-
logger.debug(f"Patch command output: stdout={result.stdout}, stderr={result.stderr}")
|
|
510
|
-
|
|
511
|
-
# If any hunks were successfully applied, we need to modify the diff
|
|
512
|
-
if result and 'Hunk #1 succeeded' in result.stderr:
|
|
513
|
-
logger.debug("Some hunks succeeded, extracting remaining hunks")
|
|
514
|
-
git_diff = extract_remaining_hunks(git_diff, patch_result.stderr)
|
|
515
|
-
logger.info(f"Patch stdout: {result.stdout}")
|
|
516
|
-
logger.info(f"Patch stderr: {result.stderr}")
|
|
517
|
-
success = result.returncode == 0
|
|
518
|
-
logger.info(f"Patch {'succeeded' if success else 'failed'} with return code {result.returncode}")
|
|
519
|
-
return success, result
|
|
520
|
-
except Exception as e:
|
|
521
|
-
logger.error(f"System patch error output: {str(e)}")
|
|
522
|
-
logger.error(f"System patch failed: {str(e)}")
|
|
523
|
-
return False
|
|
524
|
-
|
|
525
489
|
def validate_and_fix_diff(diff_content: str) -> str:
|
|
526
490
|
"""
|
|
527
491
|
Validate diff format and ensure it has all required components.
|
|
@@ -663,95 +627,237 @@ def apply_diff_with_difflib(file_path: str, diff_content: str) -> None:
|
|
|
663
627
|
# 2) apply forced-hybrid logic with error throwing
|
|
664
628
|
final_lines = apply_diff_with_difflib_hybrid_forced(file_path, diff_content, original_lines)
|
|
665
629
|
|
|
666
|
-
# 3) write result
|
|
630
|
+
# 3) write result back to file
|
|
667
631
|
with open(file_path, 'w', encoding='utf-8') as f:
|
|
668
632
|
f.writelines(final_lines)
|
|
633
|
+
logger.info(
|
|
634
|
+
f"Successfully applied forced-hybrid diff (with exceptions on mismatch) to {file_path}. "
|
|
635
|
+
f"Wrote {len(final_lines)} lines."
|
|
636
|
+
)
|
|
637
|
+
|
|
638
|
+
def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: int) -> bool:
|
|
639
|
+
"""
|
|
640
|
+
Check if a hunk has already been applied at the given position.
|
|
641
|
+
Returns True only if ALL changes in the hunk are already present.
|
|
642
|
+
Checks if the target state matches exactly.
|
|
643
|
+
"""
|
|
644
|
+
if pos >= len(file_lines):
|
|
645
|
+
logger.debug(f"Position {pos} beyond file length {len(file_lines)}")
|
|
646
|
+
return False
|
|
647
|
+
|
|
648
|
+
# Get the lines we're working with
|
|
649
|
+
window_size = max(len(hunk['old_block']), len(hunk['new_lines']))
|
|
650
|
+
available_lines = file_lines[pos:pos + window_size]
|
|
651
|
+
|
|
652
|
+
# Count actual changes needed (excluding context lines)
|
|
653
|
+
changes_needed = 0
|
|
654
|
+
changes_found = 0
|
|
655
|
+
|
|
656
|
+
# Map of line positions to their expected states
|
|
657
|
+
expected_states = {}
|
|
658
|
+
|
|
659
|
+
for old_line, new_line in zip_longest(hunk['old_block'], hunk['new_lines'], fillvalue=None):
|
|
660
|
+
if old_line != new_line:
|
|
661
|
+
changes_needed += 1
|
|
662
|
+
|
|
663
|
+
# Check each line in the window
|
|
664
|
+
for i, actual_line in enumerate(available_lines):
|
|
665
|
+
if i < len(hunk['new_lines']):
|
|
666
|
+
new_line = hunk['new_lines'][i]
|
|
667
|
+
old_line = hunk['old_block'][i] if i < len(hunk['old_block']) else None
|
|
668
|
+
|
|
669
|
+
# Line matches target state
|
|
670
|
+
if actual_line.rstrip() == new_line.rstrip():
|
|
671
|
+
changes_found += 1
|
|
672
|
+
continue
|
|
673
|
+
|
|
674
|
+
# Line matches original state and needs change
|
|
675
|
+
if old_line and actual_line.rstrip() == old_line.rstrip():
|
|
676
|
+
# This is a line that still needs changing
|
|
677
|
+
continue
|
|
678
|
+
|
|
679
|
+
# Line doesn't match either state
|
|
680
|
+
return False
|
|
681
|
+
|
|
682
|
+
# Calculate what percentage of changes are already applied
|
|
683
|
+
if changes_needed > 0 and changes_found > 0:
|
|
684
|
+
applied_ratio = changes_found / changes_needed
|
|
685
|
+
logger.debug(f"Hunk changes: needed={changes_needed}, found={changes_found}, ratio={applied_ratio:.2f}")
|
|
669
686
|
|
|
670
|
-
|
|
687
|
+
# Consider it applied if we found all changes
|
|
688
|
+
if applied_ratio >= 1.0: # Must match exactly, or have all needed changes+
|
|
689
|
+
logger.debug(f"All changes already present at pos {pos}")
|
|
690
|
+
return True
|
|
691
|
+
elif applied_ratio > 0:
|
|
692
|
+
logger.debug(f"Partial changes found ({applied_ratio:.2f}) - will apply remaining changes")
|
|
693
|
+
return False
|
|
694
|
+
|
|
695
|
+
# If we get here, no changes were found
|
|
696
|
+
if changes_needed > 0:
|
|
697
|
+
return False
|
|
671
698
|
|
|
699
|
+
# Default case - nothing to apply
|
|
700
|
+
logger.debug("No changes needed")
|
|
701
|
+
return True
|
|
672
702
|
|
|
673
703
|
def apply_diff_with_difflib_hybrid_forced(file_path: str, diff_content: str, original_lines: list[str]) -> list[str]:
|
|
674
704
|
# parse hunks
|
|
675
|
-
hunks = parse_unified_diff_exact_plus(diff_content, file_path)
|
|
705
|
+
hunks = list(parse_unified_diff_exact_plus(diff_content, file_path))
|
|
706
|
+
logger.debug(f"Parsed hunks for difflib: {json.dumps([{'old_start': h['old_start'], 'old_count': len(h['old_block']), 'new_start': h['new_start'], 'new_count': len(h['new_lines'])} for h in hunks], indent=2)}")
|
|
707
|
+
already_applied_hunks = set()
|
|
676
708
|
stripped_original = [ln.rstrip('\n') for ln in original_lines]
|
|
677
709
|
|
|
710
|
+
final_lines = stripped_original.copy()
|
|
678
711
|
offset = 0
|
|
712
|
+
applied_content = set()
|
|
679
713
|
for hunk_idx, h in enumerate(hunks, start=1):
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
714
|
+
def calculate_initial_positions():
|
|
715
|
+
"""Calculate initial positions and counts for the hunk."""
|
|
716
|
+
old_start = h['old_start'] - 1
|
|
717
|
+
old_count = h['old_count']
|
|
718
|
+
initial_remove_pos = clamp(old_start + offset, 0, len(final_lines))
|
|
719
|
+
|
|
720
|
+
# Adjust counts based on available lines
|
|
721
|
+
available_lines = len(final_lines) - initial_remove_pos
|
|
722
|
+
actual_old_count = min(old_count, available_lines)
|
|
723
|
+
end_remove = initial_remove_pos + actual_old_count
|
|
724
|
+
|
|
725
|
+
# Final position adjustment
|
|
726
|
+
remove_pos = clamp(initial_remove_pos, 0, len(stripped_original) - 1)
|
|
727
|
+
|
|
728
|
+
return {
|
|
729
|
+
'remove_pos': remove_pos,
|
|
730
|
+
'old_count': old_count,
|
|
731
|
+
'actual_old_count': actual_old_count,
|
|
732
|
+
'end_remove': end_remove
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
def try_strict_match(positions):
|
|
736
|
+
"""Attempt a strict match of the hunk content."""
|
|
737
|
+
remove_pos = positions['remove_pos']
|
|
738
|
+
|
|
739
|
+
if remove_pos + len(h['old_block']) <= len(final_lines):
|
|
740
|
+
file_slice = final_lines[remove_pos : remove_pos + positions['old_count']]
|
|
741
|
+
if h['old_block'] and len(h['old_block']) >= positions['actual_old_count']:
|
|
742
|
+
old_block_minus = h['old_block'][:positions['old_count']]
|
|
743
|
+
if file_slice == old_block_minus:
|
|
744
|
+
logger.debug(f"Hunk #{hunk_idx}: strict match at pos={remove_pos}")
|
|
745
|
+
return True, remove_pos
|
|
702
746
|
logger.debug(f"Hunk #{hunk_idx}: strict match failed at pos={remove_pos}")
|
|
703
|
-
|
|
704
|
-
|
|
747
|
+
else:
|
|
748
|
+
logger.debug(f"Hunk #{hunk_idx}: old_block is smaller than old_count => strict match not possible")
|
|
749
|
+
return False, remove_pos
|
|
705
750
|
|
|
706
|
-
|
|
707
|
-
|
|
751
|
+
def try_fuzzy_match(positions):
|
|
752
|
+
"""Attempt a fuzzy match if strict match fails."""
|
|
753
|
+
remove_pos = positions['remove_pos']
|
|
708
754
|
logger.debug(f"Hunk #{hunk_idx}: Attempting fuzzy near line {remove_pos}")
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
755
|
+
|
|
756
|
+
best_pos, best_ratio = find_best_chunk_position(stripped_original, h['old_block'], remove_pos)
|
|
757
|
+
|
|
758
|
+
# First check if changes are already applied (with high confidence threshold)
|
|
759
|
+
if any(new_line in stripped_original for new_line in h['new_lines']):
|
|
760
|
+
already_applied = sum(1 for line in h['new_lines'] if line in stripped_original)
|
|
761
|
+
if already_applied / len(h['new_lines']) >= 0.98: # Require near-exact match
|
|
762
|
+
logger.info(f"Hunk #{hunk_idx} appears to be already applied")
|
|
763
|
+
return None, remove_pos # Signal skip to next hunk
|
|
764
|
+
|
|
765
|
+
# Then check if we have enough confidence in our match position
|
|
766
|
+
if best_ratio <= MIN_CONFIDENCE:
|
|
712
767
|
msg = (f"Hunk #{hunk_idx} => low confidence match (ratio={best_ratio:.2f}) near {remove_pos}, "
|
|
713
768
|
f"can't safely apply chunk. Failing.")
|
|
714
769
|
logger.error(msg)
|
|
715
|
-
raise PatchApplicationError(msg
|
|
770
|
+
raise PatchApplicationError(msg, {
|
|
771
|
+
"status": "error",
|
|
772
|
+
"type": "low_confidence",
|
|
773
|
+
"hunk": hunk_idx,
|
|
774
|
+
"confidence": best_ratio
|
|
775
|
+
})
|
|
776
|
+
|
|
716
777
|
logger.debug(f"Hunk #{hunk_idx}: fuzzy best pos={best_pos}, ratio={best_ratio:.2f}")
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
778
|
+
return (best_pos + offset if best_pos is not None else None), remove_pos
|
|
779
|
+
|
|
780
|
+
logger.debug(f"Processing hunk #{hunk_idx} with offset {offset}")
|
|
781
|
+
|
|
782
|
+
# Create a unique key for this hunk based on its content
|
|
783
|
+
already_found = False
|
|
784
|
+
hunk_key = (
|
|
785
|
+
tuple(h['old_block']),
|
|
786
|
+
tuple(h['new_lines'])
|
|
787
|
+
)
|
|
788
|
+
if hunk_key in already_applied_hunks:
|
|
789
|
+
continue
|
|
790
|
+
|
|
791
|
+
# First check if this hunk is already applied anywhere in the file
|
|
792
|
+
for pos in range(len(stripped_original)):
|
|
793
|
+
if is_hunk_already_applied(stripped_original, h, pos):
|
|
794
|
+
# Verify we have the exact new content, not just similar content
|
|
795
|
+
window = stripped_original[pos:pos+len(h['new_lines'])]
|
|
796
|
+
if all(line.rstrip() == new_line.rstrip() for line, new_line in zip(window, h['new_lines'])):
|
|
797
|
+
logger.info(f"Hunk #{hunk_idx} already present at position {pos}")
|
|
798
|
+
already_applied_hunks.add(hunk_key)
|
|
799
|
+
logger.debug(f"Verified hunk #{hunk_idx} is already applied")
|
|
800
|
+
already_found = True
|
|
801
|
+
break
|
|
802
|
+
# Content doesn't match exactly, continue looking
|
|
803
|
+
continue
|
|
804
|
+
|
|
805
|
+
if already_found:
|
|
806
|
+
continue
|
|
807
|
+
|
|
808
|
+
# Calculate initial positions
|
|
809
|
+
positions = calculate_initial_positions()
|
|
810
|
+
|
|
811
|
+
# Try strict match first
|
|
812
|
+
strict_ok, remove_pos = try_strict_match(positions)
|
|
813
|
+
|
|
814
|
+
# If strict match fails, try fuzzy match
|
|
815
|
+
if not strict_ok:
|
|
816
|
+
result = try_fuzzy_match(positions)
|
|
817
|
+
if result is None:
|
|
818
|
+
# Skip this hunk as it's already applied
|
|
819
|
+
continue # Skip this hunk (already applied)
|
|
820
|
+
new_pos, old_pos = result
|
|
821
|
+
if new_pos is not None: # Only update position if we got a valid match
|
|
822
|
+
remove_pos = new_pos
|
|
823
|
+
|
|
824
|
+
# Use actual line counts from the blocks
|
|
825
|
+
old_count = len(h['old_block'])
|
|
826
|
+
logger.debug(f"Replacing {old_count} lines with {len(h['new_lines'])} lines at pos={remove_pos}")
|
|
827
|
+
|
|
828
|
+
# Replace exactly the number of lines we counted
|
|
829
|
+
final_lines[remove_pos:remove_pos + old_count] = h['new_lines']
|
|
830
|
+
logger.debug(f" final_lines after insertion: {final_lines}")
|
|
831
|
+
|
|
832
|
+
# Calculate net change based on actual lines removed and added
|
|
833
|
+
actual_removed = min(positions['old_count'], len(h['old_block']))
|
|
834
|
+
logger.debug(f"Removal calculation: min({len(h['old_block'])}, {len(final_lines)} - {remove_pos})")
|
|
835
|
+
logger.debug(f"Old block lines: {h['old_block']}")
|
|
836
|
+
logger.debug(f"New lines: {h['new_lines']}")
|
|
837
|
+
logger.debug(f"Remove position: {remove_pos}")
|
|
838
|
+
logger.debug(f"Final lines length: {len(final_lines)}")
|
|
839
|
+
net_change = len(h['new_lines']) - positions['actual_old_count']
|
|
743
840
|
offset += net_change
|
|
744
841
|
|
|
745
|
-
#
|
|
746
|
-
final_lines
|
|
747
|
-
|
|
842
|
+
# Remove trailing empty line if present
|
|
843
|
+
while final_lines and final_lines[-1] == '':
|
|
844
|
+
final_lines.pop()
|
|
845
|
+
|
|
846
|
+
# Add newlines to all lines
|
|
847
|
+
result_lines = [
|
|
848
|
+
ln + '\n' if not ln.endswith('\n') else ln
|
|
849
|
+
for ln in final_lines
|
|
850
|
+
]
|
|
851
|
+
logger.debug(f"Final result lines: {result_lines}")
|
|
852
|
+
|
|
853
|
+
return result_lines
|
|
748
854
|
|
|
749
855
|
def strip_leading_dotslash(rel_path: str) -> str:
|
|
750
856
|
"""
|
|
751
857
|
Remove leading '../' or './' segments from the relative path
|
|
752
858
|
so it matches patch lines that are always 'frontend/...', not '../frontend/...'.
|
|
753
859
|
"""
|
|
754
|
-
|
|
860
|
+
|
|
755
861
|
# Repeatedly strip leading '../' or './'
|
|
756
862
|
pattern = re.compile(r'^\.\.?/')
|
|
757
863
|
while pattern.match(rel_path):
|
|
@@ -763,24 +869,19 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> list[d
|
|
|
763
869
|
Same logic: we gather old_block and new_lines. If we can't parse anything, we return an empty list.
|
|
764
870
|
The calling code might handle that or raise an error if no hunks are found.
|
|
765
871
|
"""
|
|
766
|
-
|
|
872
|
+
|
|
767
873
|
lines = diff_content.splitlines()
|
|
874
|
+
logger.debug(f"Parsing diff with {len(lines)} lines:\n{diff_content}")
|
|
768
875
|
hunks = []
|
|
769
876
|
current_hunk = None
|
|
770
877
|
in_hunk = False
|
|
771
878
|
skip_file = True
|
|
879
|
+
seen_hunks = set()
|
|
772
880
|
|
|
773
881
|
# fixme: import ziya project directory if specified on invocation cli
|
|
774
882
|
rel_path = os.path.relpath(target_file, os.getcwd())
|
|
775
883
|
rel_path = strip_leading_dotslash(rel_path)
|
|
776
884
|
|
|
777
|
-
def close_hunk():
|
|
778
|
-
nonlocal current_hunk, in_hunk
|
|
779
|
-
if current_hunk:
|
|
780
|
-
hunks.append(current_hunk)
|
|
781
|
-
current_hunk = None
|
|
782
|
-
in_hunk = False
|
|
783
|
-
|
|
784
885
|
i = 0
|
|
785
886
|
while i < len(lines):
|
|
786
887
|
line = lines[i]
|
|
@@ -794,68 +895,107 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> list[d
|
|
|
794
895
|
i += 1
|
|
795
896
|
continue
|
|
796
897
|
|
|
898
|
+
# Handle index lines and other git metadata
|
|
899
|
+
if line.startswith('index ') or line.startswith('new file mode ') or line.startswith('deleted file mode '):
|
|
900
|
+
i += 1
|
|
901
|
+
continue
|
|
902
|
+
|
|
797
903
|
if line.startswith('@@ '):
|
|
798
|
-
|
|
799
|
-
|
|
904
|
+
match = re.match(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?', line)
|
|
905
|
+
hunk_num = int(match.group(5)) if match and match.group(5) else len(hunks) + 1
|
|
800
906
|
if match:
|
|
801
907
|
old_start = int(match.group(1))
|
|
802
908
|
# Validate line numbers
|
|
803
909
|
if old_start < 1:
|
|
804
910
|
logger.warning(f"Invalid hunk header - old_start ({old_start}) < 1")
|
|
805
911
|
old_start = 1
|
|
806
|
-
|
|
912
|
+
|
|
807
913
|
# Use default of 1 for count if not specified
|
|
808
914
|
old_count = int(match.group(2)) if match.group(2) else 1
|
|
809
|
-
|
|
915
|
+
|
|
810
916
|
new_start = int(match.group(3))
|
|
811
917
|
new_count = int(match.group(4)) if match.group(4) else 1
|
|
812
|
-
|
|
918
|
+
|
|
919
|
+
# Use original hunk number if present in header
|
|
920
|
+
if match.group(5):
|
|
921
|
+
hunk_num = int(match.group(5))
|
|
922
|
+
|
|
923
|
+
hunk = {
|
|
813
924
|
'old_start': old_start,
|
|
814
925
|
'old_count': old_count,
|
|
815
926
|
'new_start': new_start,
|
|
816
927
|
'new_count': new_count,
|
|
928
|
+
'number': hunk_num,
|
|
817
929
|
'old_block': [],
|
|
930
|
+
'original_hunk': hunk_num, # Store original hunk number
|
|
818
931
|
'new_lines': []
|
|
819
932
|
}
|
|
933
|
+
|
|
934
|
+
# Start collecting content for this hunk
|
|
935
|
+
current_lines = []
|
|
820
936
|
in_hunk = True
|
|
821
|
-
hunks.append(
|
|
937
|
+
hunks.append(hunk)
|
|
938
|
+
current_hunk = hunk
|
|
939
|
+
|
|
822
940
|
i += 1
|
|
823
941
|
continue
|
|
824
942
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
943
|
+
seen_hunks = set()
|
|
944
|
+
if in_hunk:
|
|
945
|
+
# End of hunk reached if we see a line that doesn't start with ' ', '+', '-', or '\'
|
|
946
|
+
if not line.startswith((' ', '+', '-', '\\')):
|
|
947
|
+
in_hunk = False
|
|
948
|
+
if current_hunk:
|
|
949
|
+
# Check if this hunk is complete and unique
|
|
950
|
+
if len(current_hunk['old_block']) == current_hunk['old_count'] and \
|
|
951
|
+
len(current_hunk['new_lines']) == current_hunk['new_count']:
|
|
952
|
+
hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
|
|
953
|
+
if hunk_key not in seen_hunks:
|
|
954
|
+
seen_hunks.add(hunk_key)
|
|
955
|
+
hunks.append(current_hunk)
|
|
956
|
+
current_hunk = None
|
|
957
|
+
i += 1
|
|
958
|
+
continue
|
|
959
|
+
if current_hunk:
|
|
960
|
+
if line.startswith('-'):
|
|
961
|
+
text = line[1:]
|
|
962
|
+
current_hunk['old_block'].append(text)
|
|
963
|
+
current_hunk['old_count'] = len(current_hunk['old_block'])
|
|
964
|
+
elif line.startswith('+'):
|
|
965
|
+
text = line[1:]
|
|
966
|
+
current_hunk['new_lines'].append(text)
|
|
967
|
+
current_hunk['new_count'] = len(current_hunk['new_lines'])
|
|
968
|
+
elif line.startswith(' '):
|
|
969
|
+
text = line[1:]
|
|
970
|
+
if (not current_hunk['old_block'] or
|
|
971
|
+
current_hunk['old_block'][-1] != text):
|
|
972
|
+
current_hunk['old_block'].append(text)
|
|
973
|
+
if (not current_hunk['new_lines'] or
|
|
974
|
+
current_hunk['new_lines'][-1] != text):
|
|
975
|
+
current_hunk['new_lines'].append(text)
|
|
838
976
|
|
|
839
|
-
|
|
840
|
-
if len(hunks) == 0:
|
|
841
|
-
raise PatchApplicationError(f"No hunks found in diff for {target_file}", {
|
|
842
|
-
'status': 'no_hunks_found',
|
|
843
|
-
'details': f"Target file path: {target_file}\nDiff content:\n{diff_content[:500]}..."
|
|
844
|
-
})
|
|
977
|
+
i += 1
|
|
845
978
|
return hunks
|
|
846
979
|
|
|
847
|
-
|
|
848
980
|
def find_best_chunk_position(file_lines: list[str], old_block: list[str], approximate_line: int) -> tuple[int, float]:
|
|
981
|
+
# Adjust approximate_line if it's outside file bounds
|
|
982
|
+
if approximate_line >= len(file_lines):
|
|
983
|
+
approximate_line = len(file_lines) - 1
|
|
984
|
+
elif approximate_line < 0:
|
|
985
|
+
approximate_line = 0
|
|
986
|
+
|
|
987
|
+
# Look for exact context matches first
|
|
988
|
+
context_lines = [line for line in old_block if line.startswith(' ')]
|
|
989
|
+
|
|
849
990
|
"""
|
|
850
991
|
Return (best_pos, best_ratio). If best_ratio < MIN_CONFIDENCE, we raise or handle outside.
|
|
851
992
|
"""
|
|
852
993
|
block_str = '\n'.join(old_block)
|
|
853
994
|
file_len = len(file_lines)
|
|
854
995
|
block_len = len(old_block)
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
search_end = min(file_len - block_len + 1, approximate_line + 20)
|
|
996
|
+
|
|
997
|
+
search_start = 0
|
|
998
|
+
search_end = file_len - block_len + 1
|
|
859
999
|
if search_end < search_start:
|
|
860
1000
|
search_start = 0
|
|
861
1001
|
search_end = max(0, file_len - block_len + 1)
|
|
@@ -865,6 +1005,22 @@ def find_best_chunk_position(file_lines: list[str], old_block: list[str], approx
|
|
|
865
1005
|
import difflib
|
|
866
1006
|
matcher = difflib.SequenceMatcher(None)
|
|
867
1007
|
|
|
1008
|
+
# First try exact matches with context
|
|
1009
|
+
for pos in range(search_start, search_end + 1):
|
|
1010
|
+
if pos + block_len > file_len:
|
|
1011
|
+
continue
|
|
1012
|
+
|
|
1013
|
+
# Check if we have an exact match of the first and last lines
|
|
1014
|
+
if (old_block[0] == file_lines[pos] and
|
|
1015
|
+
old_block[-1] == file_lines[pos + len(old_block) - 1]):
|
|
1016
|
+
window = file_lines[pos:pos+block_len]
|
|
1017
|
+
window_str = '\n'.join(window)
|
|
1018
|
+
matcher.set_seqs(block_str, window_str)
|
|
1019
|
+
ratio = matcher.ratio()
|
|
1020
|
+
if ratio > 0.9: # High confidence exact match
|
|
1021
|
+
return pos, ratio
|
|
1022
|
+
|
|
1023
|
+
# If no high-confidence exact match, try fuzzy matching
|
|
868
1024
|
for pos in range(search_start, search_end + 1):
|
|
869
1025
|
window = file_lines[pos:pos+block_len]
|
|
870
1026
|
window_str = '\n'.join(window)
|
|
@@ -873,7 +1029,7 @@ def find_best_chunk_position(file_lines: list[str], old_block: list[str], approx
|
|
|
873
1029
|
if ratio > best_ratio:
|
|
874
1030
|
best_ratio = ratio
|
|
875
1031
|
best_pos = pos
|
|
876
|
-
if best_ratio
|
|
1032
|
+
if best_ratio >= 0.98:
|
|
877
1033
|
break
|
|
878
1034
|
|
|
879
1035
|
logger.debug(f"find_best_chunk_position => best ratio={best_ratio:.2f} at pos={best_pos}, approximate_line={approximate_line}")
|
|
@@ -1006,14 +1162,47 @@ def extract_function_name(line: str) -> str:
|
|
|
1006
1162
|
after_def = line[4:].split('(')[0]
|
|
1007
1163
|
return after_def.strip()
|
|
1008
1164
|
|
|
1165
|
+
def cleanup_patch_artifacts(base_dir: str, file_path: str) -> None:
|
|
1166
|
+
"""
|
|
1167
|
+
Clean up .rej and .orig files that might be left behind by patch application.
|
|
1168
|
+
|
|
1169
|
+
Args:
|
|
1170
|
+
base_dir: The base directory where the codebase is located
|
|
1171
|
+
file_path: The path to the file that was patched
|
|
1172
|
+
"""
|
|
1173
|
+
try:
|
|
1174
|
+
# Get the directory containing the file
|
|
1175
|
+
file_dir = os.path.dirname(os.path.join(base_dir, file_path))
|
|
1176
|
+
|
|
1177
|
+
# Find and remove .rej and .orig files
|
|
1178
|
+
for pattern in ['*.rej', '*.orig']:
|
|
1179
|
+
for artifact in glob.glob(os.path.join(file_dir, pattern)):
|
|
1180
|
+
logger.info(f"Removing patch artifact: {artifact}")
|
|
1181
|
+
os.remove(artifact)
|
|
1182
|
+
except Exception as e:
|
|
1183
|
+
logger.warning(f"Error cleaning up patch artifacts: {str(e)}")
|
|
1184
|
+
|
|
1009
1185
|
def use_git_to_apply_code_diff(git_diff: str, file_path: str) -> None:
|
|
1010
1186
|
"""
|
|
1011
1187
|
Apply a git diff to the user's codebase.
|
|
1012
1188
|
Main entry point for patch application.
|
|
1189
|
+
|
|
1190
|
+
If ZIYA_FORCE_DIFFLIB environment variable is set, bypasses system patch
|
|
1191
|
+
and uses difflib directly.
|
|
1192
|
+
|
|
1193
|
+
Args:
|
|
1194
|
+
git_diff (str): The git diff to apply
|
|
1195
|
+
file_path (str): Path to the target file
|
|
1013
1196
|
"""
|
|
1014
1197
|
logger.info("Starting diff application process...")
|
|
1015
1198
|
logger.debug("Original diff content:")
|
|
1016
1199
|
logger.debug(git_diff)
|
|
1200
|
+
changes_written = False
|
|
1201
|
+
results = {
|
|
1202
|
+
"succeeded": [],
|
|
1203
|
+
"failed": [],
|
|
1204
|
+
"already_applied": []
|
|
1205
|
+
}
|
|
1017
1206
|
|
|
1018
1207
|
# Correct the diff using existing functionality
|
|
1019
1208
|
if file_path:
|
|
@@ -1039,107 +1228,344 @@ def use_git_to_apply_code_diff(git_diff: str, file_path: str) -> None:
|
|
|
1039
1228
|
# Handle new file creation
|
|
1040
1229
|
if is_new_file_creation(diff_lines):
|
|
1041
1230
|
create_new_file(git_diff, user_codebase_dir)
|
|
1231
|
+
cleanup_patch_artifacts(user_codebase_dir, file_path)
|
|
1042
1232
|
return
|
|
1233
|
+
|
|
1234
|
+
# If force difflib flag is set, skip system patch entirely
|
|
1235
|
+
if os.environ.get('ZIYA_FORCE_DIFFLIB'):
|
|
1236
|
+
logger.info("Force difflib mode enabled, bypassing system patch")
|
|
1237
|
+
try:
|
|
1238
|
+
apply_diff_with_difflib(file_path, git_diff)
|
|
1239
|
+
return
|
|
1240
|
+
except Exception as e:
|
|
1241
|
+
raise PatchApplicationError(str(e), {"status": "error", "type": "difflib_error"})
|
|
1242
|
+
|
|
1243
|
+
results = {"succeeded": [], "already_applied": [], "failed": []}
|
|
1244
|
+
|
|
1245
|
+
# Read original content before any modifications
|
|
1246
|
+
try:
|
|
1247
|
+
with open(file_path, 'r', encoding='utf-8') as f:
|
|
1248
|
+
original_content = f.read()
|
|
1249
|
+
except FileNotFoundError:
|
|
1250
|
+
original_content = ""
|
|
1043
1251
|
|
|
1044
1252
|
try:
|
|
1045
|
-
#
|
|
1253
|
+
# Check if file exists before attempting patch
|
|
1254
|
+
if not os.path.exists(file_path) and not is_new_file_creation(diff_lines):
|
|
1255
|
+
raise PatchApplicationError(f"Target file does not exist: {file_path}", {
|
|
1256
|
+
"status": "error",
|
|
1257
|
+
"type": "missing_file",
|
|
1258
|
+
"file": file_path
|
|
1259
|
+
})
|
|
1260
|
+
logger.info("Starting patch application pipeline...")
|
|
1046
1261
|
logger.debug("About to run patch command with:")
|
|
1047
1262
|
logger.debug(f"CWD: {user_codebase_dir}")
|
|
1048
1263
|
logger.debug(f"Input length: {len(git_diff)} bytes")
|
|
1264
|
+
changes_written = False
|
|
1265
|
+
# Do a dry run to see what we're up against on first pass
|
|
1049
1266
|
patch_result = subprocess.run(
|
|
1050
|
-
['patch', '-p1', '--forward', '--ignore-whitespace', '-i', '-'],
|
|
1267
|
+
['patch', '-p1', '--forward', '--no-backup-if-mismatch', '--reject-file=-', '--batch', '--ignore-whitespace', '--verbose', '--dry-run', '-i', '-'],
|
|
1051
1268
|
input=git_diff,
|
|
1269
|
+
encoding='utf-8',
|
|
1052
1270
|
cwd=user_codebase_dir,
|
|
1053
1271
|
capture_output=True,
|
|
1054
1272
|
text=True,
|
|
1055
1273
|
timeout=10
|
|
1056
1274
|
)
|
|
1057
|
-
logger.debug("Patch command completed with:")
|
|
1058
1275
|
logger.debug(f"stdout: {patch_result.stdout}")
|
|
1059
1276
|
logger.debug(f"stderr: {patch_result.stderr}")
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1277
|
+
logger.debug(f"Return code: {patch_result.returncode}")
|
|
1278
|
+
|
|
1279
|
+
hunk_status = {}
|
|
1280
|
+
patch_output = ""
|
|
1281
|
+
file_was_modified = False
|
|
1282
|
+
has_line_mismatch = False
|
|
1283
|
+
has_large_offset = False
|
|
1284
|
+
has_fuzz = False
|
|
1285
|
+
patch_reports_success = False
|
|
1286
|
+
|
|
1287
|
+
# Parse the dry run output
|
|
1288
|
+
dry_run_status = parse_patch_output(patch_result.stdout)
|
|
1289
|
+
hunk_status = dry_run_status
|
|
1290
|
+
already_applied = (not "No file to patch" in patch_result.stdout and "Reversed (or previously applied)" in patch_result.stdout and
|
|
1291
|
+
"failed" not in patch_result.stdout.lower())
|
|
1292
|
+
logger.debug("Returned from dry run, processing results...")
|
|
1293
|
+
logger.debug(f"Dry run status: {dry_run_status}")
|
|
1294
|
+
|
|
1295
|
+
# If patch indicates changes are already applied, return success
|
|
1296
|
+
if already_applied:
|
|
1297
|
+
logger.info("All changes are already applied")
|
|
1298
|
+
return {"status": "success", "details": {
|
|
1299
|
+
"succeeded": [],
|
|
1300
|
+
"failed": [],
|
|
1301
|
+
"failed": [],
|
|
1302
|
+
"already_applied": list(dry_run_status.keys())
|
|
1303
|
+
}}
|
|
1304
|
+
|
|
1305
|
+
# Apply successful hunks with system patch if any
|
|
1306
|
+
# fixme: we should probably be iterating success only, but this will also hit already applied cases
|
|
1307
|
+
if any(success for success in dry_run_status.values()):
|
|
1308
|
+
logger.info(f"Applying successful hunks ({sum(1 for v in dry_run_status.values() if v)}/{len(dry_run_status)}) with system patch...")
|
|
1309
|
+
patch_result = subprocess.run(
|
|
1310
|
+
['patch', '-p1', '--forward', '--no-backup-if-mismatch', '--reject-file=-', '--batch', '--ignore-whitespace', '--verbose', '-i', '-'],
|
|
1311
|
+
input=git_diff,
|
|
1312
|
+
encoding='utf-8',
|
|
1080
1313
|
cwd=user_codebase_dir,
|
|
1081
1314
|
capture_output=True,
|
|
1082
|
-
text=True
|
|
1315
|
+
text=True,
|
|
1316
|
+
timeout=10
|
|
1083
1317
|
)
|
|
1084
1318
|
|
|
1085
|
-
|
|
1086
|
-
|
|
1319
|
+
# Actually write the successful changes
|
|
1320
|
+
if "misordered hunks" in patch_result.stderr:
|
|
1321
|
+
logger.warning("Patch reported misordered hunks - falling back to difflib")
|
|
1322
|
+
# Skip to difflib application
|
|
1323
|
+
apply_diff_with_difflib(file_path, git_diff)
|
|
1087
1324
|
return
|
|
1325
|
+
elif patch_result.returncode == 0:
|
|
1326
|
+
logger.info("Successfully applied some hunks with patch, writing changes")
|
|
1327
|
+
# Verify changes were actually written
|
|
1328
|
+
changes_written = True
|
|
1088
1329
|
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
logger.
|
|
1330
|
+
else:
|
|
1331
|
+
logger.warning("Patch application had mixed results")
|
|
1332
|
+
|
|
1333
|
+
patch_output = patch_result.stdout
|
|
1334
|
+
logger.debug(f"Raw (system) patch stdout:\n{patch_output}")
|
|
1335
|
+
logger.debug(f"Raw (system) patch stdout:\n{patch_result.stderr}")
|
|
1336
|
+
hunk_status = parse_patch_output(patch_output)
|
|
1337
|
+
|
|
1338
|
+
# Record results from patch stage
|
|
1339
|
+
for hunk_num, success in dry_run_status.items():
|
|
1340
|
+
if success:
|
|
1341
|
+
if "Reversed (or previously applied)" in patch_output and f"Hunk #{hunk_num}" in patch_output:
|
|
1342
|
+
logger.info(f"Hunk #{hunk_num} was already applied")
|
|
1343
|
+
results["already_applied"].append(hunk_num)
|
|
1344
|
+
else:
|
|
1345
|
+
logger.info(f"Hunk #{hunk_num} applied successfully")
|
|
1346
|
+
results["succeeded"].append(hunk_num)
|
|
1347
|
+
changes_written = True
|
|
1348
|
+
else:
|
|
1349
|
+
logger.info(f"Hunk #{hunk_num} failed to apply")
|
|
1350
|
+
results["failed"].append(hunk_num)
|
|
1351
|
+
|
|
1352
|
+
if results["succeeded"] or results["already_applied"]:
|
|
1353
|
+
logger.info(f"Successfully applied {len(results['succeeded'])} hunks, "
|
|
1354
|
+
f"{len(results['already_applied'])} were already applied")
|
|
1355
|
+
changes_written = True
|
|
1356
|
+
|
|
1357
|
+
# If any hunks failed, extract them to pass onto next pipeline stage
|
|
1358
|
+
if results["failed"]:
|
|
1359
|
+
logger.info(f"Extracting {len(results['failed'])} failed hunks for next stage")
|
|
1360
|
+
git_diff = extract_remaining_hunks(git_diff, {h: False for h in results["failed"]})
|
|
1361
|
+
else:
|
|
1362
|
+
logger.info("Exiting pipeline die to full success condition.")
|
|
1363
|
+
return {"status": "success", "details": results}
|
|
1364
|
+
|
|
1365
|
+
# Proceed with git apply if we have any failed hunks
|
|
1366
|
+
if results["failed"]:
|
|
1367
|
+
logger.debug("Some failed hunks reported, processing..")
|
|
1368
|
+
if not git_diff.strip():
|
|
1369
|
+
logger.warning("No valid hunks remaining to process")
|
|
1370
|
+
return {"status": "partial", "details": results}
|
|
1371
|
+
temp_path = None
|
|
1372
|
+
logger.info("Proceeding with git apply for remaining hunks")
|
|
1094
1373
|
try:
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1374
|
+
with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', suffix='.diff', delete=False) as temp_file:
|
|
1375
|
+
temp_file.write(git_diff)
|
|
1376
|
+
temp_path = temp_file.name
|
|
1377
|
+
|
|
1378
|
+
git_result = subprocess.run(
|
|
1379
|
+
['git', 'apply', '--verbose', '--ignore-whitespace',
|
|
1380
|
+
'--ignore-space-change', '--whitespace=nowarn',
|
|
1381
|
+
'--check', temp_path],
|
|
1382
|
+
cwd=user_codebase_dir,
|
|
1383
|
+
capture_output=True,
|
|
1384
|
+
text=True
|
|
1385
|
+
)
|
|
1386
|
+
|
|
1387
|
+
if "patch does not apply" not in git_result.stderr:
|
|
1388
|
+
logger.info("Changes already applied according to git apply --check")
|
|
1389
|
+
return {"status": "success", "details": {
|
|
1390
|
+
"succeeded": [],
|
|
1391
|
+
"failed": [],
|
|
1392
|
+
"already_applied": results["failed"]
|
|
1393
|
+
}}
|
|
1394
|
+
|
|
1395
|
+
git_result = subprocess.run(
|
|
1396
|
+
['git', 'apply', '--verbose', '--ignore-whitespace',
|
|
1397
|
+
'--ignore-space-change', '--whitespace=nowarn',
|
|
1398
|
+
'--reject', temp_path],
|
|
1399
|
+
cwd=user_codebase_dir,
|
|
1400
|
+
capture_output=True,
|
|
1401
|
+
text=True
|
|
1402
|
+
)
|
|
1403
|
+
|
|
1404
|
+
logger.debug(f"Git apply stdout:\n{git_result.stdout}")
|
|
1405
|
+
logger.debug(f"Git apply stderr:\n{git_result.stderr}")
|
|
1406
|
+
|
|
1407
|
+
if git_result.returncode == 0:
|
|
1408
|
+
logger.info("Git apply succeeded")
|
|
1409
|
+
# Move hunks from failed to succeeded
|
|
1410
|
+
for hunk_num in results["failed"][:]:
|
|
1411
|
+
results["failed"].remove(hunk_num)
|
|
1412
|
+
results["succeeded"].append(hunk_num)
|
|
1413
|
+
changes_written = True
|
|
1414
|
+
return {"status": "success", "details": results}
|
|
1415
|
+
elif "already applied" in git_result.stderr:
|
|
1416
|
+
# Move hunks from failed to already_applied
|
|
1417
|
+
for hunk_num in results["failed"][:]:
|
|
1418
|
+
results["failed"].remove(hunk_num)
|
|
1419
|
+
results["already_applied"].append(hunk_num)
|
|
1420
|
+
logger.info(f"Marking hunk {hunk_num} as already applied and continuing")
|
|
1107
1421
|
else:
|
|
1108
|
-
logger.
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1422
|
+
logger.info("Git apply failed, moving to difflib stage...")
|
|
1423
|
+
# Continue to difflib
|
|
1424
|
+
finally:
|
|
1425
|
+
if os.path.exists(temp_path):
|
|
1426
|
+
os.unlink(temp_path)
|
|
1427
|
+
|
|
1428
|
+
# If git apply failed, try difflib with the same hunks we just tried
|
|
1429
|
+
logger.info("Attempting to apply changes with difflib")
|
|
1430
|
+
try:
|
|
1431
|
+
logger.info("Starting difflib application...")
|
|
1432
|
+
# Parse the remaining hunks for difflib
|
|
1433
|
+
if git_diff:
|
|
1434
|
+
logger.debug(f"Passing to difflib:\n{git_diff}")
|
|
1435
|
+
try:
|
|
1436
|
+
apply_diff_with_difflib(file_path, git_diff)
|
|
1437
|
+
# If difflib succeeds, move remaining failed hunks to succeeded
|
|
1438
|
+
for hunk_num in results["failed"][:]:
|
|
1439
|
+
results["failed"].remove(hunk_num)
|
|
1440
|
+
results["succeeded"].append(hunk_num)
|
|
1441
|
+
changes_written = True
|
|
1442
|
+
return {"status": "success", "details": results}
|
|
1443
|
+
except Exception as e:
|
|
1444
|
+
if isinstance(e, PatchApplicationError) and e.details.get("type") == "already_applied":
|
|
1445
|
+
# Move failed hunks to already_applied
|
|
1446
|
+
for hunk_num in results["failed"][:]:
|
|
1447
|
+
results["failed"].remove(hunk_num)
|
|
1448
|
+
results["already_applied"].append(hunk_num)
|
|
1449
|
+
return {"status": "success", "details": results}
|
|
1450
|
+
logger.error(f"Difflib application failed: {str(e)}")
|
|
1451
|
+
raise
|
|
1452
|
+
except PatchApplicationError as e:
|
|
1453
|
+
logger.error(f"Difflib application failed: {str(e)}")
|
|
1454
|
+
if e.details.get("type") == "already_applied":
|
|
1455
|
+
return {"status": "success", "details": results}
|
|
1456
|
+
if changes_written:
|
|
1457
|
+
return {"status": "partial", "details": results}
|
|
1458
|
+
raise
|
|
1459
|
+
else:
|
|
1460
|
+
logger.debug("Unreachable? No hunks reported failure, exiting pipeline after system patch stage.")
|
|
1112
1461
|
|
|
1113
|
-
finally:
|
|
1114
|
-
if os.path.exists(temp_file):
|
|
1115
|
-
os.remove(temp_file)
|
|
1116
1462
|
except Exception as e:
|
|
1117
1463
|
logger.error(f"Error applying patch: {str(e)}")
|
|
1118
1464
|
raise
|
|
1465
|
+
finally:
|
|
1466
|
+
cleanup_patch_artifacts(user_codebase_dir, file_path)
|
|
1467
|
+
|
|
1468
|
+
# Return final status
|
|
1469
|
+
if len(results["failed"]) == 0:
|
|
1470
|
+
return {"status": "success", "details": results}
|
|
1471
|
+
elif changes_written:
|
|
1472
|
+
return {"status": "partial", "details": results}
|
|
1473
|
+
return {"status": "error", "details": results}
|
|
1474
|
+
|
|
1475
|
+
def parse_patch_output(patch_output: str) -> Dict[int, bool]:
    """Parse patch command output to determine which hunks succeeded/failed.

    Only lines that follow the first "Patching file" line are examined.
    Matching is case-insensitive because patch implementations differ in
    capitalisation (e.g. GNU patch reports "Hunk #2 FAILED at 20." while
    other implementations print "failed").

    Args:
        patch_output: Captured stdout of a ``patch`` invocation.

    Returns:
        A dict mapping hunk number to success status. Hunks applied with
        fuzz count as successful. If the same hunk number is reported more
        than once, the last report wins.
    """
    logger.debug(f"Parsing patch output:\n{patch_output}")

    # Matches e.g. "Hunk #1 succeeded at 6.", "Hunk #3 succeeded at 40 with
    # fuzz 2" and "Hunk #2 FAILED at 20.".
    hunk_line = re.compile(
        r'Hunk #(\d+) (succeeded at \d+(?:\s+with fuzz \d+)?|failed)',
        re.IGNORECASE,
    )

    hunk_status = {}
    in_patch_output = False
    for line in patch_output.splitlines():
        if "patching file" in line.lower():
            in_patch_output = True
            continue
        if not in_patch_output:
            # Ignore any preamble printed before the first file is patched.
            continue

        match = hunk_line.search(line)
        if match:
            hunk_num = int(match.group(1))
            # Consider both clean success and fuzzy matches as successful
            success = match.group(2).lower().startswith('succeeded')
            hunk_status[hunk_num] = success
            logger.debug(f"Found hunk {hunk_num}: {'succeeded' if success else 'failed'}")

    logger.debug(f"Final hunk status: {hunk_status}")
    return hunk_status
|
|
1514
|
+
|
|
1515
|
+
def extract_remaining_hunks(git_diff: str, hunk_status: Dict[int,bool]) -> str:
    """Extract hunks that weren't successfully applied.

    Hunks are numbered from 1 in the order their ``@@`` lines appear in
    ``git_diff``. A hunk is kept only when ``hunk_status`` explicitly maps
    its number to a falsy value; hunks that are missing from the mapping
    are treated like successful ones and dropped.

    Args:
        git_diff: Unified diff text.
        hunk_status: Mapping of hunk number to success status.

    Returns:
        A newline-terminated diff made of all collected header lines
        (``diff --git``, ``--- ``, ``+++ ``) followed by the kept hunks,
        each ``@@`` line suffixed with `` Hunk #N``. Returns ``''`` when no
        hunk is kept, so callers can detect "nothing left to apply".
    """
    logger.debug("Extracting remaining hunks from diff")

    logger.debug(f"Hunk status before extraction: {json.dumps(hunk_status, indent=2)}")

    headers = []
    hunks = []           # (hunk number, list of lines) for every kept hunk
    current_hunk = []    # lines of the hunk being collected; empty = not collecting
    current_number = 0
    hunk_count = 0

    for line in git_diff.splitlines():
        # NOTE(review): a removed line whose own text starts with "-- " also
        # renders as "--- ..." and is classified as a header here; telling
        # the two apart would need the line counts from the @@ header.
        if line.startswith(('diff --git', '--- ', '+++ ')):
            headers.append(line)
        elif line.startswith('@@'):
            # A new hunk header closes whatever hunk was being collected.
            if current_hunk:
                hunks.append((current_number, current_hunk))
                current_hunk = []

            hunk_count += 1
            # Only start collecting if this hunk failed
            if hunk_count in hunk_status and not hunk_status[hunk_count]:
                logger.debug(f"Including failed hunk #{hunk_count}")
                current_number = hunk_count
                current_hunk = [f"{line} Hunk #{hunk_count}"]
            else:
                logger.debug(f"Skipping successful hunk #{hunk_count}")
        elif current_hunk:
            if line.startswith((' ', '+', '-', '\\')):
                current_hunk.append(line)
            else:
                # Anything else (blank line, "index ..." line, trailing text)
                # ends the hunk and is not part of its body.
                hunks.append((current_number, current_hunk))
                current_hunk = []

    if current_hunk:
        hunks.append((current_number, current_hunk))

    if not hunks:
        # Headers without any hunk are not an applicable patch.
        logger.warning("No hunks to extract")
        return ''

    # Build final result: headers first, then every kept hunk in order.
    result = list(headers)
    for _, hunk_lines in hunks:
        result.extend(hunk_lines)

    final_diff = '\n'.join(result) + '\n'
    logger.debug(f"Extracted diff for remaining hunks:\n{final_diff}")
    return final_diff
|