ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release.
This version of ziya might be problematic.
- app/main.py +2 -1
- app/server.py +11 -2
- app/templates/asset-manifest.json +17 -17
- app/templates/index.html +1 -1
- app/templates/static/js/14386.567bf803.chunk.js +2 -0
- app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
- app/templates/static/js/94645.a352e47a.chunk.js +2 -0
- app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
- app/templates/static/js/98244.0b90f940.chunk.js +3 -0
- app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
- app/templates/static/js/99948.71670e91.chunk.js +2 -0
- app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
- app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
- app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
- app/utils/aws_utils.py +48 -36
- app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
- app/utils/diff_utils/application/patch_apply.py +248 -2
- app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
- app/utils/diff_utils/parsing/diff_parser.py +37 -13
- app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
- app/utils/diff_utils/validation/validators.py +201 -259
- app/utils/directory_util.py +34 -3
- app/utils/gitignore_parser.py +19 -6
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
- app/templates/static/js/14386.881399c5.chunk.js +0 -2
- app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
- app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
- app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
- app/templates/static/js/94645.68d48e03.chunk.js +0 -2
- app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
- app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
- app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
- /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
app/utils/diff_utils/application/patch_apply.py

@@ -1,6 +1,7 @@
 from typing import List, Optional, Tuple
 import re
 import logging
+import difflib
 from ..core.exceptions import PatchApplicationError
 from ..core.config import get_max_offset, get_confidence_threshold
 from ..parsing.diff_parser import parse_unified_diff_exact_plus
@@ -201,15 +202,20 @@ def apply_diff_with_difflib_hybrid_forced(
             fuzzy_initial_pos_search
         )

+        # Store fuzzy match results for later use in indentation adaptation
+        hunk_fuzzy_ratio = fuzzy_best_ratio  # Store for use in indentation adaptation
+
         # Special handling for whitespace-only changes
         if whitespace_only and (fuzzy_best_ratio < MIN_CONFIDENCE or fuzzy_best_pos is None):
             logger.info(f"Hunk #{hunk_idx}: Detected whitespace-only change, using specialized handling")
             fuzzy_best_pos = fuzzy_initial_pos_search
             fuzzy_best_ratio = 0.9  # High confidence for whitespace changes
+            hunk_fuzzy_ratio = fuzzy_best_ratio
         if fuzzy_best_ratio < MIN_CONFIDENCE and is_whitespace_only_change(h['old_block'], h['new_lines']):
             logger.info(f"Hunk #{hunk_idx}: Detected whitespace-only change, using specialized handling")
             fuzzy_best_pos = fuzzy_initial_pos_search
             fuzzy_best_ratio = 0.9  # High confidence for whitespace changes
+            hunk_fuzzy_ratio = fuzzy_best_ratio

         # --- End Inlined ---

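The stored hunk_fuzzy_ratio comes from a sliding fuzzy search over the target file. Ziya's actual search helper is not shown in this diff; as a rough standalone sketch of that kind of scan, assuming whitespace-insensitive comparison:

import difflib
from typing import List, Optional, Tuple

def fuzzy_find_block(file_lines: List[str], block: List[str]) -> Tuple[Optional[int], float]:
    # Slide the block over the file and keep the highest SequenceMatcher ratio.
    best_pos: Optional[int] = None
    best_ratio = 0.0
    target = '\n'.join(line.strip() for line in block)
    for pos in range(len(file_lines) - len(block) + 1):
        window = '\n'.join(line.strip() for line in file_lines[pos:pos + len(block)])
        ratio = difflib.SequenceMatcher(None, target, window).ratio()
        if ratio > best_ratio:
            best_pos, best_ratio = pos, ratio
    return best_pos, best_ratio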
@@ -338,8 +344,248 @@ def apply_diff_with_difflib_hybrid_forced(
            hunk_failures.append((f"Unexpected duplicates detected for Hunk #{hunk_idx}", failure_info))
            continue

-        # --- Apply the hunk
-
+        # --- Apply the hunk with intelligent indentation adaptation ---
+        # Handle systematic indentation loss and indentation mismatches from fuzzy matching
+
+        original_lines_to_replace = final_lines_with_endings[remove_pos:end_remove_pos]
+
+        # Check if we need indentation adaptation
+        needs_indentation_adaptation = False
+        adaptation_type = None
+
+        if len(new_lines_content) >= 1 and len(original_lines_to_replace) >= 1:
+            # Analyze indentation patterns
+            context_matches = 0
+            total_content_lines = 0
+            indentation_loss_count = 0
+            indentation_mismatch_count = 0
+
+            # Calculate average indentation in original and new content
+            orig_indents = []
+            new_indents = []
+
+            for new_line in new_lines_content:
+                new_content = new_line.strip()
+                if new_content:
+                    total_content_lines += 1
+                    new_indent = len(new_line) - len(new_line.lstrip())
+                    new_indents.append(new_indent)
+
+                    # Find matching content in original
+                    for orig_line in original_lines_to_replace:
+                        orig_content = orig_line.strip()
+                        if orig_content and re.sub(r'\s+', ' ', orig_content) == re.sub(r'\s+', ' ', new_content):
+                            context_matches += 1
+                            orig_indent = len(orig_line) - len(orig_line.lstrip())
+                            orig_indents.append(orig_indent)
+
+                            # Check for systematic indentation patterns
+                            indent_diff = orig_indent - new_indent
+                            if indent_diff == 1:
+                                indentation_loss_count += 1
+                            elif abs(indent_diff) > 4:  # Significant indentation mismatch
+                                indentation_mismatch_count += 1
+                            break
+
+            # Determine adaptation strategy
+            if (total_content_lines >= 3 and
+                    context_matches >= max(2, total_content_lines * 0.6) and  # At least 60% context matches
+                    indentation_loss_count >= max(2, context_matches * 0.5)):  # At least 50% have 1-space loss
+                needs_indentation_adaptation = True
+                adaptation_type = "systematic_loss"
+            elif (context_matches >= max(1, total_content_lines * 0.5) and  # At least 50% context matches
+                    indentation_mismatch_count >= max(1, context_matches * 0.5) and  # Significant mismatches
+                    orig_indents and new_indents):  # We have indentation data
+                # This is likely a fuzzy match with indentation mismatch
+                avg_orig_indent = sum(orig_indents) / len(orig_indents)
+                avg_new_indent = sum(new_indents) / len(new_indents)
+
+                # If the diff has much more indentation than the target, adapt it
+                if avg_new_indent > avg_orig_indent + 8:  # Significant indentation difference
+                    needs_indentation_adaptation = True
+                    adaptation_type = "fuzzy_mismatch"
+                    logger.info(f"Hunk #{hunk_idx}: Detected indentation mismatch - diff avg: {avg_new_indent:.1f}, target avg: {avg_orig_indent:.1f}")
+
+        if needs_indentation_adaptation:
+            # Apply with indentation adaptation
+            corrected_new_lines = []
+
+            if adaptation_type == "systematic_loss":
+                # Original systematic loss handling
+                for new_line in new_lines_content:
+                    new_content = new_line.strip()
+
+                    if not new_content:
+                        corrected_new_lines.append(new_line + dominant_ending)
+                        continue
+
+                    # Look for matching content in original to preserve indentation
+                    found_original_indentation = None
+                    for orig_line in original_lines_to_replace:
+                        orig_content = orig_line.strip()
+                        if orig_content and re.sub(r'\s+', ' ', orig_content) == re.sub(r'\s+', ' ', new_content):
+                            orig_indent = orig_line[:len(orig_line) - len(orig_line.lstrip())]
+                            found_original_indentation = orig_indent
+                            break
+
+                    if found_original_indentation is not None:
+                        corrected_new_lines.append(found_original_indentation + new_content + dominant_ending)
+                    else:
+                        corrected_new_lines.append(new_line + dominant_ending)
+
+            elif adaptation_type == "fuzzy_mismatch":
+                # Adapt diff indentation to match target file's indentation style
+                # For high-confidence fuzzy matches with structural differences,
+                # analyze the semantic intent of the diff
+
+                if hunk_fuzzy_ratio > 0.9:  # Very high confidence
+                    # For very high confidence matches, try to understand the semantic intent
+                    old_block = h.get('old_block', [])
+                    new_lines = h.get('new_lines', [])
+
+                    # Check if this is a removal operation (fewer new lines than old)
+                    if len(new_lines) < len(old_block):
+                        # This is likely a removal operation
+                        # Find which lines from old_block are NOT in new_lines (these are being removed)
+                        # Find which lines from old_block ARE in new_lines (these are being kept)
+
+                        lines_to_remove = []
+                        lines_to_keep_content = []
+
+                        # Identify content that's being removed vs kept
+                        for old_line in old_block:
+                            old_content = old_line.strip()
+                            if not old_content:
+                                continue
+
+                            # Check if this content appears in the new_lines
+                            found_in_new = False
+                            for new_line in new_lines:
+                                new_content = new_line.strip()
+                                if new_content and re.sub(r'\s+', ' ', old_content) == re.sub(r'\s+', ' ', new_content):
+                                    found_in_new = True
+                                    lines_to_keep_content.append(old_content)
+                                    break
+
+                            if not found_in_new:
+                                lines_to_remove.append(old_content)
+
+                        logger.debug(f"Hunk #{hunk_idx}: Removal operation - keeping {len(lines_to_keep_content)} lines, removing {len(lines_to_remove)} lines")
+
+                        # Now apply this semantic transformation to the original lines
+                        result_lines = []
+                        skip_until_closing = None
+
+                        for orig_line in original_lines_to_replace:
+                            orig_content = orig_line.strip()
+                            should_keep = True
+
+                            # Check if this line should be removed based on semantic analysis
+                            for remove_content in lines_to_remove:
+                                # Use fuzzy matching to handle minor differences
+                                similarity = difflib.SequenceMatcher(None,
+                                    re.sub(r'\s+', ' ', orig_content),
+                                    re.sub(r'\s+', ' ', remove_content)).ratio()
+                                if similarity > 0.8:  # High similarity threshold
+                                    should_keep = False
+                                    logger.debug(f"Removing line due to semantic match: {repr(orig_content)}")
+
+                                    # Special handling for container elements
+                                    if orig_content.startswith('<div') and not orig_content.endswith('/>'):
+                                        # This opens a container, we should skip until its closing tag
+                                        skip_until_closing = '</div>'
+                                    break
+
+                            # Handle skipping until closing tag
+                            if skip_until_closing and orig_content == skip_until_closing:
+                                should_keep = False
+                                skip_until_closing = None
+                                logger.debug(f"Removing closing tag: {repr(orig_content)}")
+                            elif skip_until_closing:
+                                should_keep = False
+                                logger.debug(f"Skipping content inside container: {repr(orig_content)}")
+
+                            if should_keep:
+                                result_lines.append(orig_line)
+
+                        corrected_new_lines = result_lines
+                    else:
+                        # Not a removal operation, use standard indentation adaptation
+                        corrected_new_lines = []
+                        for new_line in new_lines_content:
+                            new_content = new_line.strip()
+
+                            if not new_content:
+                                corrected_new_lines.append(new_line + dominant_ending)
+                                continue
+
+                            # Find the best matching line in the original to determine target indentation
+                            best_match_indent = None
+                            best_match_ratio = 0.0
+
+                            for orig_line in original_lines_to_replace:
+                                orig_content = orig_line.strip()
+                                if orig_content:
+                                    # Calculate content similarity
+                                    content_ratio = difflib.SequenceMatcher(None,
+                                        re.sub(r'\s+', ' ', new_content),
+                                        re.sub(r'\s+', ' ', orig_content)).ratio()
+                                    if content_ratio > best_match_ratio:
+                                        best_match_ratio = content_ratio
+                                        best_match_indent = orig_line[:len(orig_line) - len(orig_line.lstrip())]
+
+                            # If we found a good match, use its indentation
+                            if best_match_indent is not None and best_match_ratio > 0.6:
+                                corrected_new_lines.append(best_match_indent + new_content + dominant_ending)
+                            else:
+                                # Use common indentation from original
+                                if original_lines_to_replace:
+                                    indents = [len(line) - len(line.lstrip())
+                                               for line in original_lines_to_replace if line.strip()]
+                                    if indents:
+                                        common_indent = max(set(indents), key=indents.count)
+                                        adapted_indent = ' ' * common_indent
+                                        corrected_new_lines.append(adapted_indent + new_content + dominant_ending)
+                                    else:
+                                        corrected_new_lines.append(new_line + dominant_ending)
+                                else:
+                                    corrected_new_lines.append(new_line + dominant_ending)
+                else:
+                    # Lower confidence, use standard indentation adaptation
+                    corrected_new_lines = []
+                    for new_line in new_lines_content:
+                        new_content = new_line.strip()
+
+                        if not new_content:
+                            corrected_new_lines.append(new_line + dominant_ending)
+                            continue
+
+                        # Use the most common indentation level in the original lines
+                        if original_lines_to_replace:
+                            indents = []
+                            for orig_line in original_lines_to_replace:
+                                if orig_line.strip():
+                                    indent_len = len(orig_line) - len(orig_line.lstrip())
+                                    indents.append(indent_len)
+
+                            if indents:
+                                # Use the most common indentation level
+                                common_indent = max(set(indents), key=indents.count)
+                                adapted_indent = ' ' * common_indent
+                                corrected_new_lines.append(adapted_indent + new_content + dominant_ending)
+                            else:
+                                corrected_new_lines.append(new_line + dominant_ending)
+                        else:
+                            corrected_new_lines.append(new_line + dominant_ending)
+
+            final_lines_with_endings[remove_pos:end_remove_pos] = corrected_new_lines
+            logger.info(f"Hunk #{hunk_idx}: Applied indentation adaptation ({adaptation_type})")
+        else:
+            # Standard application
+            new_lines_with_endings = []
+            for line in new_lines_content:
+                new_lines_with_endings.append(line + dominant_ending)
+            final_lines_with_endings[remove_pos:end_remove_pos] = new_lines_with_endings

         # --- Update Offset ---
         # The actual number of lines removed might be different from actual_remove_count
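Most of the fallback branches above re-anchor each added line to the most common indentation among the lines being replaced (max(set(indents), key=indents.count)). A minimal standalone sketch of that fallback, with hypothetical names:

def reindent_to_modal_indent(new_lines, original_lines):
    # Re-anchor each non-blank new line to the modal indent of the replaced block.
    indents = [len(l) - len(l.lstrip()) for l in original_lines if l.strip()]
    if not indents:
        return list(new_lines)
    common = max(set(indents), key=indents.count)
    return [' ' * common + l.strip() if l.strip() else l for l in new_lines]

For example, reindent_to_modal_indent(['        x = 1'], ['    y = 2']) yields ['    x = 1'].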
app/utils/diff_utils/application/simple_identical_blocks_fix.py

@@ -0,0 +1,129 @@
+"""
+Simple fix for identical adjacent blocks by improving position selection.
+"""
+
+import logging
+import difflib
+from typing import List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+def find_best_position_for_identical_blocks(
+    file_lines: List[str],
+    old_lines: List[str],
+    expected_pos: int,
+    search_radius: int = 10
+) -> Tuple[Optional[int], float]:
+    """
+    Find the best position for applying changes when there are identical blocks.
+
+    This function is more conservative and prefers positions closer to the expected position.
+    """
+    if not old_lines:
+        return expected_pos, 1.0
+
+    # First, check if the expected position is an exact match
+    if (expected_pos + len(old_lines) <= len(file_lines) and
+            expected_pos >= 0):
+
+        candidate_lines = file_lines[expected_pos:expected_pos + len(old_lines)]
+        if lines_match_exactly(old_lines, candidate_lines):
+            logger.debug(f"Exact match found at expected position {expected_pos}")
+            return expected_pos, 1.0
+
+    # Search in a small radius around the expected position
+    best_pos = None
+    best_score = 0.0
+
+    start_search = max(0, expected_pos - search_radius)
+    end_search = min(len(file_lines) - len(old_lines), expected_pos + search_radius)
+
+    for pos in range(start_search, end_search + 1):
+        if pos + len(old_lines) > len(file_lines):
+            continue
+
+        candidate_lines = file_lines[pos:pos + len(old_lines)]
+
+        # Calculate match score
+        match_score = calculate_match_score(old_lines, candidate_lines)
+
+        # Add distance penalty - prefer positions closer to expected
+        distance_penalty = abs(pos - expected_pos) / max(search_radius, 1)
+        adjusted_score = match_score * (1.0 - distance_penalty * 0.3)
+
+        if adjusted_score > best_score:
+            best_score = adjusted_score
+            best_pos = pos
+
+        logger.debug(f"Position {pos}: match_score={match_score:.3f}, "
+                     f"distance_penalty={distance_penalty:.3f}, "
+                     f"adjusted_score={adjusted_score:.3f}")
+
+    return best_pos, best_score
+
+def lines_match_exactly(lines1: List[str], lines2: List[str]) -> bool:
+    """Check if two lists of lines match exactly (ignoring whitespace)."""
+    if len(lines1) != len(lines2):
+        return False
+
+    for l1, l2 in zip(lines1, lines2):
+        if l1.strip() != l2.strip():
+            return False
+
+    return True
+
+def calculate_match_score(lines1: List[str], lines2: List[str]) -> float:
+    """Calculate how well two lists of lines match."""
+    if not lines1 and not lines2:
+        return 1.0
+    if not lines1 or not lines2:
+        return 0.0
+
+    # Use difflib for similarity
+    text1 = '\n'.join(line.strip() for line in lines1)
+    text2 = '\n'.join(line.strip() for line in lines2)
+
+    return difflib.SequenceMatcher(None, text1, text2).ratio()
+
+def detect_and_fix_identical_blocks_issue(
+    file_lines: List[str],
+    old_lines: List[str],
+    expected_pos: int
+) -> Tuple[Optional[int], float]:
+    """
+    Detect if this is an identical blocks case and return a better position.
+    """
+    # Look for other occurrences of similar patterns
+    similar_positions = []
+
+    if len(old_lines) < 3:
+        # Too short to be meaningful
+        return None, 0.0
+
+    # Find the most distinctive line in the pattern
+    distinctive_line = None
+    for line in old_lines:
+        stripped = line.strip()
+        if (stripped and
+                len(stripped) > 10 and
+                stripped not in ['return None', 'pass', '{}', '[]'] and
+                not stripped.startswith('#')):
+            distinctive_line = stripped
+            break
+
+    if not distinctive_line:
+        return None, 0.0
+
+    # Find all occurrences of the distinctive line
+    for i, file_line in enumerate(file_lines):
+        if file_line.strip() == distinctive_line:
+            similar_positions.append(i)
+
+    if len(similar_positions) <= 1:
+        # Not an identical blocks case
+        return None, 0.0
+
+    logger.debug(f"Found identical blocks case with {len(similar_positions)} similar positions: {similar_positions}")
+
+    # Use the improved position finding
+    return find_best_position_for_identical_blocks(file_lines, old_lines, expected_pos)
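Assuming the module is importable from the path above, the new helper can be exercised directly. In this hypothetical input the distinctive line occurs twice, so the identical-blocks path engages and the exact match at the expected position wins:

from app.utils.diff_utils.application.simple_identical_blocks_fix import (
    detect_and_fix_identical_blocks_issue,
)

file_lines = ['def f():', '    return compute_value(x)', '',
              'def g():', '    return compute_value(x)', '']
old_lines = ['def g():', '    return compute_value(x)', '']
pos, score = detect_and_fix_identical_blocks_issue(file_lines, old_lines, expected_pos=3)
# pos == 3, score == 1.0: "return compute_value(x)" occurs at two positions,
# and the exact match at the expected position beats the earlier duplicate.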
app/utils/diff_utils/parsing/diff_parser.py

@@ -238,6 +238,13 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> List[D
             continue

         if line.startswith('@@ '):
+            # If we were already in a hunk, finish processing it first
+            if in_hunk and current_hunk:
+                # Finalize the previous hunk
+                hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
+                if hunk_key not in seen_hunks:
+                    seen_hunks.add(hunk_key)
+
             match = re.match(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?', line)
             hunk_num = int(match.group(5)) if match and match.group(5) else len(hunks) + 1
             if match:
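The hunk-header regex here also tolerates an optional trailing "Hunk #N" marker. A quick illustration of what it captures:

import re

header_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?')
m = header_re.match('@@ -238,6 +238,13 @@ Hunk #2')
assert m is not None
# Groups: old start, old count, new start, new count, optional hunk number.
assert m.groups() == ('238', '6', '238', '13', '2')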
@@ -279,29 +286,46 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> List[D
             current_hunk = hunk

             i += 1
-            # Validate the hunk header against the actual content
-            # This helps catch malformed hunks early
-            if current_hunk:
-                # Count the number of lines in the hunk
-                hunk_lines = []
-                j = i
-                while j < len(lines) and lines[j].startswith((' ', '+', '-', '\\')):
-                    hunk_lines.append(lines[j])
-                    j += 1
-                current_hunk['expected_line_count'] = len(hunk_lines)
             continue

         if in_hunk:
-            #
-            if
+            # Check if this line starts a new hunk (another @@ line) - this should end the current hunk
+            if line.startswith('@@ '):
+                # This will be handled by the @@ section above, so just end this hunk
                 in_hunk = False
                 if current_hunk:
                     # Check if this hunk is complete and unique
                     hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
                     if hunk_key not in seen_hunks:
                         seen_hunks.add(hunk_key)
-                i
+                # Don't increment i here, let the @@ handler process this line
                 continue
+
+            # End of hunk reached if we see a line that doesn't start with ' ', '+', '-', or '\'
+            # BUT we need to be more careful about what constitutes the end of a hunk
+            if not line.startswith((' ', '+', '-', '\\')):
+                # Check if this is actually the end of the diff content or just an empty line
+                # Empty lines within a hunk should be treated as context lines
+                if line.strip() == '':
+                    # This is an empty line - treat it as a context line if we're still within the hunk bounds
+                    if current_hunk:
+                        current_hunk['lines'].append(line)
+                        # Empty lines are context lines (should be in both old and new)
+                        current_hunk['new_lines'].append('')
+                        current_hunk['old_block'].append('')
+                    i += 1
+                    continue
+                else:
+                    # This is a non-diff line, end the hunk
+                    in_hunk = False
+                    if current_hunk:
+                        # Check if this hunk is complete and unique
+                        hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
+                        if hunk_key not in seen_hunks:
+                            seen_hunks.add(hunk_key)
+                    i += 1
+                    continue
+
         if current_hunk:
             current_hunk['lines'].append(line)
             if line.startswith('-'):
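The new parsing branch effectively classifies each line inside a hunk body: '+' and '-' are change lines, ' ' and '\' continue the hunk, blank lines count as context on both sides, and anything else ends the hunk (a fresh '@@ ' header is handed back to the header handler). A compact sketch of that rule, with a hypothetical helper name:

def classify_hunk_line(line: str) -> str:
    # Mirror the parser's decision: blank lines stay inside the hunk as context.
    if line.startswith('+'):
        return 'added'
    if line.startswith('-'):
        return 'removed'
    if line.startswith((' ', '\\')) or line.strip() == '':
        return 'context'
    return 'end-of-hunk'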
app/utils/diff_utils/pipeline/pipeline_manager.py

@@ -67,11 +67,29 @@ def apply_diff_pipeline(git_diff: str, file_path: str, request_id: Optional[str]

     if len(individual_diffs) > 1:
         # Find the diff that matches our target file
-
-
+        # Compare using basename to handle full paths vs relative paths
+        target_basename = os.path.basename(file_path)
+        matching_diff = None
+
+        for diff in individual_diffs:
+            diff_target = extract_target_file_from_diff(diff)
+            if diff_target:
+                # Try exact match first
+                if diff_target == file_path or diff_target == target_basename:
+                    matching_diff = diff
+                    break
+                # Try basename match
+                elif os.path.basename(diff_target) == target_basename:
+                    matching_diff = diff
+                    break
+
         if matching_diff:
+            logger.debug(f"Found matching diff for target file: {file_path}")
             git_diff = matching_diff
             pipeline.current_diff = git_diff
+        else:
+            logger.warning(f"No matching diff found for target file: {file_path}")
+            logger.debug(f"Available diff targets: {[extract_target_file_from_diff(d) for d in individual_diffs]}")

     # Get the base directory
     user_codebase_dir = os.environ.get("ZIYA_USER_CODEBASE_DIR")
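The selection loop prefers an exact target-path match and only then falls back to basename equality, evaluated per diff. The same logic condensed into a hypothetical helper (extract_target is assumed to behave like extract_target_file_from_diff above):

import os
from typing import Callable, List, Optional

def pick_matching_diff(diffs: List[str], file_path: str,
                       extract_target: Callable[[str], Optional[str]]) -> Optional[str]:
    # Exact path (or bare basename) match wins; basename-only match is the fallback.
    target_basename = os.path.basename(file_path)
    for diff in diffs:
        target = extract_target(diff)
        if not target:
            continue
        if target == file_path or target == target_basename:
            return diff
        if os.path.basename(target) == target_basename:
            return diff
    return None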
@@ -175,7 +193,25 @@ def apply_diff_pipeline(git_diff: str, file_path: str, request_id: Optional[str]
     # If force difflib flag is set, skip system patch and git apply
     if os.environ.get('ZIYA_FORCE_DIFFLIB'):
         logger.info("Force difflib mode enabled, bypassing system patch and git apply")
-
+        pipeline.update_stage(PipelineStage.DIFFLIB)
+        difflib_result = run_difflib_stage(pipeline, file_path, git_diff, original_lines)
+
+        # Complete the pipeline and return the proper result dictionary
+        if difflib_result:
+            pipeline.result.changes_written = True
+
+        # Set the final status based on hunk results
+        if all(tracker.status in (HunkStatus.SUCCEEDED, HunkStatus.ALREADY_APPLIED)
+               for tracker in pipeline.result.hunks.values()):
+            pipeline.result.status = "success"
+        elif any(tracker.status == HunkStatus.SUCCEEDED
+                 for tracker in pipeline.result.hunks.values()):
+            pipeline.result.status = "partial"
+        else:
+            pipeline.result.status = "error"
+
+        pipeline.complete()
+        return pipeline.result.to_dict()

     # Stage 1: System Patch
     pipeline.update_stage(PipelineStage.SYSTEM_PATCH)
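The rollup at the end reduces per-hunk statuses with all() and any(). The same rule in isolation, using plain strings as stand-ins for the package's HunkStatus values (an assumption; the enum itself isn't shown in this diff):

def rollup_status(statuses):
    # All succeeded/already applied -> success; any success -> partial; else error.
    ok = ('succeeded', 'already_applied')
    if statuses and all(s in ok for s in statuses):
        return 'success'
    if any(s == 'succeeded' for s in statuses):
        return 'partial'
    return 'error'

assert rollup_status(['succeeded', 'already_applied']) == 'success'
assert rollup_status(['succeeded', 'failed']) == 'partial'
assert rollup_status(['failed']) == 'error'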
@@ -882,6 +918,23 @@ def run_difflib_stage(pipeline: DiffPipeline, file_path: str, git_diff: str, ori

         # If the file already contains the target state, mark it as already applied
         if normalized_file_slice == normalized_new_lines:
+            # CRITICAL FIX: For deletion hunks, we need to check if the content to be deleted
+            # still exists in the file. If it does, the hunk is NOT already applied.
+            if 'removed_lines' in hunk:
+                removed_lines = hunk.get('removed_lines', [])
+
+                # If this is a deletion hunk (has lines to remove)
+                if removed_lines:
+                    # Check if the content to be deleted still exists anywhere in the file
+                    removed_content = "\n".join([normalize_line_for_comparison(line) for line in removed_lines])
+                    file_content = "\n".join([normalize_line_for_comparison(line) for line in original_lines])
+
+                    # If the content to be deleted still exists in the file,
+                    # then the hunk is NOT already applied
+                    if removed_content in file_content:
+                        logger.debug(f"Deletion hunk not applied - content to be deleted still exists in file at pos {pos}")
+                        continue
+
             # CRITICAL FIX: Also check if the old_block matches what's in the file
             # This prevents marking a hunk as "already applied" when the file has content
             # that doesn't match what we're trying to remove