ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Potentially problematic release.
This version of ziya might be problematic.
- app/main.py +2 -1
- app/server.py +11 -2
- app/templates/asset-manifest.json +17 -17
- app/templates/index.html +1 -1
- app/templates/static/js/14386.567bf803.chunk.js +2 -0
- app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
- app/templates/static/js/94645.a352e47a.chunk.js +2 -0
- app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
- app/templates/static/js/98244.0b90f940.chunk.js +3 -0
- app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
- app/templates/static/js/99948.71670e91.chunk.js +2 -0
- app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
- app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
- app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
- app/utils/aws_utils.py +48 -36
- app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
- app/utils/diff_utils/application/patch_apply.py +248 -2
- app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
- app/utils/diff_utils/parsing/diff_parser.py +37 -13
- app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
- app/utils/diff_utils/validation/validators.py +201 -259
- app/utils/directory_util.py +34 -3
- app/utils/gitignore_parser.py +19 -6
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
- app/templates/static/js/14386.881399c5.chunk.js +0 -2
- app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
- app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
- app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
- app/templates/static/js/94645.68d48e03.chunk.js +0 -2
- app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
- app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
- app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
- /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
app/utils/diff_utils/application/patch_apply.py

@@ -1,6 +1,7 @@
 from typing import List, Optional, Tuple
 import re
 import logging
+import difflib
 from ..core.exceptions import PatchApplicationError
 from ..core.config import get_max_offset, get_confidence_threshold
 from ..parsing.diff_parser import parse_unified_diff_exact_plus
@@ -201,15 +202,20 @@ def apply_diff_with_difflib_hybrid_forced(
             fuzzy_initial_pos_search
         )

+        # Store fuzzy match results for later use in indentation adaptation
+        hunk_fuzzy_ratio = fuzzy_best_ratio  # Store for use in indentation adaptation
+
         # Special handling for whitespace-only changes
         if whitespace_only and (fuzzy_best_ratio < MIN_CONFIDENCE or fuzzy_best_pos is None):
             logger.info(f"Hunk #{hunk_idx}: Detected whitespace-only change, using specialized handling")
             fuzzy_best_pos = fuzzy_initial_pos_search
             fuzzy_best_ratio = 0.9  # High confidence for whitespace changes
+            hunk_fuzzy_ratio = fuzzy_best_ratio
         if fuzzy_best_ratio < MIN_CONFIDENCE and is_whitespace_only_change(h['old_block'], h['new_lines']):
             logger.info(f"Hunk #{hunk_idx}: Detected whitespace-only change, using specialized handling")
             fuzzy_best_pos = fuzzy_initial_pos_search
             fuzzy_best_ratio = 0.9  # High confidence for whitespace changes
+            hunk_fuzzy_ratio = fuzzy_best_ratio

         # --- End Inlined ---

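The stored hunk_fuzzy_ratio comes from a sliding fuzzy search over the target file. Ziya's actual search helper is not shown in this diff; as a rough standalone sketch of that kind of scan, assuming whitespace-insensitive comparison:

import difflib
from typing import List, Optional, Tuple

def fuzzy_find_block(file_lines: List[str], block: List[str]) -> Tuple[Optional[int], float]:
    # Slide the block over the file and keep the highest SequenceMatcher ratio.
    best_pos: Optional[int] = None
    best_ratio = 0.0
    target = '\n'.join(line.strip() for line in block)
    for pos in range(len(file_lines) - len(block) + 1):
        window = '\n'.join(line.strip() for line in file_lines[pos:pos + len(block)])
        ratio = difflib.SequenceMatcher(None, target, window).ratio()
        if ratio > best_ratio:
            best_pos, best_ratio = pos, ratio
    return best_pos, best_ratio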
@@ -338,8 +344,248 @@ def apply_diff_with_difflib_hybrid_forced(
            hunk_failures.append((f"Unexpected duplicates detected for Hunk #{hunk_idx}", failure_info))
            continue

-        # --- Apply the hunk
-
+        # --- Apply the hunk with intelligent indentation adaptation ---
+        # Handle systematic indentation loss and indentation mismatches from fuzzy matching
+
+        original_lines_to_replace = final_lines_with_endings[remove_pos:end_remove_pos]
+
+        # Check if we need indentation adaptation
+        needs_indentation_adaptation = False
+        adaptation_type = None
+
+        if len(new_lines_content) >= 1 and len(original_lines_to_replace) >= 1:
+            # Analyze indentation patterns
+            context_matches = 0
+            total_content_lines = 0
+            indentation_loss_count = 0
+            indentation_mismatch_count = 0
+
+            # Calculate average indentation in original and new content
+            orig_indents = []
+            new_indents = []
+
+            for new_line in new_lines_content:
+                new_content = new_line.strip()
+                if new_content:
+                    total_content_lines += 1
+                    new_indent = len(new_line) - len(new_line.lstrip())
+                    new_indents.append(new_indent)
+
+                    # Find matching content in original
+                    for orig_line in original_lines_to_replace:
+                        orig_content = orig_line.strip()
+                        if orig_content and re.sub(r'\s+', ' ', orig_content) == re.sub(r'\s+', ' ', new_content):
+                            context_matches += 1
+                            orig_indent = len(orig_line) - len(orig_line.lstrip())
+                            orig_indents.append(orig_indent)
+
+                            # Check for systematic indentation patterns
+                            indent_diff = orig_indent - new_indent
+                            if indent_diff == 1:
+                                indentation_loss_count += 1
+                            elif abs(indent_diff) > 4:  # Significant indentation mismatch
+                                indentation_mismatch_count += 1
+                            break
+
+            # Determine adaptation strategy
+            if (total_content_lines >= 3 and
+                    context_matches >= max(2, total_content_lines * 0.6) and  # At least 60% context matches
+                    indentation_loss_count >= max(2, context_matches * 0.5)):  # At least 50% have 1-space loss
+                needs_indentation_adaptation = True
+                adaptation_type = "systematic_loss"
+            elif (context_matches >= max(1, total_content_lines * 0.5) and  # At least 50% context matches
+                    indentation_mismatch_count >= max(1, context_matches * 0.5) and  # Significant mismatches
+                    orig_indents and new_indents):  # We have indentation data
+                # This is likely a fuzzy match with indentation mismatch
+                avg_orig_indent = sum(orig_indents) / len(orig_indents)
+                avg_new_indent = sum(new_indents) / len(new_indents)
+
+                # If the diff has much more indentation than the target, adapt it
+                if avg_new_indent > avg_orig_indent + 8:  # Significant indentation difference
+                    needs_indentation_adaptation = True
+                    adaptation_type = "fuzzy_mismatch"
+                    logger.info(f"Hunk #{hunk_idx}: Detected indentation mismatch - diff avg: {avg_new_indent:.1f}, target avg: {avg_orig_indent:.1f}")
+
+        if needs_indentation_adaptation:
+            # Apply with indentation adaptation
+            corrected_new_lines = []
+
+            if adaptation_type == "systematic_loss":
+                # Original systematic loss handling
+                for new_line in new_lines_content:
+                    new_content = new_line.strip()
+
+                    if not new_content:
+                        corrected_new_lines.append(new_line + dominant_ending)
+                        continue
+
+                    # Look for matching content in original to preserve indentation
+                    found_original_indentation = None
+                    for orig_line in original_lines_to_replace:
+                        orig_content = orig_line.strip()
+                        if orig_content and re.sub(r'\s+', ' ', orig_content) == re.sub(r'\s+', ' ', new_content):
+                            orig_indent = orig_line[:len(orig_line) - len(orig_line.lstrip())]
+                            found_original_indentation = orig_indent
+                            break
+
+                    if found_original_indentation is not None:
+                        corrected_new_lines.append(found_original_indentation + new_content + dominant_ending)
+                    else:
+                        corrected_new_lines.append(new_line + dominant_ending)
+
+            elif adaptation_type == "fuzzy_mismatch":
+                # Adapt diff indentation to match target file's indentation style
+                # For high-confidence fuzzy matches with structural differences,
+                # analyze the semantic intent of the diff
+
+                if hunk_fuzzy_ratio > 0.9:  # Very high confidence
+                    # For very high confidence matches, try to understand the semantic intent
+                    old_block = h.get('old_block', [])
+                    new_lines = h.get('new_lines', [])
+
+                    # Check if this is a removal operation (fewer new lines than old)
+                    if len(new_lines) < len(old_block):
+                        # This is likely a removal operation
+                        # Find which lines from old_block are NOT in new_lines (these are being removed)
+                        # Find which lines from old_block ARE in new_lines (these are being kept)
+
+                        lines_to_remove = []
+                        lines_to_keep_content = []
+
+                        # Identify content that's being removed vs kept
+                        for old_line in old_block:
+                            old_content = old_line.strip()
+                            if not old_content:
+                                continue
+
+                            # Check if this content appears in the new_lines
+                            found_in_new = False
+                            for new_line in new_lines:
+                                new_content = new_line.strip()
+                                if new_content and re.sub(r'\s+', ' ', old_content) == re.sub(r'\s+', ' ', new_content):
+                                    found_in_new = True
+                                    lines_to_keep_content.append(old_content)
+                                    break
+
+                            if not found_in_new:
+                                lines_to_remove.append(old_content)
+
+                        logger.debug(f"Hunk #{hunk_idx}: Removal operation - keeping {len(lines_to_keep_content)} lines, removing {len(lines_to_remove)} lines")
+
+                        # Now apply this semantic transformation to the original lines
+                        result_lines = []
+                        skip_until_closing = None
+
+                        for orig_line in original_lines_to_replace:
+                            orig_content = orig_line.strip()
+                            should_keep = True
+
+                            # Check if this line should be removed based on semantic analysis
+                            for remove_content in lines_to_remove:
+                                # Use fuzzy matching to handle minor differences
+                                similarity = difflib.SequenceMatcher(None,
+                                    re.sub(r'\s+', ' ', orig_content),
+                                    re.sub(r'\s+', ' ', remove_content)).ratio()
+                                if similarity > 0.8:  # High similarity threshold
+                                    should_keep = False
+                                    logger.debug(f"Removing line due to semantic match: {repr(orig_content)}")
+
+                                    # Special handling for container elements
+                                    if orig_content.startswith('<div') and not orig_content.endswith('/>'):
+                                        # This opens a container, we should skip until its closing tag
+                                        skip_until_closing = '</div>'
+                                    break
+
+                            # Handle skipping until closing tag
+                            if skip_until_closing and orig_content == skip_until_closing:
+                                should_keep = False
+                                skip_until_closing = None
+                                logger.debug(f"Removing closing tag: {repr(orig_content)}")
+                            elif skip_until_closing:
+                                should_keep = False
+                                logger.debug(f"Skipping content inside container: {repr(orig_content)}")
+
+                            if should_keep:
+                                result_lines.append(orig_line)
+
+                        corrected_new_lines = result_lines
+                    else:
+                        # Not a removal operation, use standard indentation adaptation
+                        corrected_new_lines = []
+                        for new_line in new_lines_content:
+                            new_content = new_line.strip()
+
+                            if not new_content:
+                                corrected_new_lines.append(new_line + dominant_ending)
+                                continue
+
+                            # Find the best matching line in the original to determine target indentation
+                            best_match_indent = None
+                            best_match_ratio = 0.0
+
+                            for orig_line in original_lines_to_replace:
+                                orig_content = orig_line.strip()
+                                if orig_content:
+                                    # Calculate content similarity
+                                    content_ratio = difflib.SequenceMatcher(None,
+                                        re.sub(r'\s+', ' ', new_content),
+                                        re.sub(r'\s+', ' ', orig_content)).ratio()
+                                    if content_ratio > best_match_ratio:
+                                        best_match_ratio = content_ratio
+                                        best_match_indent = orig_line[:len(orig_line) - len(orig_line.lstrip())]
+
+                            # If we found a good match, use its indentation
+                            if best_match_indent is not None and best_match_ratio > 0.6:
+                                corrected_new_lines.append(best_match_indent + new_content + dominant_ending)
+                            else:
+                                # Use common indentation from original
+                                if original_lines_to_replace:
+                                    indents = [len(line) - len(line.lstrip())
+                                               for line in original_lines_to_replace if line.strip()]
+                                    if indents:
+                                        common_indent = max(set(indents), key=indents.count)
+                                        adapted_indent = ' ' * common_indent
+                                        corrected_new_lines.append(adapted_indent + new_content + dominant_ending)
+                                    else:
+                                        corrected_new_lines.append(new_line + dominant_ending)
+                                else:
+                                    corrected_new_lines.append(new_line + dominant_ending)
+                else:
+                    # Lower confidence, use standard indentation adaptation
+                    corrected_new_lines = []
+                    for new_line in new_lines_content:
+                        new_content = new_line.strip()
+
+                        if not new_content:
+                            corrected_new_lines.append(new_line + dominant_ending)
+                            continue
+
+                        # Use the most common indentation level in the original lines
+                        if original_lines_to_replace:
+                            indents = []
+                            for orig_line in original_lines_to_replace:
+                                if orig_line.strip():
+                                    indent_len = len(orig_line) - len(orig_line.lstrip())
+                                    indents.append(indent_len)
+
+                            if indents:
+                                # Use the most common indentation level
+                                common_indent = max(set(indents), key=indents.count)
+                                adapted_indent = ' ' * common_indent
+                                corrected_new_lines.append(adapted_indent + new_content + dominant_ending)
+                            else:
+                                corrected_new_lines.append(new_line + dominant_ending)
+                        else:
+                            corrected_new_lines.append(new_line + dominant_ending)
+
+            final_lines_with_endings[remove_pos:end_remove_pos] = corrected_new_lines
+            logger.info(f"Hunk #{hunk_idx}: Applied indentation adaptation ({adaptation_type})")
+        else:
+            # Standard application
+            new_lines_with_endings = []
+            for line in new_lines_content:
+                new_lines_with_endings.append(line + dominant_ending)
+            final_lines_with_endings[remove_pos:end_remove_pos] = new_lines_with_endings

         # --- Update Offset ---
         # The actual number of lines removed might be different from actual_remove_count
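Most of the fallback branches above re-anchor each added line to the most common indentation among the lines being replaced (max(set(indents), key=indents.count)). A minimal standalone sketch of that fallback, with hypothetical names:

def reindent_to_modal_indent(new_lines, original_lines):
    # Re-anchor each non-blank new line to the modal indent of the replaced block.
    indents = [len(l) - len(l.lstrip()) for l in original_lines if l.strip()]
    if not indents:
        return list(new_lines)
    common = max(set(indents), key=indents.count)
    return [' ' * common + l.strip() if l.strip() else l for l in new_lines]

For example, reindent_to_modal_indent(['        x = 1'], ['    y = 2']) yields ['    x = 1'].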
app/utils/diff_utils/application/simple_identical_blocks_fix.py

@@ -0,0 +1,129 @@
+"""
+Simple fix for identical adjacent blocks by improving position selection.
+"""
+
+import logging
+import difflib
+from typing import List, Optional, Tuple
+
+logger = logging.getLogger(__name__)
+
+def find_best_position_for_identical_blocks(
+    file_lines: List[str],
+    old_lines: List[str],
+    expected_pos: int,
+    search_radius: int = 10
+) -> Tuple[Optional[int], float]:
+    """
+    Find the best position for applying changes when there are identical blocks.
+
+    This function is more conservative and prefers positions closer to the expected position.
+    """
+    if not old_lines:
+        return expected_pos, 1.0
+
+    # First, check if the expected position is an exact match
+    if (expected_pos + len(old_lines) <= len(file_lines) and
+            expected_pos >= 0):
+
+        candidate_lines = file_lines[expected_pos:expected_pos + len(old_lines)]
+        if lines_match_exactly(old_lines, candidate_lines):
+            logger.debug(f"Exact match found at expected position {expected_pos}")
+            return expected_pos, 1.0
+
+    # Search in a small radius around the expected position
+    best_pos = None
+    best_score = 0.0
+
+    start_search = max(0, expected_pos - search_radius)
+    end_search = min(len(file_lines) - len(old_lines), expected_pos + search_radius)
+
+    for pos in range(start_search, end_search + 1):
+        if pos + len(old_lines) > len(file_lines):
+            continue
+
+        candidate_lines = file_lines[pos:pos + len(old_lines)]
+
+        # Calculate match score
+        match_score = calculate_match_score(old_lines, candidate_lines)
+
+        # Add distance penalty - prefer positions closer to expected
+        distance_penalty = abs(pos - expected_pos) / max(search_radius, 1)
+        adjusted_score = match_score * (1.0 - distance_penalty * 0.3)
+
+        if adjusted_score > best_score:
+            best_score = adjusted_score
+            best_pos = pos
+
+        logger.debug(f"Position {pos}: match_score={match_score:.3f}, "
+                     f"distance_penalty={distance_penalty:.3f}, "
+                     f"adjusted_score={adjusted_score:.3f}")
+
+    return best_pos, best_score
+
+def lines_match_exactly(lines1: List[str], lines2: List[str]) -> bool:
+    """Check if two lists of lines match exactly (ignoring whitespace)."""
+    if len(lines1) != len(lines2):
+        return False
+
+    for l1, l2 in zip(lines1, lines2):
+        if l1.strip() != l2.strip():
+            return False
+
+    return True
+
+def calculate_match_score(lines1: List[str], lines2: List[str]) -> float:
+    """Calculate how well two lists of lines match."""
+    if not lines1 and not lines2:
+        return 1.0
+    if not lines1 or not lines2:
+        return 0.0
+
+    # Use difflib for similarity
+    text1 = '\n'.join(line.strip() for line in lines1)
+    text2 = '\n'.join(line.strip() for line in lines2)
+
+    return difflib.SequenceMatcher(None, text1, text2).ratio()
+
+def detect_and_fix_identical_blocks_issue(
+    file_lines: List[str],
+    old_lines: List[str],
+    expected_pos: int
+) -> Tuple[Optional[int], float]:
+    """
+    Detect if this is an identical blocks case and return a better position.
+    """
+    # Look for other occurrences of similar patterns
+    similar_positions = []
+
+    if len(old_lines) < 3:
+        # Too short to be meaningful
+        return None, 0.0
+
+    # Find the most distinctive line in the pattern
+    distinctive_line = None
+    for line in old_lines:
+        stripped = line.strip()
+        if (stripped and
+                len(stripped) > 10 and
+                stripped not in ['return None', 'pass', '{}', '[]'] and
+                not stripped.startswith('#')):
+            distinctive_line = stripped
+            break
+
+    if not distinctive_line:
+        return None, 0.0
+
+    # Find all occurrences of the distinctive line
+    for i, file_line in enumerate(file_lines):
+        if file_line.strip() == distinctive_line:
+            similar_positions.append(i)
+
+    if len(similar_positions) <= 1:
+        # Not an identical blocks case
+        return None, 0.0
+
+    logger.debug(f"Found identical blocks case with {len(similar_positions)} similar positions: {similar_positions}")
+
+    # Use the improved position finding
+    return find_best_position_for_identical_blocks(file_lines, old_lines, expected_pos)
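Assuming the module is importable from the path above, the new helper can be exercised directly. In this hypothetical input the distinctive line occurs twice, so the identical-blocks path engages and the exact match at the expected position wins:

from app.utils.diff_utils.application.simple_identical_blocks_fix import (
    detect_and_fix_identical_blocks_issue,
)

file_lines = ['def f():', '    return compute_value(x)', '',
              'def g():', '    return compute_value(x)', '']
old_lines = ['def g():', '    return compute_value(x)', '']
pos, score = detect_and_fix_identical_blocks_issue(file_lines, old_lines, expected_pos=3)
# pos == 3, score == 1.0: "return compute_value(x)" occurs at two positions,
# and the exact match at the expected position beats the earlier duplicate.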
app/utils/diff_utils/parsing/diff_parser.py

@@ -238,6 +238,13 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> List[D
             continue

         if line.startswith('@@ '):
+            # If we were already in a hunk, finish processing it first
+            if in_hunk and current_hunk:
+                # Finalize the previous hunk
+                hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
+                if hunk_key not in seen_hunks:
+                    seen_hunks.add(hunk_key)
+
             match = re.match(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?', line)
             hunk_num = int(match.group(5)) if match and match.group(5) else len(hunks) + 1
             if match:
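The hunk-header regex here also tolerates an optional trailing "Hunk #N" marker. A quick illustration of what it captures:

import re

header_re = re.compile(r'^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(?:\s+Hunk #(\d+))?')
m = header_re.match('@@ -238,6 +238,13 @@ Hunk #2')
assert m is not None
# Groups: old start, old count, new start, new count, optional hunk number.
assert m.groups() == ('238', '6', '238', '13', '2')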
@@ -279,29 +286,46 @@ def parse_unified_diff_exact_plus(diff_content: str, target_file: str) -> List[D
             current_hunk = hunk

             i += 1
-            # Validate the hunk header against the actual content
-            # This helps catch malformed hunks early
-            if current_hunk:
-                # Count the number of lines in the hunk
-                hunk_lines = []
-                j = i
-                while j < len(lines) and lines[j].startswith((' ', '+', '-', '\\')):
-                    hunk_lines.append(lines[j])
-                    j += 1
-                current_hunk['expected_line_count'] = len(hunk_lines)
             continue

         if in_hunk:
-            #
-            if
+            # Check if this line starts a new hunk (another @@ line) - this should end the current hunk
+            if line.startswith('@@ '):
+                # This will be handled by the @@ section above, so just end this hunk
                 in_hunk = False
                 if current_hunk:
                     # Check if this hunk is complete and unique
                     hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
                     if hunk_key not in seen_hunks:
                         seen_hunks.add(hunk_key)
-                i
+                # Don't increment i here, let the @@ handler process this line
                 continue
+
+            # End of hunk reached if we see a line that doesn't start with ' ', '+', '-', or '\'
+            # BUT we need to be more careful about what constitutes the end of a hunk
+            if not line.startswith((' ', '+', '-', '\\')):
+                # Check if this is actually the end of the diff content or just an empty line
+                # Empty lines within a hunk should be treated as context lines
+                if line.strip() == '':
+                    # This is an empty line - treat it as a context line if we're still within the hunk bounds
+                    if current_hunk:
+                        current_hunk['lines'].append(line)
+                        # Empty lines are context lines (should be in both old and new)
+                        current_hunk['new_lines'].append('')
+                        current_hunk['old_block'].append('')
+                    i += 1
+                    continue
+                else:
+                    # This is a non-diff line, end the hunk
+                    in_hunk = False
+                    if current_hunk:
+                        # Check if this hunk is complete and unique
+                        hunk_key = (tuple(current_hunk['old_block']), tuple(current_hunk['new_lines']))
+                        if hunk_key not in seen_hunks:
+                            seen_hunks.add(hunk_key)
+                    i += 1
+                    continue
+
         if current_hunk:
             current_hunk['lines'].append(line)
             if line.startswith('-'):
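The new parsing branch effectively classifies each line inside a hunk body: '+' and '-' are change lines, ' ' and '\' continue the hunk, blank lines count as context on both sides, and anything else ends the hunk (a fresh '@@ ' header is handed back to the header handler). A compact sketch of that rule, with a hypothetical helper name:

def classify_hunk_line(line: str) -> str:
    # Mirror the parser's decision: blank lines stay inside the hunk as context.
    if line.startswith('+'):
        return 'added'
    if line.startswith('-'):
        return 'removed'
    if line.startswith((' ', '\\')) or line.strip() == '':
        return 'context'
    return 'end-of-hunk'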
app/utils/diff_utils/pipeline/pipeline_manager.py

@@ -67,11 +67,29 @@ def apply_diff_pipeline(git_diff: str, file_path: str, request_id: Optional[str]

     if len(individual_diffs) > 1:
         # Find the diff that matches our target file
-
-
+        # Compare using basename to handle full paths vs relative paths
+        target_basename = os.path.basename(file_path)
+        matching_diff = None
+
+        for diff in individual_diffs:
+            diff_target = extract_target_file_from_diff(diff)
+            if diff_target:
+                # Try exact match first
+                if diff_target == file_path or diff_target == target_basename:
+                    matching_diff = diff
+                    break
+                # Try basename match
+                elif os.path.basename(diff_target) == target_basename:
+                    matching_diff = diff
+                    break
+
         if matching_diff:
+            logger.debug(f"Found matching diff for target file: {file_path}")
             git_diff = matching_diff
             pipeline.current_diff = git_diff
+        else:
+            logger.warning(f"No matching diff found for target file: {file_path}")
+            logger.debug(f"Available diff targets: {[extract_target_file_from_diff(d) for d in individual_diffs]}")

     # Get the base directory
     user_codebase_dir = os.environ.get("ZIYA_USER_CODEBASE_DIR")
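The selection loop prefers an exact target-path match and only then falls back to basename equality, evaluated per diff. The same logic condensed into a hypothetical helper (extract_target is assumed to behave like extract_target_file_from_diff above):

import os
from typing import Callable, List, Optional

def pick_matching_diff(diffs: List[str], file_path: str,
                       extract_target: Callable[[str], Optional[str]]) -> Optional[str]:
    # Exact path (or bare basename) match wins; basename-only match is the fallback.
    target_basename = os.path.basename(file_path)
    for diff in diffs:
        target = extract_target(diff)
        if not target:
            continue
        if target == file_path or target == target_basename:
            return diff
        if os.path.basename(target) == target_basename:
            return diff
    return None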
@@ -175,7 +193,25 @@ def apply_diff_pipeline(git_diff: str, file_path: str, request_id: Optional[str]
     # If force difflib flag is set, skip system patch and git apply
     if os.environ.get('ZIYA_FORCE_DIFFLIB'):
         logger.info("Force difflib mode enabled, bypassing system patch and git apply")
-
+        pipeline.update_stage(PipelineStage.DIFFLIB)
+        difflib_result = run_difflib_stage(pipeline, file_path, git_diff, original_lines)
+
+        # Complete the pipeline and return the proper result dictionary
+        if difflib_result:
+            pipeline.result.changes_written = True
+
+        # Set the final status based on hunk results
+        if all(tracker.status in (HunkStatus.SUCCEEDED, HunkStatus.ALREADY_APPLIED)
+               for tracker in pipeline.result.hunks.values()):
+            pipeline.result.status = "success"
+        elif any(tracker.status == HunkStatus.SUCCEEDED
+                 for tracker in pipeline.result.hunks.values()):
+            pipeline.result.status = "partial"
+        else:
+            pipeline.result.status = "error"
+
+        pipeline.complete()
+        return pipeline.result.to_dict()

     # Stage 1: System Patch
     pipeline.update_stage(PipelineStage.SYSTEM_PATCH)
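The rollup at the end reduces per-hunk statuses with all() and any(). The same rule in isolation, using plain strings as stand-ins for the package's HunkStatus values (an assumption; the enum itself isn't shown in this diff):

def rollup_status(statuses):
    # All succeeded/already applied -> success; any success -> partial; else error.
    ok = ('succeeded', 'already_applied')
    if statuses and all(s in ok for s in statuses):
        return 'success'
    if any(s == 'succeeded' for s in statuses):
        return 'partial'
    return 'error'

assert rollup_status(['succeeded', 'already_applied']) == 'success'
assert rollup_status(['succeeded', 'failed']) == 'partial'
assert rollup_status(['failed']) == 'error'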
@@ -882,6 +918,23 @@ def run_difflib_stage(pipeline: DiffPipeline, file_path: str, git_diff: str, ori

         # If the file already contains the target state, mark it as already applied
         if normalized_file_slice == normalized_new_lines:
+            # CRITICAL FIX: For deletion hunks, we need to check if the content to be deleted
+            # still exists in the file. If it does, the hunk is NOT already applied.
+            if 'removed_lines' in hunk:
+                removed_lines = hunk.get('removed_lines', [])
+
+                # If this is a deletion hunk (has lines to remove)
+                if removed_lines:
+                    # Check if the content to be deleted still exists anywhere in the file
+                    removed_content = "\n".join([normalize_line_for_comparison(line) for line in removed_lines])
+                    file_content = "\n".join([normalize_line_for_comparison(line) for line in original_lines])
+
+                    # If the content to be deleted still exists in the file,
+                    # then the hunk is NOT already applied
+                    if removed_content in file_content:
+                        logger.debug(f"Deletion hunk not applied - content to be deleted still exists in file at pos {pos}")
+                        continue
+
             # CRITICAL FIX: Also check if the old_block matches what's in the file
             # This prevents marking a hunk as "already applied" when the file has content
             # that doesn't match what we're trying to remove