ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ziya might be problematic.
- app/main.py +2 -1
- app/server.py +11 -2
- app/templates/asset-manifest.json +17 -17
- app/templates/index.html +1 -1
- app/templates/static/js/14386.567bf803.chunk.js +2 -0
- app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
- app/templates/static/js/94645.a352e47a.chunk.js +2 -0
- app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
- app/templates/static/js/98244.0b90f940.chunk.js +3 -0
- app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
- app/templates/static/js/99948.71670e91.chunk.js +2 -0
- app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
- app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
- app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
- app/utils/aws_utils.py +48 -36
- app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
- app/utils/diff_utils/application/patch_apply.py +248 -2
- app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
- app/utils/diff_utils/parsing/diff_parser.py +37 -13
- app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
- app/utils/diff_utils/validation/validators.py +201 -259
- app/utils/directory_util.py +34 -3
- app/utils/gitignore_parser.py +19 -6
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
- app/templates/static/js/14386.881399c5.chunk.js +0 -2
- app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
- app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
- app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
- app/templates/static/js/94645.68d48e03.chunk.js +0 -2
- app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
- app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
- app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
- /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
app/utils/diff_utils/validation/validators.py CHANGED

@@ -167,289 +167,231 @@ def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: in
 
     # Extract the removed and added lines from the hunk
     removed_lines, added_lines = extract_diff_changes(hunk)
+    new_lines = hunk.get('new_lines', [])
+
+    # Validate hunk header if present
+    if not _is_valid_hunk_header(hunk):
+        return False
+
+    # Handle no-op hunks
+    if not removed_lines and not added_lines:
+        logger.debug("No actual changes in hunk (no removed or added lines)")
+        return True
 
-    #
-    # If this is a pure addition (no lines removed), check if the exact content exists
+    # For pure additions, check if content already exists in file
     if len(removed_lines) == 0 and len(added_lines) > 0:
-
-        # Check if the exact content exists anywhere in the file
-        added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
-        file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
-
-        # If the exact added content doesn't exist in the file, it's not already applied
-        if added_content not in file_content:
-            logger.debug(f"Pure addition not found in file content")
-            return False
+        return _check_pure_addition_already_applied(file_lines, added_lines)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-        if
-
-            similarity = difflib.SequenceMatcher(None,
-                "\n".join(normalized_file_slice),
-                "\n".join(normalized_removed_lines)).ratio()
-            logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-            logger.debug(f"File content: {normalized_file_slice}")
-            logger.debug(f"Removed lines: {normalized_removed_lines}")
+    # For hunks with removals, validate that the content to be removed matches
+    if removed_lines and not _validate_removal_content(file_lines, removed_lines, pos):
+        return False
+
+    # Check if the expected result (new_lines) is already present at this position
+    return _check_expected_content_match(file_lines, new_lines, pos, ignore_whitespace)
+
+
+def _is_valid_hunk_header(hunk: Dict[str, Any]) -> bool:
+    """Check if the hunk header is valid."""
+    if 'header' in hunk and '@@ -' in hunk['header']:
+        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
+        if not header_match:
+            logger.warning(f"Malformed hunk header: {hunk['header']}")
            return False
+    return True
+
+
+def _check_pure_addition_already_applied(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if a pure addition (no removals) is already applied."""
+    # Check if the exact content exists anywhere in the file
+    added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
+    file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
+
+    if added_content not in file_content:
+        logger.debug("Pure addition not found in file content")
+        return False
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Check for duplicate declarations
+    return _check_duplicate_declarations(file_lines, added_lines)
+
+
+def _check_duplicate_declarations(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if added lines contain declarations that already exist in the file."""
+    declaration_patterns = [
+        r'const\s+\w+\s*=',      # const x =
+        r'let\s+\w+\s*=',        # let x =
+        r'var\s+\w+\s*=',        # var x =
+        r'function\s+\w+\s*\(',  # function x(
+        r'class\s+\w+\s*{',      # class x {
+        r'interface\s+\w+\s*{',  # interface x {
+        r'type\s+\w+\s*=',       # type x =
+        r'enum\s+\w+\s*{',       # enum x {
+    ]
+
+    for added_line in added_lines:
+        for pattern in declaration_patterns:
+            match = re.search(pattern, added_line)
+            if match:
+                # Extract declaration name
+                declaration_name = None
+                for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
+                    if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
+                        declaration_name = m.group(1)
+                        break
+
+                if declaration_name:
+                    # Check if this declaration already exists elsewhere in the file
+                    for line in file_lines:
+                        if declaration_name in line:
+                            for p in declaration_patterns:
+                                if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
+                                    logger.debug(f"Found duplicate declaration of '{declaration_name}'")
+                                    return True
+    return False
+
+
+def _validate_removal_content(file_lines: List[str], removed_lines: List[str], pos: int) -> bool:
+    """Validate that the content to be removed matches what's in the file."""
+    if pos + len(removed_lines) > len(file_lines):
+        return False
+
+    file_slice = file_lines[pos:pos+len(removed_lines)]
+    normalized_file_slice = [normalize_line_for_comparison(line) for line in file_slice]
+    normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
+
+    if normalized_file_slice != normalized_removed_lines:
+        similarity = difflib.SequenceMatcher(None,
+            "\n".join(normalized_file_slice),
+            "\n".join(normalized_removed_lines)).ratio()
+        logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
+        logger.debug(f"File content: {normalized_file_slice}")
+        logger.debug(f"Removed lines: {normalized_removed_lines}")
+        return False
 
-
+    return True
+
+
+def _check_expected_content_match(file_lines: List[str], new_lines: List[str], pos: int, ignore_whitespace: bool) -> bool:
+    """Check if the expected content after applying the hunk is already present."""
     if pos + len(new_lines) > len(file_lines):
         logger.debug(f"Not enough lines to compare at position {pos}")
         return False
 
-    # Extract the file content at the position
     file_slice = file_lines[pos:pos+len(new_lines)]
 
-    #
-
-
-
-            logger.debug(f"  File: {repr(file_line)}")
-            logger.debug(f"  Expected: {repr(new_line)}")
-            return False
-
-    logger.debug(f"Hunk already applied at position {pos}")
-    return True
+    # Try exact match first
+    if _lines_match_exactly(file_slice, new_lines):
+        logger.debug(f"Exact match of expected content found at position {pos}")
+        return True
 
-    #
+    # Try with various normalizations
+    if _lines_match_with_normalization(file_slice, new_lines, ignore_whitespace):
+        return True
 
-    #
-
-
-
-
-
-
-
-    normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
-
-    # If the file content doesn't match what we're trying to remove,
-    # then this hunk can't be already applied here
-    if normalized_file_slice != normalized_removed_lines:
-        # Calculate similarity to help with debugging
-        similarity = difflib.SequenceMatcher(None,
-            "\n".join(normalized_file_slice),
-            "\n".join(normalized_removed_lines)).ratio()
-        logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-        logger.debug(f"File content: {normalized_file_slice}")
-        logger.debug(f"Removed lines: {normalized_removed_lines}")
-        return False
-
-    # 2. Check if the diff header is malformed
-    if 'header' in hunk and '@@ -' in hunk['header']:
-        # Check if the header has proper line numbers
-        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
-        if not header_match:
-            logger.warning(f"Malformed hunk header: {hunk['header']}")
-            # Don't mark hunks with malformed headers as already applied
+    # Try fuzzy matching as last resort
+    return _lines_match_fuzzy(file_slice, new_lines)
+
+
+def _lines_match_exactly(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match exactly."""
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line) != normalize_line_for_comparison(expected_line):
            return False
+    return True
+
+
+def _lines_match_with_normalization(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if lines match with various normalizations applied."""
+    # Check for whitespace-only changes
+    if _check_whitespace_only_changes(file_lines, expected_lines, ignore_whitespace):
+        return True
 
-    #
-    if
-
+    # Check for invisible Unicode characters
+    if _check_invisible_unicode_match(file_lines, expected_lines):
+        return True
+
+    # Check for escape sequences
+    if _check_escape_sequence_match(file_lines, expected_lines):
        return True
 
-
-
-
+    return False
+
+
+def _check_whitespace_only_changes(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if the differences are only in whitespace."""
+    if len(file_lines) != len(expected_lines):
+        return False
+
+    # Check if content is the same ignoring whitespace
+    whitespace_only = True
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+            whitespace_only = False
+            break
+
+    if not whitespace_only:
        return False
 
-    #
-
-
-
+    # For whitespace-only changes, check exact match based on ignore_whitespace setting
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if not ignore_whitespace:
+            if file_line.rstrip('\r\n') != expected_line.rstrip('\r\n'):
+                # Try normalizing invisible characters
+                if normalize_unicode(file_line.rstrip('\r\n')) != normalize_unicode(expected_line.rstrip('\r\n')):
+                    return False
+        else:
+            if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+                return False
+
+    logger.debug("Whitespace-only changes already applied")
+    return True
+
+
+def _check_invisible_unicode_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when invisible Unicode characters are normalized."""
+    if not any('\u200B' in line or '\u200C' in line or '\u200D' in line or '\uFEFF' in line for line in expected_lines):
+        return False
+
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_unicode(file_line)
+        normalized_expected_line = normalize_unicode(expected_line)
 
-
-    if not expected_lines:
-        # If it's a pure deletion, this check isn't sufficient.
-        # For now, assume if new_lines is empty, it's not "already applied" in the sense of content matching.
-        logger.debug("Hunk results in empty content (deletion), cannot match based on new_lines.")
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
            return False
-
-    # Check if the available lines match the expected lines
-    if len(available_lines) >= len(expected_lines):
-        # Now check for exact match of expected content
-        exact_match = True
-        for i, expected_line in enumerate(expected_lines):
-            if i >= len(available_lines):
-                exact_match = False
-                break
-
-            # Use our enhanced normalization for better comparison
-            normalized_file_line = normalize_line_for_comparison(available_lines[i])
-            normalized_expected_line = normalize_line_for_comparison(expected_line)
-
-            if normalized_file_line != normalized_expected_line:
-                # If normalized versions don't match, it's definitely not applied
-                exact_match = False
-                break
-
-        if exact_match:
-            logger.debug(f"Exact match of expected content found at position {pos}")
-            return True
-
-    # CRITICAL FIX: Check for duplicate declarations
-    # This is a language-agnostic approach that looks for patterns like duplicate variable declarations
-    if added_lines:
-        # Look for patterns that might indicate declarations
-        declaration_patterns = [
-            r'const\s+\w+\s*=',      # const x =
-            r'let\s+\w+\s*=',        # let x =
-            r'var\s+\w+\s*=',        # var x =
-            r'function\s+\w+\s*\(',  # function x(
-            r'class\s+\w+\s*{',      # class x {
-            r'interface\s+\w+\s*{',  # interface x {
-            r'type\s+\w+\s*=',       # type x =
-            r'enum\s+\w+\s*{',       # enum x {
-        ]
-
-        # Check if any added line matches a declaration pattern
-        for added_line in added_lines:
-            for pattern in declaration_patterns:
-                match = re.search(pattern, added_line)
-                if match:
-                    # Found a potential declaration, check if it already exists elsewhere in the file
-                    declaration_name = None
-                    for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
-                        if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
-                            declaration_name = m.group(1)
-                            break
-
-                    if declaration_name:
-                        # Check if this declaration already exists elsewhere in the file
-                        for i, line in enumerate(file_lines):
-                            if i != pos and declaration_name in line:
-                                for p in declaration_patterns:
-                                    if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
-                                        logger.debug(f"Found duplicate declaration of '{declaration_name}' at line {i}")
-                                        # This declaration already exists elsewhere, so this hunk might be already applied
-                                        return True
-
-    # Check if this is a whitespace-only change
-    if len(removed_lines) == len(added_lines):
-        whitespace_only = True
-        for removed, added in zip(removed_lines, added_lines):
-            # Compare non-whitespace content
-            if normalize_line_for_comparison(removed).strip() != normalize_line_for_comparison(added).strip():
-                whitespace_only = False
-                break
-
-        if whitespace_only and removed_lines:  # Only if there are actual changes
-            # For whitespace-only changes, check if the file already has the correct whitespace
-            if len(available_lines) >= len(added_lines):
-                all_match = True
-                for i, added_line in enumerate(added_lines):
-                    if i >= len(available_lines):
-                        all_match = False
-                        break
-
-                    # Compare with exact whitespace if not ignoring whitespace
-                    if not ignore_whitespace:
-                        if available_lines[i].rstrip('\r\n') != added_line.rstrip('\r\n'):
-                            # Try normalizing invisible characters
-                            if normalize_unicode(available_lines[i].rstrip('\r\n')) != normalize_unicode(added_line.rstrip('\r\n')):
-                                all_match = False
-                                break
-                    else:
-                        # Compare ignoring whitespace
-                        if normalize_line_for_comparison(available_lines[i]).strip() != normalize_line_for_comparison(added_line).strip():
-                            all_match = False
-                            break
-
-                if all_match:
-                    logger.debug("Whitespace-only changes already applied")
-                    return True
 
-
-
-
-
-
-
-
-
-                all_match = False
-                break
-
-            # Normalize both lines to remove invisible characters
-            normalized_file_line = normalize_unicode(available_lines[i])
-            normalized_added_line = normalize_unicode(added_line)
-
-            # Compare normalized content
-            if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                all_match = False
-                break
-
-        if all_match:
-            logger.debug("Content with invisible Unicode characters already applied (normalized)")
-            return True
+    logger.debug("Content with invisible Unicode characters already applied (normalized)")
+    return True
+
+
+def _check_escape_sequence_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when escape sequences are normalized."""
+    if not any('\\n' in line or '\\r' in line or '\\t' in line or '\\\\' in line for line in expected_lines):
+        return False
 
-
-
-
-    # Check if the file already has the content with properly handled escape sequences
-    if len(available_lines) >= len(added_lines):
-        all_match = True
-        for i, added_line in enumerate(added_lines):
-            if i >= len(available_lines):
-                all_match = False
-                break
-
-            # Normalize both lines to handle escape sequences
-            normalized_file_line = normalize_escape_sequences(available_lines[i])
-            normalized_added_line = normalize_escape_sequences(added_line)
-
-            # Compare normalized content
-            if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                all_match = False
-                break
-
-        if all_match:
-            logger.debug("Content with escape sequences already applied (normalized)")
-            return True
-
-    # Calculate overall similarity for fuzzy matching
-    if len(available_lines) >= len(added_lines) and added_lines:
-        # Normalize both sides for comparison
-        normalized_available = [normalize_line_for_comparison(line) for line in available_lines[:len(added_lines)]]
-        normalized_added = [normalize_line_for_comparison(line) for line in added_lines]
-
-        similarity = calculate_block_similarity(normalized_available, normalized_added)
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_escape_sequences(file_line)
+        normalized_expected_line = normalize_escape_sequences(expected_line)
 
-
-
-
-
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
+            return False
+
+    logger.debug("Content with escape sequences already applied (normalized)")
+    return True
+
+
+def _lines_match_fuzzy(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match using fuzzy matching."""
+    if not expected_lines:
+        return False
+
+    # Normalize both sides for comparison
+    normalized_file = [normalize_line_for_comparison(line) for line in file_lines]
+    normalized_expected = [normalize_line_for_comparison(line) for line in expected_lines]
+
+    similarity = calculate_block_similarity(normalized_file, normalized_expected)
+
+    # Very high similarity suggests the changes are already applied
+    if similarity >= 0.95:
+        logger.debug(f"Very high similarity ({similarity:.2f}) suggests hunk already applied")
+        return True
 
    return False
app/utils/directory_util.py CHANGED

@@ -4,6 +4,7 @@ import time
 import signal
 from typing import List, Tuple, Dict, Any, Optional
 from app.utils.logging_utils import logger
+import re
 
 from app.utils.file_utils import is_binary_file, is_document_file, is_processable_file, read_file_content
 from app.utils.logging_utils import logger
@@ -33,9 +34,16 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
     # Add additional exclude directories from environment variable if it exists
     additional_excludes = os.environ.get("ZIYA_ADDITIONAL_EXCLUDE_DIRS", "")
     if additional_excludes:
+        logger.info(f"Processing additional excludes: {additional_excludes}")
        for pattern in additional_excludes.split(','):
+            pattern = pattern.strip()
            if pattern:
                ignored_patterns.append((pattern, user_codebase_dir))
+                logger.info(f"Added exclude pattern: {pattern}")
+
+    logger.info(f"Total ignore patterns: {len(ignored_patterns)}")
+    for pattern, base in ignored_patterns:
+        logger.debug(f"Ignore pattern: {pattern} (base: {base})")
 
     def read_gitignore(path: str) -> List[Tuple[str, str]]:
         gitignore_patterns: List[Tuple[str, str]] = []
@@ -44,7 +52,14 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
            for line_number, line in enumerate(f, 1):
                line = line.strip()
                if line and not line.startswith("#"):
-
+                    try:
+                        # Test if the pattern would create a valid regex
+                        from app.utils.gitignore_parser import rule_from_pattern
+                        test_rule = rule_from_pattern(line, base_path=os.path.dirname(path))
+                        if test_rule:
+                            gitignore_patterns.append((line, os.path.dirname(path)))
+                    except re.error as e:
+                        logger.warning(f"Skipping invalid gitignore pattern '{line}' in {path}:{line_number}: {e}")
        except FileNotFoundError:
            logger.debug(f".gitignore not found at {path}")
        except Exception as e:
@@ -59,7 +74,15 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
        patterns.extend(read_gitignore(gitignore_path))
 
        for subdir in glob.glob(os.path.join(path, "*/")):
-
+            # Skip directories with problematic characters that cause regex errors
+            dir_name = os.path.basename(subdir.rstrip('/'))
+            if '[' in dir_name or ']' in dir_name:
+                logger.debug(f"Skipping directory with brackets: {subdir}")
+                continue
+            try:
+                patterns.extend(get_patterns_recursive(subdir))
+            except re.error as e:
+                logger.warning(f"Skipping directory due to regex error: {subdir} - {e}")
 
        return patterns
 
@@ -77,7 +100,11 @@ def get_complete_file_list(user_codebase_dir: str, ignored_patterns: List[str],
     for pattern in included_relative_dirs:
        for root, dirs, files in os.walk(os.path.normpath(os.path.join(user_codebase_dir, pattern))):
            # Filter out ignored directories and hidden directories
-
+            # Also filter out symlinks to prevent following them into ignored directories
+            dirs[:] = [d for d in dirs
+                       if not should_ignore_fn(os.path.join(root, d))
+                       and not d.startswith('.')
+                       and not os.path.islink(os.path.join(root, d))]
 
            for file in files:
                file_path = os.path.join(root, file)
@@ -216,10 +243,14 @@ def get_folder_structure(directory: str, ignored_patterns: List[Tuple[str, str]]
            continue
 
        entry_path = os.path.join(path, entry)
+
+        # Skip symlinks early to prevent following them into ignored directories
        if os.path.islink(entry_path):  # Skip symlinks
+            logger.debug(f"Skipping symlink: {entry_path}")
            continue
 
        if should_ignore_fn(entry_path):  # Skip ignored files
+            logger.debug(f"Ignoring path: {entry_path}")
            continue
 
        if os.path.isdir(entry_path):
app/utils/gitignore_parser.py CHANGED

@@ -182,12 +182,25 @@ def fnmatch_pathname_to_regex(
                res.append('\\[')
            else:
                stuff = pattern[i:j].replace('\\', '\\\\').replace('/', '')
-
-
-
-
-
-
+                # Validate character ranges to prevent regex errors
+                try:
+                    # Test if the character class is valid by compiling a test regex
+                    if stuff:
+                        test_pattern = f'[{stuff}]'
+                        re.compile(test_pattern)
+                    # If we get here, the pattern is valid - process it normally
+                    i = j + 1
+                    if stuff[0] == '!':
+                        stuff = ''.join(['^', stuff[1:]])
+                    elif stuff[0] == '^':
+                        stuff = ''.join('\\' + stuff)
+                    res.append('[{}]'.format(stuff))
+                except re.error:
+                    # If invalid, escape the original brackets
+                    res.append('\\[')
+                    res.append(re.escape(stuff))
+                    res.append('\\]')
+                    i = j + 1  # Still need to advance the position
        else:
            res.append(re.escape(c))
    if anchored:
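The gitignore_parser.py change leans on re.compile rejecting malformed character classes (a reversed range such as z-a raises re.error). A minimal stand-alone illustration of both branches, where safe_char_class is a hypothetical name rather than ziya's API and the hunk's extra handling of leading ! and ^ is omitted:

import re

def safe_char_class(stuff: str) -> str:
    # Mirror of the hunk's fallback: keep a real character class when
    # [stuff] compiles, otherwise emit the brackets and contents escaped.
    try:
        re.compile(f'[{stuff}]')
        return f'[{stuff}]'
    except re.error:
        return '\\[' + re.escape(stuff) + '\\]'

print(safe_char_class('a-z'))  # [a-z]     -- valid range, kept as a class
print(safe_char_class('z-a'))  # \[z\-a\]  -- bad range, escaped literally

Escaping instead of raising keeps one bad .gitignore line from aborting traversal of the whole tree, the same failure mode the directory_util.py hunks above guard against by catching re.error.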
{ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ziya
-Version: 0.2.4
+Version: 0.2.4.2
 Summary:
 Author: Vishnu Krishnaprasad
 Author-email: vishnukool@gmail.com
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
 Requires-Dist: boto3 (>=1.34.88,<2.0.0)
+Requires-Dist: cryptography (>=3.4.8,<43.0.0)
 Requires-Dist: cssutils (>=2.6.0)
 Requires-Dist: html5lib (>=1.1)
 Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
@@ -24,10 +25,12 @@ Requires-Dist: langchain-community (>=0.3.1,<0.4.0)
 Requires-Dist: langchain-google-genai (>=2.1.0,<3.0.0)
 Requires-Dist: langchainhub (>=0.1.15)
 Requires-Dist: langgraph (>=0.2,<0.3)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openpyxl (>=3.1.2,<4.0.0)
-Requires-Dist: pandas (>=2.
+Requires-Dist: pandas (>=2.0.0,<2.3.0)
 Requires-Dist: patch-ng (>=1.17)
 Requires-Dist: pdfplumber (>=0.10.0,<0.11.0)
+Requires-Dist: pyOpenSSL (>=20.0.0,<25.0.0)
 Requires-Dist: pydantic (>=2.9.2,<3.0.0)
 Requires-Dist: pydevd-pycharm (>=243.18137.19,<244.0.0)
 Requires-Dist: python-docx (>=1.1.0,<2.0.0)