ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ziya might be problematic.

Files changed (38)
  1. app/main.py +2 -1
  2. app/server.py +11 -2
  3. app/templates/asset-manifest.json +17 -17
  4. app/templates/index.html +1 -1
  5. app/templates/static/js/14386.567bf803.chunk.js +2 -0
  6. app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
  7. app/templates/static/js/94645.a352e47a.chunk.js +2 -0
  8. app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
  9. app/templates/static/js/98244.0b90f940.chunk.js +3 -0
  10. app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
  11. app/templates/static/js/99948.71670e91.chunk.js +2 -0
  12. app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
  13. app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
  14. app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
  15. app/utils/aws_utils.py +48 -36
  16. app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
  17. app/utils/diff_utils/application/patch_apply.py +248 -2
  18. app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
  19. app/utils/diff_utils/parsing/diff_parser.py +37 -13
  20. app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
  21. app/utils/diff_utils/validation/validators.py +201 -259
  22. app/utils/directory_util.py +34 -3
  23. app/utils/gitignore_parser.py +19 -6
  24. {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
  25. {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
  26. app/templates/static/js/14386.881399c5.chunk.js +0 -2
  27. app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
  28. app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
  29. app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
  30. app/templates/static/js/94645.68d48e03.chunk.js +0 -2
  31. app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
  32. app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
  33. app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
  34. /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
  35. /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
  36. {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
  37. {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
  38. {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
@@ -167,289 +167,231 @@ def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: in
 
     # Extract the removed and added lines from the hunk
     removed_lines, added_lines = extract_diff_changes(hunk)
+    new_lines = hunk.get('new_lines', [])
+
+    # Validate hunk header if present
+    if not _is_valid_hunk_header(hunk):
+        return False
+
+    # Handle no-op hunks
+    if not removed_lines and not added_lines:
+        logger.debug("No actual changes in hunk (no removed or added lines)")
+        return True
 
-    # CRITICAL FIX: For hunks that add new content (like imports), we need to be more strict
-    # If this is a pure addition (no lines removed), check if the exact content exists
+    # For pure additions, check if content already exists in file
     if len(removed_lines) == 0 and len(added_lines) > 0:
-        # This is a pure addition - be more strict about considering it already applied
-        # Check if the exact content exists anywhere in the file
-        added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
-        file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
-
-        # If the exact added content doesn't exist in the file, it's not already applied
-        if added_content not in file_content:
-            logger.debug(f"Pure addition not found in file content")
-            return False
+        return _check_pure_addition_already_applied(file_lines, added_lines)
 
-    # Check if the file content at this position matches what we're trying to remove
-    # This is essential to prevent marking a hunk as "already applied" when the file content doesn't match
-    # what we're trying to remove
-    if removed_lines and pos + len(removed_lines) <= len(file_lines):
-        file_slice_for_removed = file_lines[pos:pos+len(removed_lines)]
-
-        # Normalize both for comparison
-        normalized_file_slice = [normalize_line_for_comparison(line) for line in file_slice_for_removed]
-        normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
-
-        # If the file content doesn't match what we're trying to remove,
-        # then this hunk can't be already applied here
-        if normalized_file_slice != normalized_removed_lines:
-            # Calculate similarity to help with debugging
-            similarity = difflib.SequenceMatcher(None,
-                "\n".join(normalized_file_slice),
-                "\n".join(normalized_removed_lines)).ratio()
-            logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-            logger.debug(f"File content: {normalized_file_slice}")
-            logger.debug(f"Removed lines: {normalized_removed_lines}")
+    # For hunks with removals, validate that the content to be removed matches
+    if removed_lines and not _validate_removal_content(file_lines, removed_lines, pos):
+        return False
+
+    # Check if the expected result (new_lines) is already present at this position
+    return _check_expected_content_match(file_lines, new_lines, pos, ignore_whitespace)
+
+
+def _is_valid_hunk_header(hunk: Dict[str, Any]) -> bool:
+    """Check if the hunk header is valid."""
+    if 'header' in hunk and '@@ -' in hunk['header']:
+        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
+        if not header_match:
+            logger.warning(f"Malformed hunk header: {hunk['header']}")
             return False
+    return True
+
+
+def _check_pure_addition_already_applied(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if a pure addition (no removals) is already applied."""
+    # Check if the exact content exists anywhere in the file
+    added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
+    file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
+
+    if added_content not in file_content:
+        logger.debug("Pure addition not found in file content")
+        return False
 
-    # CRITICAL FIX: Direct check if the expected content after applying the hunk is already present
-    # This is the most reliable way to determine if a hunk is already applied
-    new_lines = hunk.get('new_lines', [])
-    if pos + len(new_lines) <= len(file_lines):
-        file_slice = file_lines[pos:pos+len(new_lines)]
-
-        # Compare the file content with the expected content
-        exact_match = True
-        for i, (file_line, new_line) in enumerate(zip(file_slice, new_lines)):
-            if normalize_line_for_comparison(file_line) != normalize_line_for_comparison(new_line):
-                exact_match = False
-                logger.debug(f"Line mismatch at position {pos+i}")
-                logger.debug(f"  File: {repr(file_line)}")
-                logger.debug(f"  Expected: {repr(new_line)}")
-                break
-
-        if exact_match:
-            logger.debug(f"Exact match of expected content found at position {pos}")
-            return True
+    # Check for duplicate declarations
+    return _check_duplicate_declarations(file_lines, added_lines)
+
+
+def _check_duplicate_declarations(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if added lines contain declarations that already exist in the file."""
+    declaration_patterns = [
+        r'const\s+\w+\s*=',      # const x =
+        r'let\s+\w+\s*=',        # let x =
+        r'var\s+\w+\s*=',        # var x =
+        r'function\s+\w+\s*\(',  # function x(
+        r'class\s+\w+\s*{',      # class x {
+        r'interface\s+\w+\s*{',  # interface x {
+        r'type\s+\w+\s*=',       # type x =
+        r'enum\s+\w+\s*{',       # enum x {
+    ]
+
+    for added_line in added_lines:
+        for pattern in declaration_patterns:
+            match = re.search(pattern, added_line)
+            if match:
+                # Extract declaration name
+                declaration_name = None
+                for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
+                    if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
+                        declaration_name = m.group(1)
+                        break
+
+                if declaration_name:
+                    # Check if this declaration already exists elsewhere in the file
+                    for line in file_lines:
+                        if declaration_name in line:
+                            for p in declaration_patterns:
+                                if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
+                                    logger.debug(f"Found duplicate declaration of '{declaration_name}'")
+                                    return True
+    return False
+
+
+def _validate_removal_content(file_lines: List[str], removed_lines: List[str], pos: int) -> bool:
+    """Validate that the content to be removed matches what's in the file."""
+    if pos + len(removed_lines) > len(file_lines):
+        return False
+
+    file_slice = file_lines[pos:pos+len(removed_lines)]
+    normalized_file_slice = [normalize_line_for_comparison(line) for line in file_slice]
+    normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
+
+    if normalized_file_slice != normalized_removed_lines:
+        similarity = difflib.SequenceMatcher(None,
+            "\n".join(normalized_file_slice),
+            "\n".join(normalized_removed_lines)).ratio()
+        logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
+        logger.debug(f"File content: {normalized_file_slice}")
+        logger.debug(f"Removed lines: {normalized_removed_lines}")
+        return False
 
-    # Check if we have enough lines to compare
+    return True
+
+
+def _check_expected_content_match(file_lines: List[str], new_lines: List[str], pos: int, ignore_whitespace: bool) -> bool:
+    """Check if the expected content after applying the hunk is already present."""
     if pos + len(new_lines) > len(file_lines):
         logger.debug(f"Not enough lines to compare at position {pos}")
         return False
 
-    # Extract the file content at the position
     file_slice = file_lines[pos:pos+len(new_lines)]
 
-    # Compare the file content with the expected content
-    for i, (file_line, new_line) in enumerate(zip(file_slice, new_lines)):
-        if normalize_line_for_comparison(file_line) != normalize_line_for_comparison(new_line):
-            logger.debug(f"Line mismatch at position {pos+i}")
-            logger.debug(f"  File: {repr(file_line)}")
-            logger.debug(f"  Expected: {repr(new_line)}")
-            return False
-
-    logger.debug(f"Hunk already applied at position {pos}")
-    return True
+    # Try exact match first
+    if _lines_match_exactly(file_slice, new_lines):
+        logger.debug(f"Exact match of expected content found at position {pos}")
+        return True
 
-    # ENHANCED VERIFICATION: Perform more strict checking for already applied hunks
+    # Try with various normalizations
+    if _lines_match_with_normalization(file_slice, new_lines, ignore_whitespace):
+        return True
 
-    # 1. First check if the file content at this position matches what we're trying to remove
-    # This is essential to prevent marking a hunk as "already applied" when the file content doesn't match
-    # what we're trying to remove
-    if removed_lines and pos + len(removed_lines) <= len(file_lines):
-        file_slice_for_removed = file_lines[pos:pos+len(removed_lines)]
-
-        # Normalize both for comparison
-        normalized_file_slice = [normalize_line_for_comparison(line) for line in file_slice_for_removed]
-        normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
-
-        # If the file content doesn't match what we're trying to remove,
-        # then this hunk can't be already applied here
-        if normalized_file_slice != normalized_removed_lines:
-            # Calculate similarity to help with debugging
-            similarity = difflib.SequenceMatcher(None,
-                "\n".join(normalized_file_slice),
-                "\n".join(normalized_removed_lines)).ratio()
-            logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-            logger.debug(f"File content: {normalized_file_slice}")
-            logger.debug(f"Removed lines: {normalized_removed_lines}")
-            return False
-
-    # 2. Check if the diff header is malformed
-    if 'header' in hunk and '@@ -' in hunk['header']:
-        # Check if the header has proper line numbers
-        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
-        if not header_match:
-            logger.warning(f"Malformed hunk header: {hunk['header']}")
-            # Don't mark hunks with malformed headers as already applied
+    # Try fuzzy matching as last resort
+    return _lines_match_fuzzy(file_slice, new_lines)
+
+
+def _lines_match_exactly(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match exactly."""
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line) != normalize_line_for_comparison(expected_line):
            return False
+    return True
+
+
+def _lines_match_with_normalization(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if lines match with various normalizations applied."""
+    # Check for whitespace-only changes
+    if _check_whitespace_only_changes(file_lines, expected_lines, ignore_whitespace):
+        return True
 
-    # 3. If there are no actual changes (no removed or added lines), it's a no-op
-    if not removed_lines and not added_lines:
-        logger.debug("No actual changes in hunk (no removed or added lines)")
+    # Check for invisible Unicode characters
+    if _check_invisible_unicode_match(file_lines, expected_lines):
+        return True
+
+    # Check for escape sequences
+    if _check_escape_sequence_match(file_lines, expected_lines):
         return True
 
-    # 4. If this is a completely new file or section, it can't be already applied
-    if all(line.startswith('+') for line in hunk.get('old_block', [])):
-        logger.debug("Hunk is adding completely new content, can't be already applied")
+    return False
+
+
+def _check_whitespace_only_changes(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if the differences are only in whitespace."""
+    if len(file_lines) != len(expected_lines):
+        return False
+
+    # Check if content is the same ignoring whitespace
+    whitespace_only = True
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+            whitespace_only = False
+            break
+
+    if not whitespace_only:
        return False
 
-    # 5. Check if the file content at this position matches what we expect after applying the hunk
-    if len(available_lines) >= len(hunk.get('new_lines', [])):
-        # Extract the expected content after applying the hunk
-        expected_lines = hunk.get('new_lines', [])
+    # For whitespace-only changes, check exact match based on ignore_whitespace setting
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if not ignore_whitespace:
+            if file_line.rstrip('\r\n') != expected_line.rstrip('\r\n'):
+                # Try normalizing invisible characters
+                if normalize_unicode(file_line.rstrip('\r\n')) != normalize_unicode(expected_line.rstrip('\r\n')):
+                    return False
+        else:
+            if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+                return False
+
+    logger.debug("Whitespace-only changes already applied")
+    return True
+
+
+def _check_invisible_unicode_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when invisible Unicode characters are normalized."""
+    if not any('\u200B' in line or '\u200C' in line or '\u200D' in line or '\uFEFF' in line for line in expected_lines):
+        return False
+
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_unicode(file_line)
+        normalized_expected_line = normalize_unicode(expected_line)
 
-        # If there are no expected lines, this can't be applied
-        if not expected_lines:
-            # If it's a pure deletion, this check isn't sufficient.
-            # For now, assume if new_lines is empty, it's not "already applied" in the sense of content matching.
-            logger.debug("Hunk results in empty content (deletion), cannot match based on new_lines.")
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
             return False
-
-        # Check if the available lines match the expected lines
-        if len(available_lines) >= len(expected_lines):
-            # Now check for exact match of expected content
-            exact_match = True
-            for i, expected_line in enumerate(expected_lines):
-                if i >= len(available_lines):
-                    exact_match = False
-                    break
-
-                # Use our enhanced normalization for better comparison
-                normalized_file_line = normalize_line_for_comparison(available_lines[i])
-                normalized_expected_line = normalize_line_for_comparison(expected_line)
-
-                if normalized_file_line != normalized_expected_line:
-                    # If normalized versions don't match, it's definitely not applied
-                    exact_match = False
-                    break
-
-            if exact_match:
-                logger.debug(f"Exact match of expected content found at position {pos}")
-                return True
-
-    # CRITICAL FIX: Check for duplicate declarations
-    # This is a language-agnostic approach that looks for patterns like duplicate variable declarations
-    if added_lines:
-        # Look for patterns that might indicate declarations
-        declaration_patterns = [
-            r'const\s+\w+\s*=',      # const x =
-            r'let\s+\w+\s*=',        # let x =
-            r'var\s+\w+\s*=',        # var x =
-            r'function\s+\w+\s*\(',  # function x(
-            r'class\s+\w+\s*{',      # class x {
-            r'interface\s+\w+\s*{',  # interface x {
-            r'type\s+\w+\s*=',       # type x =
-            r'enum\s+\w+\s*{',       # enum x {
-        ]
-
-        # Check if any added line matches a declaration pattern
-        for added_line in added_lines:
-            for pattern in declaration_patterns:
-                match = re.search(pattern, added_line)
-                if match:
-                    # Found a potential declaration, check if it already exists elsewhere in the file
-                    declaration_name = None
-                    for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
-                        if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
-                            declaration_name = m.group(1)
-                            break
-
-                    if declaration_name:
-                        # Check if this declaration already exists elsewhere in the file
-                        for i, line in enumerate(file_lines):
-                            if i != pos and declaration_name in line:
-                                for p in declaration_patterns:
-                                    if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
-                                        logger.debug(f"Found duplicate declaration of '{declaration_name}' at line {i}")
-                                        # This declaration already exists elsewhere, so this hunk might be already applied
-                                        return True
-
-    # Check if this is a whitespace-only change
-    if len(removed_lines) == len(added_lines):
-        whitespace_only = True
-        for removed, added in zip(removed_lines, added_lines):
-            # Compare non-whitespace content
-            if normalize_line_for_comparison(removed).strip() != normalize_line_for_comparison(added).strip():
-                whitespace_only = False
-                break
-
-        if whitespace_only and removed_lines:  # Only if there are actual changes
-            # For whitespace-only changes, check if the file already has the correct whitespace
-            if len(available_lines) >= len(added_lines):
-                all_match = True
-                for i, added_line in enumerate(added_lines):
-                    if i >= len(available_lines):
-                        all_match = False
-                        break
-
-                    # Compare with exact whitespace if not ignoring whitespace
-                    if not ignore_whitespace:
-                        if available_lines[i].rstrip('\r\n') != added_line.rstrip('\r\n'):
-                            # Try normalizing invisible characters
-                            if normalize_unicode(available_lines[i].rstrip('\r\n')) != normalize_unicode(added_line.rstrip('\r\n')):
-                                all_match = False
-                                break
-                    else:
-                        # Compare ignoring whitespace
-                        if normalize_line_for_comparison(available_lines[i]).strip() != normalize_line_for_comparison(added_line).strip():
-                            all_match = False
-                            break
-
-                if all_match:
-                    logger.debug("Whitespace-only changes already applied")
-                    return True
 
-    # Check for invisible Unicode characters
-    if any('\u200B' in line or '\u200C' in line or '\u200D' in line or '\uFEFF' in line for line in added_lines):
-        # This hunk contains invisible Unicode characters
-        # Check if the file already has the content with or without the invisible characters
-        if len(available_lines) >= len(added_lines):
-            all_match = True
-            for i, added_line in enumerate(added_lines):
-                if i >= len(available_lines):
-                    all_match = False
-                    break
-
-                # Normalize both lines to remove invisible characters
-                normalized_file_line = normalize_unicode(available_lines[i])
-                normalized_added_line = normalize_unicode(added_line)
-
-                # Compare normalized content
-                if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                    all_match = False
-                    break
-
-            if all_match:
-                logger.debug("Content with invisible Unicode characters already applied (normalized)")
-                return True
+    logger.debug("Content with invisible Unicode characters already applied (normalized)")
+    return True
+
+
+def _check_escape_sequence_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when escape sequences are normalized."""
+    if not any('\\n' in line or '\\r' in line or '\\t' in line or '\\\\' in line for line in expected_lines):
+        return False
 
-    # Check for escape sequences
-    if any('\\n' in line or '\\r' in line or '\\t' in line or '\\\\' in line for line in added_lines):
-        # This hunk contains escape sequences
-        # Check if the file already has the content with properly handled escape sequences
-        if len(available_lines) >= len(added_lines):
-            all_match = True
-            for i, added_line in enumerate(added_lines):
-                if i >= len(available_lines):
-                    all_match = False
-                    break
-
-                # Normalize both lines to handle escape sequences
-                normalized_file_line = normalize_escape_sequences(available_lines[i])
-                normalized_added_line = normalize_escape_sequences(added_line)
-
-                # Compare normalized content
-                if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                    all_match = False
-                    break
-
-            if all_match:
-                logger.debug("Content with escape sequences already applied (normalized)")
-                return True
-
-    # Calculate overall similarity for fuzzy matching
-    if len(available_lines) >= len(added_lines) and added_lines:
-        # Normalize both sides for comparison
-        normalized_available = [normalize_line_for_comparison(line) for line in available_lines[:len(added_lines)]]
-        normalized_added = [normalize_line_for_comparison(line) for line in added_lines]
-
-        similarity = calculate_block_similarity(normalized_available, normalized_added)
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_escape_sequences(file_line)
+        normalized_expected_line = normalize_escape_sequences(expected_line)
 
-        # Very high similarity suggests the changes are already applied
-        if similarity >= 0.95:
-            logger.debug(f"Very high similarity ({similarity:.2f}) suggests hunk already applied")
-            return True
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
+            return False
+
+    logger.debug("Content with escape sequences already applied (normalized)")
+    return True
+
+
+def _lines_match_fuzzy(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match using fuzzy matching."""
+    if not expected_lines:
+        return False
+
+    # Normalize both sides for comparison
+    normalized_file = [normalize_line_for_comparison(line) for line in file_lines]
+    normalized_expected = [normalize_line_for_comparison(line) for line in expected_lines]
+
+    similarity = calculate_block_similarity(normalized_file, normalized_expected)
+
+    # Very high similarity suggests the changes are already applied
+    if similarity >= 0.95:
+        logger.debug(f"Very high similarity ({similarity:.2f}) suggests hunk already applied")
+        return True
 
     return False
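
Aside: the refactor above splits one long function into focused helpers (_is_valid_hunk_header, _validate_removal_content, _check_expected_content_match, and the _lines_match_* family). A minimal, hypothetical sketch of the header check _is_valid_hunk_header performs (the real helper also reads hunk['header'] and logs through the module logger):

    import re

    # Standalone sketch of the header validation shown in the diff above.
    def is_valid_header(header: str) -> bool:
        return re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', header) is not None

    assert is_valid_header('@@ -167,289 +167,231 @@')   # well-formed header
    assert not is_valid_header('@@ -167 +167,231 @@')   # missing count: rejected
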
@@ -4,6 +4,7 @@ import time
 import signal
 from typing import List, Tuple, Dict, Any, Optional
 from app.utils.logging_utils import logger
+import re
 
 from app.utils.file_utils import is_binary_file, is_document_file, is_processable_file, read_file_content
 from app.utils.logging_utils import logger
@@ -33,9 +34,16 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
     # Add additional exclude directories from environment variable if it exists
     additional_excludes = os.environ.get("ZIYA_ADDITIONAL_EXCLUDE_DIRS", "")
     if additional_excludes:
+        logger.info(f"Processing additional excludes: {additional_excludes}")
         for pattern in additional_excludes.split(','):
+            pattern = pattern.strip()
             if pattern:
                 ignored_patterns.append((pattern, user_codebase_dir))
+                logger.info(f"Added exclude pattern: {pattern}")
+
+    logger.info(f"Total ignore patterns: {len(ignored_patterns)}")
+    for pattern, base in ignored_patterns:
+        logger.debug(f"Ignore pattern: {pattern} (base: {base})")
 
     def read_gitignore(path: str) -> List[Tuple[str, str]]:
         gitignore_patterns: List[Tuple[str, str]] = []
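
Note: per the loop above, ZIYA_ADDITIONAL_EXCLUDE_DIRS is read as a comma-separated list, and each entry is now stripped before being recorded. A small sketch of the implied parsing:

    import os

    # Sketch of the behavior implied by the diff: split on commas,
    # strip whitespace, and drop empty entries.
    os.environ["ZIYA_ADDITIONAL_EXCLUDE_DIRS"] = "node_modules, dist,  ,build"
    excludes = [p.strip()
                for p in os.environ["ZIYA_ADDITIONAL_EXCLUDE_DIRS"].split(',')
                if p.strip()]
    print(excludes)  # ['node_modules', 'dist', 'build']
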
@@ -44,7 +52,14 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
             for line_number, line in enumerate(f, 1):
                 line = line.strip()
                 if line and not line.startswith("#"):
-                    gitignore_patterns.append((line, os.path.dirname(path)))
+                    try:
+                        # Test if the pattern would create a valid regex
+                        from app.utils.gitignore_parser import rule_from_pattern
+                        test_rule = rule_from_pattern(line, base_path=os.path.dirname(path))
+                        if test_rule:
+                            gitignore_patterns.append((line, os.path.dirname(path)))
+                    except re.error as e:
+                        logger.warning(f"Skipping invalid gitignore pattern '{line}' in {path}:{line_number}: {e}")
         except FileNotFoundError:
             logger.debug(f".gitignore not found at {path}")
         except Exception as e:
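
The new try/except matters because a malformed .gitignore line can translate into an invalid regular expression, and re.compile raises re.error for such patterns. A minimal illustration, using a hypothetical pattern:

    import re

    # An unterminated character class is one way a gitignore line can
    # yield an invalid regex; compiling it raises re.error, now caught.
    try:
        re.compile('[abc')
    except re.error as exc:
        print(f"invalid pattern skipped: {exc}")
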
@@ -59,7 +74,15 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
             patterns.extend(read_gitignore(gitignore_path))
 
         for subdir in glob.glob(os.path.join(path, "*/")):
-            patterns.extend(get_patterns_recursive(subdir))
+            # Skip directories with problematic characters that cause regex errors
+            dir_name = os.path.basename(subdir.rstrip('/'))
+            if '[' in dir_name or ']' in dir_name:
+                logger.debug(f"Skipping directory with brackets: {subdir}")
+                continue
+            try:
+                patterns.extend(get_patterns_recursive(subdir))
+            except re.error as e:
+                logger.warning(f"Skipping directory due to regex error: {subdir} - {e}")
 
         return patterns
 
@@ -77,7 +100,11 @@ def get_complete_file_list(user_codebase_dir: str, ignored_patterns: List[str],
     for pattern in included_relative_dirs:
         for root, dirs, files in os.walk(os.path.normpath(os.path.join(user_codebase_dir, pattern))):
             # Filter out ignored directories and hidden directories
-            dirs[:] = [d for d in dirs if not should_ignore_fn(os.path.join(root, d)) and not d.startswith('.')]
+            # Also filter out symlinks to prevent following them into ignored directories
+            dirs[:] = [d for d in dirs
+                       if not should_ignore_fn(os.path.join(root, d))
+                       and not d.startswith('.')
+                       and not os.path.islink(os.path.join(root, d))]
 
             for file in files:
                 file_path = os.path.join(root, file)
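
The dirs[:] slice assignment is the standard os.walk pruning idiom: mutating the list in place keeps the walk from ever descending into the filtered entries. A small runnable sketch of the same idiom:

    import os

    # In-place pruning: os.walk consults the mutated dirs list, so hidden
    # directories and symlinks are never descended into.
    for root, dirs, files in os.walk('.'):
        dirs[:] = [d for d in dirs
                   if not d.startswith('.')
                   and not os.path.islink(os.path.join(root, d))]
        print(root)
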
@@ -216,10 +243,14 @@ def get_folder_structure(directory: str, ignored_patterns: List[Tuple[str, str]]
             continue
 
         entry_path = os.path.join(path, entry)
+
+        # Skip symlinks early to prevent following them into ignored directories
         if os.path.islink(entry_path):  # Skip symlinks
+            logger.debug(f"Skipping symlink: {entry_path}")
             continue
 
         if should_ignore_fn(entry_path):  # Skip ignored files
+            logger.debug(f"Ignoring path: {entry_path}")
             continue
 
         if os.path.isdir(entry_path):
@@ -182,12 +182,25 @@ def fnmatch_pathname_to_regex(
                 res.append('\\[')
             else:
                 stuff = pattern[i:j].replace('\\', '\\\\').replace('/', '')
-                i = j + 1
-                if stuff[0] == '!':
-                    stuff = ''.join(['^', stuff[1:]])
-                elif stuff[0] == '^':
-                    stuff = ''.join('\\' + stuff)
-                res.append('[{}]'.format(stuff))
+                # Validate character ranges to prevent regex errors
+                try:
+                    # Test if the character class is valid by compiling a test regex
+                    if stuff:
+                        test_pattern = f'[{stuff}]'
+                        re.compile(test_pattern)
+                    # If we get here, the pattern is valid - process it normally
+                    i = j + 1
+                    if stuff[0] == '!':
+                        stuff = ''.join(['^', stuff[1:]])
+                    elif stuff[0] == '^':
+                        stuff = ''.join('\\' + stuff)
+                    res.append('[{}]'.format(stuff))
+                except re.error:
+                    # If invalid, escape the original brackets
+                    res.append('\\[')
+                    res.append(re.escape(stuff))
+                    res.append('\\]')
+                    i = j + 1  # Still need to advance the position
         else:
             res.append(re.escape(c))
     if anchored:
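
The fallback path exists because a character class can be syntactically closed yet still invalid (for example a reversed range), which only surfaces at compile time; escaping the brackets then matches the text literally. A sketch of both paths:

    import re

    # A reversed range like [z-a] fails to compile, so the fallback escapes
    # the whole class and matches it as literal text instead.
    stuff = 'z-a'
    try:
        re.compile(f'[{stuff}]')
        print('valid character class')
    except re.error:
        literal = '\\[' + re.escape(stuff) + '\\]'
        print(bool(re.search(literal, 'file[z-a].txt')))  # True
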
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ziya
-Version: 0.2.4
+Version: 0.2.4.2
 Summary:
 Author: Vishnu Krishnaprasad
 Author-email: vishnukool@gmail.com
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
 Requires-Dist: boto3 (>=1.34.88,<2.0.0)
+Requires-Dist: cryptography (>=3.4.8,<43.0.0)
 Requires-Dist: cssutils (>=2.6.0)
 Requires-Dist: html5lib (>=1.1)
 Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
@@ -24,10 +25,12 @@ Requires-Dist: langchain-community (>=0.3.1,<0.4.0)
 Requires-Dist: langchain-google-genai (>=2.1.0,<3.0.0)
 Requires-Dist: langchainhub (>=0.1.15)
 Requires-Dist: langgraph (>=0.2,<0.3)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openpyxl (>=3.1.2,<4.0.0)
-Requires-Dist: pandas (>=2.1.0,<3.0.0)
+Requires-Dist: pandas (>=2.0.0,<2.3.0)
 Requires-Dist: patch-ng (>=1.17)
 Requires-Dist: pdfplumber (>=0.10.0,<0.11.0)
+Requires-Dist: pyOpenSSL (>=20.0.0,<25.0.0)
 Requires-Dist: pydantic (>=2.9.2,<3.0.0)
 Requires-Dist: pydevd-pycharm (>=243.18137.19,<244.0.0)
 Requires-Dist: python-docx (>=1.1.0,<2.0.0)