ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ziya might be problematic.
- app/main.py +2 -1
- app/server.py +11 -2
- app/templates/asset-manifest.json +17 -17
- app/templates/index.html +1 -1
- app/templates/static/js/14386.567bf803.chunk.js +2 -0
- app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
- app/templates/static/js/94645.a352e47a.chunk.js +2 -0
- app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
- app/templates/static/js/98244.0b90f940.chunk.js +3 -0
- app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
- app/templates/static/js/99948.71670e91.chunk.js +2 -0
- app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
- app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
- app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
- app/utils/aws_utils.py +48 -36
- app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
- app/utils/diff_utils/application/patch_apply.py +248 -2
- app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
- app/utils/diff_utils/parsing/diff_parser.py +37 -13
- app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
- app/utils/diff_utils/validation/validators.py +201 -259
- app/utils/directory_util.py +34 -3
- app/utils/gitignore_parser.py +19 -6
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
- app/templates/static/js/14386.881399c5.chunk.js +0 -2
- app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
- app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
- app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
- app/templates/static/js/94645.68d48e03.chunk.js +0 -2
- app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
- app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
- app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
- /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
app/utils/diff_utils/validation/validators.py CHANGED

@@ -167,289 +167,231 @@ def is_hunk_already_applied(file_lines: List[str], hunk: Dict[str, Any], pos: in
 
     # Extract the removed and added lines from the hunk
     removed_lines, added_lines = extract_diff_changes(hunk)
+    new_lines = hunk.get('new_lines', [])
+
+    # Validate hunk header if present
+    if not _is_valid_hunk_header(hunk):
+        return False
+
+    # Handle no-op hunks
+    if not removed_lines and not added_lines:
+        logger.debug("No actual changes in hunk (no removed or added lines)")
+        return True
 
-    #
-    # If this is a pure addition (no lines removed), check if the exact content exists
+    # For pure additions, check if content already exists in file
     if len(removed_lines) == 0 and len(added_lines) > 0:
-
-        # Check if the exact content exists anywhere in the file
-        added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
-        file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
-
-        # If the exact added content doesn't exist in the file, it's not already applied
-        if added_content not in file_content:
-            logger.debug(f"Pure addition not found in file content")
-            return False
+        return _check_pure_addition_already_applied(file_lines, added_lines)
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-        if
-
-            similarity = difflib.SequenceMatcher(None,
-                "\n".join(normalized_file_slice),
-                "\n".join(normalized_removed_lines)).ratio()
-            logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-            logger.debug(f"File content: {normalized_file_slice}")
-            logger.debug(f"Removed lines: {normalized_removed_lines}")
+    # For hunks with removals, validate that the content to be removed matches
+    if removed_lines and not _validate_removal_content(file_lines, removed_lines, pos):
+        return False
+
+    # Check if the expected result (new_lines) is already present at this position
+    return _check_expected_content_match(file_lines, new_lines, pos, ignore_whitespace)
+
+
+def _is_valid_hunk_header(hunk: Dict[str, Any]) -> bool:
+    """Check if the hunk header is valid."""
+    if 'header' in hunk and '@@ -' in hunk['header']:
+        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
+        if not header_match:
+            logger.warning(f"Malformed hunk header: {hunk['header']}")
            return False
+    return True
+
+
+def _check_pure_addition_already_applied(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if a pure addition (no removals) is already applied."""
+    # Check if the exact content exists anywhere in the file
+    added_content = "\n".join([normalize_line_for_comparison(line) for line in added_lines])
+    file_content = "\n".join([normalize_line_for_comparison(line) for line in file_lines])
+
+    if added_content not in file_content:
+        logger.debug("Pure addition not found in file content")
+        return False
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Check for duplicate declarations
+    return _check_duplicate_declarations(file_lines, added_lines)
+
+
+def _check_duplicate_declarations(file_lines: List[str], added_lines: List[str]) -> bool:
+    """Check if added lines contain declarations that already exist in the file."""
+    declaration_patterns = [
+        r'const\s+\w+\s*=',      # const x =
+        r'let\s+\w+\s*=',        # let x =
+        r'var\s+\w+\s*=',        # var x =
+        r'function\s+\w+\s*\(',  # function x(
+        r'class\s+\w+\s*{',      # class x {
+        r'interface\s+\w+\s*{',  # interface x {
+        r'type\s+\w+\s*=',       # type x =
+        r'enum\s+\w+\s*{',       # enum x {
+    ]
+
+    for added_line in added_lines:
+        for pattern in declaration_patterns:
+            match = re.search(pattern, added_line)
+            if match:
+                # Extract declaration name
+                declaration_name = None
+                for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
+                    if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
+                        declaration_name = m.group(1)
+                        break
+
+                if declaration_name:
+                    # Check if this declaration already exists elsewhere in the file
+                    for line in file_lines:
+                        if declaration_name in line:
+                            for p in declaration_patterns:
+                                if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
+                                    logger.debug(f"Found duplicate declaration of '{declaration_name}'")
+                                    return True
+    return False
+
+
+def _validate_removal_content(file_lines: List[str], removed_lines: List[str], pos: int) -> bool:
+    """Validate that the content to be removed matches what's in the file."""
+    if pos + len(removed_lines) > len(file_lines):
+        return False
+
+    file_slice = file_lines[pos:pos+len(removed_lines)]
+    normalized_file_slice = [normalize_line_for_comparison(line) for line in file_slice]
+    normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
+
+    if normalized_file_slice != normalized_removed_lines:
+        similarity = difflib.SequenceMatcher(None,
+            "\n".join(normalized_file_slice),
+            "\n".join(normalized_removed_lines)).ratio()
+        logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
+        logger.debug(f"File content: {normalized_file_slice}")
+        logger.debug(f"Removed lines: {normalized_removed_lines}")
+        return False
 
-
+    return True
+
+
+def _check_expected_content_match(file_lines: List[str], new_lines: List[str], pos: int, ignore_whitespace: bool) -> bool:
+    """Check if the expected content after applying the hunk is already present."""
     if pos + len(new_lines) > len(file_lines):
         logger.debug(f"Not enough lines to compare at position {pos}")
         return False
 
-    # Extract the file content at the position
     file_slice = file_lines[pos:pos+len(new_lines)]
 
-    #
-
-
-
-            logger.debug(f"  File: {repr(file_line)}")
-            logger.debug(f"  Expected: {repr(new_line)}")
-            return False
-
-    logger.debug(f"Hunk already applied at position {pos}")
-    return True
+    # Try exact match first
+    if _lines_match_exactly(file_slice, new_lines):
+        logger.debug(f"Exact match of expected content found at position {pos}")
+        return True
 
-    #
+    # Try with various normalizations
+    if _lines_match_with_normalization(file_slice, new_lines, ignore_whitespace):
+        return True
 
-    #
-
-
-
-
-
-
-
-    normalized_removed_lines = [normalize_line_for_comparison(line) for line in removed_lines]
-
-    # If the file content doesn't match what we're trying to remove,
-    # then this hunk can't be already applied here
-    if normalized_file_slice != normalized_removed_lines:
-        # Calculate similarity to help with debugging
-        similarity = difflib.SequenceMatcher(None,
-            "\n".join(normalized_file_slice),
-            "\n".join(normalized_removed_lines)).ratio()
-        logger.debug(f"File content doesn't match what we're trying to remove at position {pos} (similarity: {similarity:.2f})")
-        logger.debug(f"File content: {normalized_file_slice}")
-        logger.debug(f"Removed lines: {normalized_removed_lines}")
-        return False
-
-    # 2. Check if the diff header is malformed
-    if 'header' in hunk and '@@ -' in hunk['header']:
-        # Check if the header has proper line numbers
-        header_match = re.match(r'^@@ -(\d+),(\d+) \+(\d+),(\d+) @@', hunk['header'])
-        if not header_match:
-            logger.warning(f"Malformed hunk header: {hunk['header']}")
-            # Don't mark hunks with malformed headers as already applied
+    # Try fuzzy matching as last resort
+    return _lines_match_fuzzy(file_slice, new_lines)
+
+
+def _lines_match_exactly(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match exactly."""
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line) != normalize_line_for_comparison(expected_line):
            return False
+    return True
+
+
+def _lines_match_with_normalization(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if lines match with various normalizations applied."""
+    # Check for whitespace-only changes
+    if _check_whitespace_only_changes(file_lines, expected_lines, ignore_whitespace):
+        return True
 
-    #
-    if
-
+    # Check for invisible Unicode characters
+    if _check_invisible_unicode_match(file_lines, expected_lines):
+        return True
+
+    # Check for escape sequences
+    if _check_escape_sequence_match(file_lines, expected_lines):
        return True
 
-
-
-
+    return False
+
+
+def _check_whitespace_only_changes(file_lines: List[str], expected_lines: List[str], ignore_whitespace: bool) -> bool:
+    """Check if the differences are only in whitespace."""
+    if len(file_lines) != len(expected_lines):
+        return False
+
+    # Check if content is the same ignoring whitespace
+    whitespace_only = True
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+            whitespace_only = False
+            break
+
+    if not whitespace_only:
        return False
 
-    #
-
-
-
+    # For whitespace-only changes, check exact match based on ignore_whitespace setting
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        if not ignore_whitespace:
+            if file_line.rstrip('\r\n') != expected_line.rstrip('\r\n'):
+                # Try normalizing invisible characters
+                if normalize_unicode(file_line.rstrip('\r\n')) != normalize_unicode(expected_line.rstrip('\r\n')):
+                    return False
+        else:
+            if normalize_line_for_comparison(file_line).strip() != normalize_line_for_comparison(expected_line).strip():
+                return False
+
+    logger.debug("Whitespace-only changes already applied")
+    return True
+
+
+def _check_invisible_unicode_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when invisible Unicode characters are normalized."""
+    if not any('\u200B' in line or '\u200C' in line or '\u200D' in line or '\uFEFF' in line for line in expected_lines):
+        return False
+
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_unicode(file_line)
+        normalized_expected_line = normalize_unicode(expected_line)
 
-
-    if not expected_lines:
-        # If it's a pure deletion, this check isn't sufficient.
-        # For now, assume if new_lines is empty, it's not "already applied" in the sense of content matching.
-        logger.debug("Hunk results in empty content (deletion), cannot match based on new_lines.")
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
            return False
-
-    # Check if the available lines match the expected lines
-    if len(available_lines) >= len(expected_lines):
-        # Now check for exact match of expected content
-        exact_match = True
-        for i, expected_line in enumerate(expected_lines):
-            if i >= len(available_lines):
-                exact_match = False
-                break
-
-            # Use our enhanced normalization for better comparison
-            normalized_file_line = normalize_line_for_comparison(available_lines[i])
-            normalized_expected_line = normalize_line_for_comparison(expected_line)
-
-            if normalized_file_line != normalized_expected_line:
-                # If normalized versions don't match, it's definitely not applied
-                exact_match = False
-                break
-
-        if exact_match:
-            logger.debug(f"Exact match of expected content found at position {pos}")
-            return True
-
-    # CRITICAL FIX: Check for duplicate declarations
-    # This is a language-agnostic approach that looks for patterns like duplicate variable declarations
-    if added_lines:
-        # Look for patterns that might indicate declarations
-        declaration_patterns = [
-            r'const\s+\w+\s*=',      # const x =
-            r'let\s+\w+\s*=',        # let x =
-            r'var\s+\w+\s*=',        # var x =
-            r'function\s+\w+\s*\(',  # function x(
-            r'class\s+\w+\s*{',      # class x {
-            r'interface\s+\w+\s*{',  # interface x {
-            r'type\s+\w+\s*=',       # type x =
-            r'enum\s+\w+\s*{',       # enum x {
-        ]
-
-        # Check if any added line matches a declaration pattern
-        for added_line in added_lines:
-            for pattern in declaration_patterns:
-                match = re.search(pattern, added_line)
-                if match:
-                    # Found a potential declaration, check if it already exists elsewhere in the file
-                    declaration_name = None
-                    for m in re.finditer(r'\b(\w+)\b', added_line[match.start():]):
-                        if m.group(1) not in ['const', 'let', 'var', 'function', 'class', 'interface', 'type', 'enum']:
-                            declaration_name = m.group(1)
-                            break
-
-                    if declaration_name:
-                        # Check if this declaration already exists elsewhere in the file
-                        for i, line in enumerate(file_lines):
-                            if i != pos and declaration_name in line:
-                                for p in declaration_patterns:
-                                    if re.search(p + r'.*\b' + re.escape(declaration_name) + r'\b', line):
-                                        logger.debug(f"Found duplicate declaration of '{declaration_name}' at line {i}")
-                                        # This declaration already exists elsewhere, so this hunk might be already applied
-                                        return True
-
-    # Check if this is a whitespace-only change
-    if len(removed_lines) == len(added_lines):
-        whitespace_only = True
-        for removed, added in zip(removed_lines, added_lines):
-            # Compare non-whitespace content
-            if normalize_line_for_comparison(removed).strip() != normalize_line_for_comparison(added).strip():
-                whitespace_only = False
-                break
-
-        if whitespace_only and removed_lines:  # Only if there are actual changes
-            # For whitespace-only changes, check if the file already has the correct whitespace
-            if len(available_lines) >= len(added_lines):
-                all_match = True
-                for i, added_line in enumerate(added_lines):
-                    if i >= len(available_lines):
-                        all_match = False
-                        break
-
-                    # Compare with exact whitespace if not ignoring whitespace
-                    if not ignore_whitespace:
-                        if available_lines[i].rstrip('\r\n') != added_line.rstrip('\r\n'):
-                            # Try normalizing invisible characters
-                            if normalize_unicode(available_lines[i].rstrip('\r\n')) != normalize_unicode(added_line.rstrip('\r\n')):
-                                all_match = False
-                                break
-                    else:
-                        # Compare ignoring whitespace
-                        if normalize_line_for_comparison(available_lines[i]).strip() != normalize_line_for_comparison(added_line).strip():
-                            all_match = False
-                            break
-
-                if all_match:
-                    logger.debug("Whitespace-only changes already applied")
-                    return True
 
-
-
-
-
-
-
-
-
-                all_match = False
-                break
-
-            # Normalize both lines to remove invisible characters
-            normalized_file_line = normalize_unicode(available_lines[i])
-            normalized_added_line = normalize_unicode(added_line)
-
-            # Compare normalized content
-            if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                all_match = False
-                break
-
-        if all_match:
-            logger.debug("Content with invisible Unicode characters already applied (normalized)")
-            return True
+    logger.debug("Content with invisible Unicode characters already applied (normalized)")
+    return True
+
+
+def _check_escape_sequence_match(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match when escape sequences are normalized."""
+    if not any('\\n' in line or '\\r' in line or '\\t' in line or '\\\\' in line for line in expected_lines):
+        return False
 
-
-
-
-    # Check if the file already has the content with properly handled escape sequences
-    if len(available_lines) >= len(added_lines):
-        all_match = True
-        for i, added_line in enumerate(added_lines):
-            if i >= len(available_lines):
-                all_match = False
-                break
-
-            # Normalize both lines to handle escape sequences
-            normalized_file_line = normalize_escape_sequences(available_lines[i])
-            normalized_added_line = normalize_escape_sequences(added_line)
-
-            # Compare normalized content
-            if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_added_line):
-                all_match = False
-                break
-
-        if all_match:
-            logger.debug("Content with escape sequences already applied (normalized)")
-            return True
-
-    # Calculate overall similarity for fuzzy matching
-    if len(available_lines) >= len(added_lines) and added_lines:
-        # Normalize both sides for comparison
-        normalized_available = [normalize_line_for_comparison(line) for line in available_lines[:len(added_lines)]]
-        normalized_added = [normalize_line_for_comparison(line) for line in added_lines]
-
-        similarity = calculate_block_similarity(normalized_available, normalized_added)
+    for file_line, expected_line in zip(file_lines, expected_lines):
+        normalized_file_line = normalize_escape_sequences(file_line)
+        normalized_expected_line = normalize_escape_sequences(expected_line)
 
-
-
-
-
+        if normalize_line_for_comparison(normalized_file_line) != normalize_line_for_comparison(normalized_expected_line):
+            return False
+
+    logger.debug("Content with escape sequences already applied (normalized)")
+    return True
+
+
+def _lines_match_fuzzy(file_lines: List[str], expected_lines: List[str]) -> bool:
+    """Check if lines match using fuzzy matching."""
+    if not expected_lines:
+        return False
+
+    # Normalize both sides for comparison
+    normalized_file = [normalize_line_for_comparison(line) for line in file_lines]
+    normalized_expected = [normalize_line_for_comparison(line) for line in expected_lines]
+
+    similarity = calculate_block_similarity(normalized_file, normalized_expected)
+
+    # Very high similarity suggests the changes are already applied
+    if similarity >= 0.95:
+        logger.debug(f"Very high similarity ({similarity:.2f}) suggests hunk already applied")
+        return True
 
    return False
app/utils/directory_util.py CHANGED

@@ -4,6 +4,7 @@ import time
 import signal
 from typing import List, Tuple, Dict, Any, Optional
 from app.utils.logging_utils import logger
+import re
 
 from app.utils.file_utils import is_binary_file, is_document_file, is_processable_file, read_file_content
 from app.utils.logging_utils import logger
@@ -33,9 +34,16 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
     # Add additional exclude directories from environment variable if it exists
     additional_excludes = os.environ.get("ZIYA_ADDITIONAL_EXCLUDE_DIRS", "")
     if additional_excludes:
+        logger.info(f"Processing additional excludes: {additional_excludes}")
        for pattern in additional_excludes.split(','):
+            pattern = pattern.strip()
            if pattern:
                ignored_patterns.append((pattern, user_codebase_dir))
+                logger.info(f"Added exclude pattern: {pattern}")
+
+    logger.info(f"Total ignore patterns: {len(ignored_patterns)}")
+    for pattern, base in ignored_patterns:
+        logger.debug(f"Ignore pattern: {pattern} (base: {base})")
 
     def read_gitignore(path: str) -> List[Tuple[str, str]]:
         gitignore_patterns: List[Tuple[str, str]] = []
@@ -44,7 +52,14 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
            for line_number, line in enumerate(f, 1):
                line = line.strip()
                if line and not line.startswith("#"):
-
+                    try:
+                        # Test if the pattern would create a valid regex
+                        from app.utils.gitignore_parser import rule_from_pattern
+                        test_rule = rule_from_pattern(line, base_path=os.path.dirname(path))
+                        if test_rule:
+                            gitignore_patterns.append((line, os.path.dirname(path)))
+                    except re.error as e:
+                        logger.warning(f"Skipping invalid gitignore pattern '{line}' in {path}:{line_number}: {e}")
        except FileNotFoundError:
            logger.debug(f".gitignore not found at {path}")
        except Exception as e:
@@ -59,7 +74,15 @@ def get_ignored_patterns(directory: str) -> List[Tuple[str, str]]:
        patterns.extend(read_gitignore(gitignore_path))
 
        for subdir in glob.glob(os.path.join(path, "*/")):
-
+            # Skip directories with problematic characters that cause regex errors
+            dir_name = os.path.basename(subdir.rstrip('/'))
+            if '[' in dir_name or ']' in dir_name:
+                logger.debug(f"Skipping directory with brackets: {subdir}")
+                continue
+            try:
+                patterns.extend(get_patterns_recursive(subdir))
+            except re.error as e:
+                logger.warning(f"Skipping directory due to regex error: {subdir} - {e}")
 
        return patterns
 
@@ -77,7 +100,11 @@ def get_complete_file_list(user_codebase_dir: str, ignored_patterns: List[str],
     for pattern in included_relative_dirs:
        for root, dirs, files in os.walk(os.path.normpath(os.path.join(user_codebase_dir, pattern))):
            # Filter out ignored directories and hidden directories
-
+            # Also filter out symlinks to prevent following them into ignored directories
+            dirs[:] = [d for d in dirs
+                       if not should_ignore_fn(os.path.join(root, d))
+                       and not d.startswith('.')
+                       and not os.path.islink(os.path.join(root, d))]
 
            for file in files:
                file_path = os.path.join(root, file)
@@ -216,10 +243,14 @@ def get_folder_structure(directory: str, ignored_patterns: List[Tuple[str, str]]
            continue
 
        entry_path = os.path.join(path, entry)
+
+        # Skip symlinks early to prevent following them into ignored directories
        if os.path.islink(entry_path):  # Skip symlinks
+            logger.debug(f"Skipping symlink: {entry_path}")
            continue
 
        if should_ignore_fn(entry_path):  # Skip ignored files
+            logger.debug(f"Ignoring path: {entry_path}")
            continue
 
        if os.path.isdir(entry_path):
app/utils/gitignore_parser.py CHANGED

@@ -182,12 +182,25 @@ def fnmatch_pathname_to_regex(
                res.append('\\[')
            else:
                stuff = pattern[i:j].replace('\\', '\\\\').replace('/', '')
-
-
-
-
-
-
+                # Validate character ranges to prevent regex errors
+                try:
+                    # Test if the character class is valid by compiling a test regex
+                    if stuff:
+                        test_pattern = f'[{stuff}]'
+                        re.compile(test_pattern)
+                    # If we get here, the pattern is valid - process it normally
+                    i = j + 1
+                    if stuff[0] == '!':
+                        stuff = ''.join(['^', stuff[1:]])
+                    elif stuff[0] == '^':
+                        stuff = ''.join('\\' + stuff)
+                    res.append('[{}]'.format(stuff))
+                except re.error:
+                    # If invalid, escape the original brackets
+                    res.append('\\[')
+                    res.append(re.escape(stuff))
+                    res.append('\\]')
+                    i = j + 1  # Still need to advance the position
        else:
            res.append(re.escape(c))
    if anchored:
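The gitignore_parser.py change leans on re.compile rejecting malformed character classes (a reversed range such as z-a raises re.error). A minimal stand-alone illustration of both branches, where safe_char_class is a hypothetical name rather than ziya's API and the hunk's extra handling of leading ! and ^ is omitted:

import re

def safe_char_class(stuff: str) -> str:
    # Mirror of the hunk's fallback: keep a real character class when
    # [stuff] compiles, otherwise emit the brackets and contents escaped.
    try:
        re.compile(f'[{stuff}]')
        return f'[{stuff}]'
    except re.error:
        return '\\[' + re.escape(stuff) + '\\]'

print(safe_char_class('a-z'))  # [a-z]     -- valid range, kept as a class
print(safe_char_class('z-a'))  # \[z\-a\]  -- bad range, escaped literally

Escaping instead of raising keeps one bad .gitignore line from aborting traversal of the whole tree, the same failure mode the directory_util.py hunks above guard against by catching re.error.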
{ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ziya
-Version: 0.2.4
+Version: 0.2.4.2
 Summary:
 Author: Vishnu Krishnaprasad
 Author-email: vishnukool@gmail.com
@@ -13,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
 Requires-Dist: boto3 (>=1.34.88,<2.0.0)
+Requires-Dist: cryptography (>=3.4.8,<43.0.0)
 Requires-Dist: cssutils (>=2.6.0)
 Requires-Dist: html5lib (>=1.1)
 Requires-Dist: jinja2 (>=3.1.3,<4.0.0)
@@ -24,10 +25,12 @@ Requires-Dist: langchain-community (>=0.3.1,<0.4.0)
 Requires-Dist: langchain-google-genai (>=2.1.0,<3.0.0)
 Requires-Dist: langchainhub (>=0.1.15)
 Requires-Dist: langgraph (>=0.2,<0.3)
+Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openpyxl (>=3.1.2,<4.0.0)
-Requires-Dist: pandas (>=2.
+Requires-Dist: pandas (>=2.0.0,<2.3.0)
 Requires-Dist: patch-ng (>=1.17)
 Requires-Dist: pdfplumber (>=0.10.0,<0.11.0)
+Requires-Dist: pyOpenSSL (>=20.0.0,<25.0.0)
 Requires-Dist: pydantic (>=2.9.2,<3.0.0)
 Requires-Dist: pydevd-pycharm (>=243.18137.19,<244.0.0)
 Requires-Dist: python-docx (>=1.1.0,<2.0.0)