ziya 0.2.4__py3-none-any.whl → 0.2.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ziya might be problematic. Click here for more details.
- app/main.py +2 -1
- app/server.py +11 -2
- app/templates/asset-manifest.json +17 -17
- app/templates/index.html +1 -1
- app/templates/static/js/14386.567bf803.chunk.js +2 -0
- app/templates/static/js/14386.567bf803.chunk.js.map +1 -0
- app/templates/static/js/94645.a352e47a.chunk.js +2 -0
- app/templates/static/js/94645.a352e47a.chunk.js.map +1 -0
- app/templates/static/js/98244.0b90f940.chunk.js +3 -0
- app/templates/static/js/98244.0b90f940.chunk.js.map +1 -0
- app/templates/static/js/99948.71670e91.chunk.js +2 -0
- app/templates/static/js/99948.71670e91.chunk.js.map +1 -0
- app/templates/static/js/{main.05ba4902.js → main.77e20f53.js} +3 -3
- app/templates/static/js/{main.05ba4902.js.map → main.77e20f53.js.map} +1 -1
- app/utils/aws_utils.py +48 -36
- app/utils/diff_utils/application/identical_blocks_handler.py +290 -0
- app/utils/diff_utils/application/patch_apply.py +248 -2
- app/utils/diff_utils/application/simple_identical_blocks_fix.py +129 -0
- app/utils/diff_utils/parsing/diff_parser.py +37 -13
- app/utils/diff_utils/pipeline/pipeline_manager.py +56 -3
- app/utils/diff_utils/validation/validators.py +201 -259
- app/utils/directory_util.py +34 -3
- app/utils/gitignore_parser.py +19 -6
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/METADATA +5 -2
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/RECORD +31 -29
- app/templates/static/js/14386.881399c5.chunk.js +0 -2
- app/templates/static/js/14386.881399c5.chunk.js.map +0 -1
- app/templates/static/js/19886.c4b3152d.chunk.js +0 -3
- app/templates/static/js/19886.c4b3152d.chunk.js.map +0 -1
- app/templates/static/js/94645.68d48e03.chunk.js +0 -2
- app/templates/static/js/94645.68d48e03.chunk.js.map +0 -1
- app/templates/static/js/99948.fdf17a82.chunk.js +0 -2
- app/templates/static/js/99948.fdf17a82.chunk.js.map +0 -1
- /app/templates/static/js/{19886.c4b3152d.chunk.js.LICENSE.txt → 98244.0b90f940.chunk.js.LICENSE.txt} +0 -0
- /app/templates/static/js/{main.05ba4902.js.LICENSE.txt → main.77e20f53.js.LICENSE.txt} +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/LICENSE +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/WHEEL +0 -0
- {ziya-0.2.4.dist-info → ziya-0.2.4.2.dist-info}/entry_points.txt +0 -0
app/utils/aws_utils.py
CHANGED
|
@@ -133,6 +133,24 @@ def check_aws_credentials(is_server_startup=True, profile_name=None):
|
|
|
133
133
|
return False, error_msg
|
|
134
134
|
except Exception as e:
|
|
135
135
|
logger.error(f"AWS credentials check failed: {e}")
|
|
136
|
+
|
|
137
|
+
import os
|
|
138
|
+
# First, check if we have any AWS credentials at all
|
|
139
|
+
has_any_credentials = (
|
|
140
|
+
os.environ.get("AWS_ACCESS_KEY_ID") or
|
|
141
|
+
os.environ.get("AWS_SECRET_ACCESS_KEY") or
|
|
142
|
+
os.environ.get("AWS_SESSION_TOKEN") or
|
|
143
|
+
os.path.exists(os.path.expanduser("~/.aws/credentials")) or
|
|
144
|
+
os.path.exists(os.path.expanduser("~/.aws/config"))
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
if not has_any_credentials:
|
|
148
|
+
return False, """⚠️ AWS CREDENTIALS ERROR: No AWS credentials found.
|
|
149
|
+
|
|
150
|
+
Please set up your AWS credentials using one of these methods:
|
|
151
|
+
1. Run 'aws configure' to set up credentials
|
|
152
|
+
2. Set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables
|
|
153
|
+
3. For Amazon internal users, run 'mwinit' to get temporary credentials"""
|
|
136
154
|
|
|
137
155
|
# Create a more user-friendly error message
|
|
138
156
|
error_msg = str(e)
|
|
@@ -179,30 +197,18 @@ Please run the following command to refresh your credentials:
|
|
|
179
197
|
Then try your query again."""
|
|
180
198
|
except Exception as cli_error:
|
|
181
199
|
logger.debug(f"Failed to get detailed error from AWS CLI: {cli_error}")
|
|
182
|
-
|
|
183
200
|
# If we couldn't get a better message from the CLI, use our standard error handling
|
|
184
201
|
# For Amazon internal users, we should always suggest mwinit for credential errors
|
|
185
202
|
# Check if this is an Amazon internal environment first
|
|
186
|
-
is_amazon_internal =
|
|
187
|
-
try:
|
|
188
|
-
# Check for common Amazon internal environment indicators
|
|
189
|
-
import os
|
|
190
|
-
is_amazon_internal = (
|
|
191
|
-
os.path.exists('/apollo') or
|
|
192
|
-
os.path.exists('/home/ec2-user') or
|
|
193
|
-
'AWS_PROFILE' in os.environ and 'isengard' in os.environ.get('AWS_PROFILE', '').lower() or
|
|
194
|
-
'AWS_CONFIG_FILE' in os.environ and 'midway' in os.environ.get('AWS_CONFIG_FILE', '').lower() or
|
|
195
|
-
any(pattern in error_msg for pattern in ["amazon.com", "corp.amazon", "midway", "isengard"])
|
|
196
|
-
)
|
|
197
|
-
except:
|
|
198
|
-
pass
|
|
203
|
+
is_amazon_internal = _is_amazon_internal_environment(error_msg)
|
|
199
204
|
|
|
200
|
-
# If we're in an Amazon internal environment,
|
|
205
|
+
# If we're in an Amazon internal environment, suggest mwinit for credential errors
|
|
201
206
|
if is_amazon_internal and ("InvalidClientTokenId" in error_msg or "ExpiredToken" in error_msg or
|
|
202
207
|
"AccessDenied" in error_msg or "NoCredentialProviders" in error_msg):
|
|
203
208
|
if is_server_startup:
|
|
204
209
|
return False, """⚠️ AWS CREDENTIALS ERROR: Your Amazon internal credentials have expired.
|
|
205
210
|
|
|
211
|
+
|
|
206
212
|
Please run the following command to refresh your credentials:
|
|
207
213
|
|
|
208
214
|
mwinit
|
|
@@ -233,27 +239,6 @@ Then try your query again."""
|
|
|
233
239
|
# Generic error message for other cases
|
|
234
240
|
return False, f"⚠️ AWS CREDENTIALS ERROR: {e}. Please check your AWS credentials and try again."
|
|
235
241
|
|
|
236
|
-
# For Amazon internal users, we should always suggest mwinit for credential errors
|
|
237
|
-
# Check if this is an Amazon internal environment first
|
|
238
|
-
is_amazon_internal = False
|
|
239
|
-
try:
|
|
240
|
-
# Check for common Amazon internal environment indicators
|
|
241
|
-
import os
|
|
242
|
-
is_amazon_internal = (
|
|
243
|
-
os.path.exists('/apollo') or
|
|
244
|
-
os.path.exists('/home/ec2-user') or
|
|
245
|
-
'AWS_PROFILE' in os.environ and 'isengard' in os.environ.get('AWS_PROFILE', '').lower() or
|
|
246
|
-
'AWS_CONFIG_FILE' in os.environ and 'midway' in os.environ.get('AWS_CONFIG_FILE', '').lower() or
|
|
247
|
-
any(pattern in error_msg for pattern in ["amazon.com", "corp.amazon", "midway", "isengard"])
|
|
248
|
-
)
|
|
249
|
-
except:
|
|
250
|
-
pass
|
|
251
|
-
|
|
252
|
-
# If we're in an Amazon internal environment, always suggest mwinit for credential errors
|
|
253
|
-
if is_amazon_internal and ("InvalidClientTokenId" in error_msg or "ExpiredToken" in error_msg or
|
|
254
|
-
"AccessDenied" in error_msg or "NoCredentialProviders" in error_msg):
|
|
255
|
-
return False, "⚠️ AWS CREDENTIALS ERROR: Your Amazon internal credentials have expired. Please run 'mwinit' to refresh them."
|
|
256
|
-
|
|
257
242
|
# Standard error detection for non-Amazon environments
|
|
258
243
|
if "ExpiredToken" in error_msg:
|
|
259
244
|
return False, "⚠️ AWS CREDENTIALS ERROR: Your AWS credentials have expired. Please refresh your credentials."
|
|
@@ -267,6 +252,33 @@ Then try your query again."""
|
|
|
267
252
|
# Generic error message for other cases
|
|
268
253
|
return False, f"⚠️ AWS CREDENTIALS ERROR: {e}. Please check your AWS credentials and try again."
|
|
269
254
|
|
|
255
|
+
def _is_amazon_internal_environment(error_message=""):
|
|
256
|
+
"""
|
|
257
|
+
Determine if this is an Amazon internal environment.
|
|
258
|
+
Uses more conservative detection to avoid false positives.
|
|
259
|
+
"""
|
|
260
|
+
try:
|
|
261
|
+
# Check for definitive Amazon internal indicators
|
|
262
|
+
definitive_indicators = [
|
|
263
|
+
os.path.exists('/apollo'), # Apollo environment
|
|
264
|
+
'midway' in error_message.lower(), # Midway auth mentioned in error
|
|
265
|
+
'mwinit' in error_message.lower(), # mwinit mentioned in error
|
|
266
|
+
'iibs-midway' in error_message.lower(), # IIBS Midway auth
|
|
267
|
+
]
|
|
268
|
+
|
|
269
|
+
# Check AWS profile/config for Amazon-specific patterns
|
|
270
|
+
aws_profile_indicators = [
|
|
271
|
+
'AWS_PROFILE' in os.environ and 'isengard' in os.environ.get('AWS_PROFILE', '').lower(),
|
|
272
|
+
'AWS_CONFIG_FILE' in os.environ and 'midway' in os.environ.get('AWS_CONFIG_FILE', '').lower(),
|
|
273
|
+
]
|
|
274
|
+
|
|
275
|
+
# Only consider it Amazon internal if we have definitive indicators
|
|
276
|
+
# OR AWS profile indicators combined with credential-related errors
|
|
277
|
+
return any(definitive_indicators) or (any(aws_profile_indicators) and
|
|
278
|
+
any(term in error_message for term in ["ExpiredToken", "InvalidClientTokenId"]))
|
|
279
|
+
except:
|
|
280
|
+
return False
|
|
281
|
+
|
|
270
282
|
def debug_aws_credentials():
|
|
271
283
|
"""Debug function to print AWS credential information."""
|
|
272
284
|
import boto3
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Specialized handler for identical adjacent blocks in diff application.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import difflib
|
|
7
|
+
from typing import List, Optional, Tuple, Dict
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
def detect_identical_adjacent_blocks(file_lines: List[str], hunks: List[Dict]) -> bool:
|
|
12
|
+
"""
|
|
13
|
+
Detect if this diff involves identical adjacent blocks that could cause confusion.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
file_lines: The original file content
|
|
17
|
+
hunks: List of hunks to be applied
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
True if identical adjacent blocks are detected
|
|
21
|
+
"""
|
|
22
|
+
# Look for patterns that appear multiple times in the file
|
|
23
|
+
patterns_found = {}
|
|
24
|
+
|
|
25
|
+
for hunk in hunks:
|
|
26
|
+
# Extract the "old" lines from the hunk (lines being replaced/removed)
|
|
27
|
+
old_lines = []
|
|
28
|
+
for line in hunk.get('lines', []):
|
|
29
|
+
if line.startswith('-') or line.startswith(' '):
|
|
30
|
+
old_lines.append(line[1:] if line.startswith(('-', ' ')) else line)
|
|
31
|
+
|
|
32
|
+
if len(old_lines) >= 3: # Only consider substantial patterns
|
|
33
|
+
# Create a signature for this pattern
|
|
34
|
+
pattern_signature = tuple(line.strip() for line in old_lines[:5]) # First 5 lines
|
|
35
|
+
|
|
36
|
+
if pattern_signature in patterns_found:
|
|
37
|
+
patterns_found[pattern_signature] += 1
|
|
38
|
+
else:
|
|
39
|
+
patterns_found[pattern_signature] = 1
|
|
40
|
+
|
|
41
|
+
# Check if any pattern appears multiple times in the file
|
|
42
|
+
for pattern, count in patterns_found.items():
|
|
43
|
+
if count > 1:
|
|
44
|
+
logger.debug(f"Detected identical pattern appearing {count} times: {pattern[:2]}...")
|
|
45
|
+
return True
|
|
46
|
+
|
|
47
|
+
# Also check if similar patterns exist in the file
|
|
48
|
+
for hunk in hunks:
|
|
49
|
+
old_start = hunk.get('old_start', 0)
|
|
50
|
+
old_lines = []
|
|
51
|
+
for line in hunk.get('lines', []):
|
|
52
|
+
if line.startswith('-') or line.startswith(' '):
|
|
53
|
+
old_lines.append(line[1:] if line.startswith(('-', ' ')) else line)
|
|
54
|
+
|
|
55
|
+
if len(old_lines) >= 3:
|
|
56
|
+
# Look for similar patterns elsewhere in the file
|
|
57
|
+
similar_positions = find_similar_patterns(file_lines, old_lines, old_start - 1)
|
|
58
|
+
if len(similar_positions) > 1:
|
|
59
|
+
logger.debug(f"Found {len(similar_positions)} similar patterns for hunk at {old_start}")
|
|
60
|
+
return True
|
|
61
|
+
|
|
62
|
+
return False
|
|
63
|
+
|
|
64
|
+
def find_similar_patterns(file_lines: List[str], pattern: List[str], exclude_pos: int) -> List[int]:
    """
    Find positions in the file where similar patterns occur.

    A position is a candidate when the file line there equals the first
    pattern line (both stripped). The candidate is then scored line by line:
    1 point for an exact stripped match, 0.5 when one non-empty line is a
    substring of the other. It is reported when the score reaches 60% of the
    full pattern length.

    Args:
        file_lines: The file content to scan
        pattern: The block of lines to look for
        exclude_pos: 0-based index to skip as a candidate start position

    Returns:
        0-based start indices of similar blocks, in ascending order. Empty
        when the pattern is empty or its first line is blank.
    """
    if not pattern:
        return []

    stripped_pattern = [line.strip() for line in pattern]
    first_line = stripped_pattern[0]
    if not first_line:
        # A blank first line is not distinctive enough to anchor a search.
        return []

    pattern_len = len(stripped_pattern)
    similar_positions = []

    for i, candidate in enumerate(file_lines):
        if i == exclude_pos or candidate.strip() != first_line:
            continue

        match_score = 0.0
        # The slice is shorter than the pattern near the end of the file; the
        # missing lines simply earn no points.
        for expected, raw in zip(stripped_pattern, file_lines[i:i + pattern_len]):
            actual = raw.strip()
            if expected == actual:
                match_score += 1
            elif expected and actual and (expected in actual or actual in expected):
                # Both sides must be non-empty: "" is a substring of every
                # string and would otherwise earn partial credit.
                match_score += 0.5

        # Score against the whole pattern so a truncated window at the end of
        # the file cannot reach the threshold on a single matching line.
        if match_score / pattern_len >= 0.6:
            similar_positions.append(i)

    return similar_positions
|
|
105
|
+
|
|
106
|
+
def apply_hunks_with_context_awareness(
    file_lines: List[str],
    hunks: List[Dict],
    use_enhanced_matching: bool = True
) -> Tuple[Optional[List[str]], List[Dict]]:
    """
    Apply hunks with enhanced context awareness for identical adjacent blocks.

    Hunks are applied in ascending ``old_start`` order to a copy of
    ``file_lines``. Each hunk is first tried at its header position, shifted
    by the net size change of the hunks applied before it, using a
    whitespace-insensitive comparison. If that fails,
    ``find_best_position_with_context`` is consulted and its position is used
    only when the reported confidence is at least 0.8.

    Args:
        file_lines: Original file content
        hunks: List of hunks to apply; each is a dict read via the
            'old_start', 'lines' and 'number' keys
        use_enhanced_matching: Whether to use enhanced matching

    Returns:
        Tuple of (modified_file_lines, hunk_results). ``(None, [])`` is
        returned when enhanced matching is disabled or no identical adjacent
        blocks are detected, signalling a fall back to normal processing.
        Each hunk result is a dict with 'hunk_id', 'status', 'position' and
        'method', plus 'confidence' for fuzzy and failed attempts.
    """
    if not use_enhanced_matching or not detect_identical_adjacent_blocks(file_lines, hunks):
        # Fall back to normal processing
        return None, []

    logger.info("Applying enhanced context-aware processing for identical adjacent blocks")

    modified_lines = file_lines.copy()
    hunk_results = []
    line_offset = 0  # Net line-count change caused by hunks applied so far

    # Apply hunks in order of their original position.
    for hunk in sorted(hunks, key=lambda h: h.get('old_start', 0)):
        hunk_id = hunk.get('number', 0)

        # Convert the 1-based header position to 0-based and shift it by the
        # earlier changes. Clamp at 0: an 'old_start' of 0 (or a missing key)
        # would otherwise give -1, which slice assignment treats as "before
        # the last line".
        adjusted_start = max(0, hunk.get('old_start', 0) - 1 + line_offset)

        # Split the hunk body into its old and new sides.
        old_lines = []
        new_lines = []
        for line in hunk.get('lines', []):
            if line.startswith('-'):
                old_lines.append(line[1:])
            elif line.startswith('+'):
                new_lines.append(line[1:])
            elif line.startswith(' '):
                # Context line - appears in both old and new
                old_lines.append(line[1:])
                new_lines.append(line[1:])

        size_delta = len(new_lines) - len(old_lines)

        # First choice: the old side matches exactly where the header says.
        window_end = adjusted_start + len(old_lines)
        if window_end <= len(modified_lines):
            actual_lines = modified_lines[adjusted_start:window_end]
            if all(expected.strip() == actual.strip()
                   for expected, actual in zip(old_lines, actual_lines)):
                modified_lines[adjusted_start:window_end] = new_lines
                line_offset += size_delta

                hunk_results.append({
                    'hunk_id': hunk_id,
                    'status': 'succeeded',
                    'position': adjusted_start,
                    'method': 'context_aware_exact'
                })

                logger.debug(f"Applied hunk #{hunk_id} at position {adjusted_start} (exact match)")
                continue

        # Second choice: context-aware fuzzy matching near the expected position.
        best_pos, confidence = find_best_position_with_context(
            modified_lines, old_lines, new_lines, adjusted_start, hunk
        )

        if best_pos is not None and confidence >= 0.8:
            # NOTE(review): only the size change is added to line_offset; the
            # drift (best_pos - adjusted_start) is not carried over to later
            # hunks. Confirm this is intended.
            modified_lines[best_pos:best_pos + len(old_lines)] = new_lines
            line_offset += size_delta

            hunk_results.append({
                'hunk_id': hunk_id,
                'status': 'succeeded',
                'position': best_pos,
                'method': 'context_aware_fuzzy',
                'confidence': confidence
            })

            logger.debug(f"Applied hunk #{hunk_id} at position {best_pos} (fuzzy match, confidence: {confidence:.3f})")
        else:
            # Failed to apply; the file content is left untouched for this hunk.
            failed_confidence = confidence if best_pos is not None else 0.0

            hunk_results.append({
                'hunk_id': hunk_id,
                'status': 'failed',
                'position': None,
                'method': 'context_aware_failed',
                'confidence': failed_confidence
            })

            logger.warning(f"Failed to apply hunk #{hunk_id} (confidence: {failed_confidence:.3f})")

    return modified_lines, hunk_results
|
|
215
|
+
|
|
216
|
+
def find_best_position_with_context(
    file_lines: List[str],
    old_lines: List[str],
    new_lines: List[str],
    expected_pos: int,
    hunk: Dict
) -> Tuple[Optional[int], float]:
    """
    Find the best position to apply a hunk using extended context analysis.

    Candidate positions within 5 lines of ``expected_pos`` are scored as
    60% similarity between ``old_lines`` and the candidate lines, plus 20%
    each for how closely the up-to-10 lines before and after the candidate
    resemble the lines surrounding ``expected_pos`` in the same file. The
    score is then reduced slightly in proportion to the distance from
    ``expected_pos``.

    Args:
        file_lines: Current file content
        old_lines: The hunk's old side (removed and context lines)
        new_lines: The hunk's new side; not used by the scoring
        expected_pos: 0-based position where the hunk is expected to apply
        hunk: The hunk being placed; not used by the scoring

    Returns:
        Tuple of (best_position, confidence). ``(expected_pos, 1.0)`` when
        ``old_lines`` is empty; ``(None, 0.0)`` when no candidate position
        scores above zero or the search window is empty.
    """
    if not old_lines:
        return expected_pos, 1.0

    context_radius = 10
    search_radius = 5

    span = len(old_lines)
    total = len(file_lines)

    # Reference context around the expected position. It does not depend on
    # the candidate, so compute it once. Clamp the anchor at 0 so a negative
    # expected position cannot turn into a wrap-around slice.
    anchor = max(0, expected_pos)
    expected_context_before = file_lines[max(0, anchor - context_radius):anchor]
    expected_context_after = file_lines[anchor + span:anchor + span + context_radius]

    best_pos = None
    best_confidence = 0.0

    # Search in a window around the expected position; the upper bound keeps
    # every candidate window fully inside the file.
    start_search = max(0, expected_pos - search_radius)
    end_search = min(total - span, expected_pos + search_radius)

    for pos in range(start_search, end_search + 1):
        candidate_lines = file_lines[pos:pos + span]

        # Similarity of the hunk's old side to the candidate itself.
        direct_similarity = calculate_line_similarity(old_lines, candidate_lines)

        # Similarity of the candidate's surroundings to the reference context.
        context_before = file_lines[max(0, pos - context_radius):pos]
        context_after = file_lines[pos + span:pos + span + context_radius]
        context_before_sim = calculate_line_similarity(expected_context_before, context_before)
        context_after_sim = calculate_line_similarity(expected_context_after, context_after)

        combined_confidence = (
            direct_similarity * 0.6 +
            context_before_sim * 0.2 +
            context_after_sim * 0.2
        )

        # Penalty for distance from expected position
        distance_penalty = abs(pos - expected_pos) / max(total, 1)
        combined_confidence *= (1.0 - distance_penalty * 0.1)

        if combined_confidence > best_confidence:
            best_confidence = combined_confidence
            best_pos = pos

    return best_pos, best_confidence
|
|
276
|
+
|
|
277
|
+
def calculate_line_similarity(lines1: List[str], lines2: List[str]) -> float:
    """
    Calculate similarity between two lists of lines.

    Each line is stripped of leading and trailing whitespace, the lines are
    joined with newlines, and the two resulting texts are compared character
    by character with ``difflib.SequenceMatcher``.

    Args:
        lines1: First block of lines
        lines2: Second block of lines

    Returns:
        A ratio between 0.0 and 1.0. Two empty lists give 1.0; exactly one
        empty list gives 0.0.
    """
    if not lines1 and not lines2:
        return 1.0
    if not lines1 or not lines2:
        return 0.0

    text1 = '\n'.join(line.strip() for line in lines1)
    text2 = '\n'.join(line.strip() for line in lines2)

    # autojunk is disabled because the default heuristic ignores characters
    # that occur frequently once the second text reaches 200 characters,
    # which can make nearly identical long blocks score close to zero.
    return difflib.SequenceMatcher(None, text1, text2, autojunk=False).ratio()
|