Spaces:
Running
on
Zero
Running
on
Zero
Update response_processor.py
Browse files- response_processor.py +82 -155
response_processor.py
CHANGED
|
@@ -1218,185 +1218,112 @@ class ResponseProcessor:
|
|
| 1218 |
if not re.search(r'[.!?]', response):
|
| 1219 |
raise ResponseProcessingError("Response lacks proper sentence structure")
|
| 1220 |
|
| 1221 |
-
def remove_explanatory_notes(self, response: str
|
| 1222 |
"""
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
Args:
|
| 1226 |
-
response:
|
| 1227 |
-
|
| 1228 |
-
|
| 1229 |
Returns:
|
| 1230 |
-
str:
|
| 1231 |
"""
|
| 1232 |
-
if not response or not response.strip():
|
| 1233 |
-
return response
|
| 1234 |
-
|
| 1235 |
-
original_response = response
|
| 1236 |
-
|
| 1237 |
try:
|
| 1238 |
-
#
|
| 1239 |
-
|
| 1240 |
-
# Note
|
| 1241 |
-
r'(?:^|\
|
| 1242 |
-
|
| 1243 |
-
# "I have" 開頭的解釋句
|
| 1244 |
-
r'(?:^|\n)\s*I\s+have\s+(?:followed|adhered\s+to|ensured|strictly\s+adhered\s+to|also\s+followed).*?(?:\n|$)',
|
| 1245 |
-
|
| 1246 |
-
# "This description" 開頭的說明
|
| 1247 |
-
r'(?:^|\n)\s*This\s+description\s+(?:follows|adheres\s+to|maintains).*?(?:\n|$)',
|
| 1248 |
-
|
| 1249 |
-
# "The enhanced description" 開頭的說明
|
| 1250 |
-
r'(?:^|\n)\s*The\s+enhanced\s+description\s+(?:maintains|preserves).*?(?:\n|$)',
|
| 1251 |
-
|
| 1252 |
-
# "Additionally, I have" 模式
|
| 1253 |
-
r'(?:^|\n)\s*Additionally,?\s*I\s+have.*?(?:\n|$)',
|
| 1254 |
-
|
| 1255 |
-
# "I've" 開頭的解釋
|
| 1256 |
-
r'(?:^|\n)\s*I\'ve\s+(?:maintained|preserved|ensured|avoided).*?(?:\n|$)',
|
| 1257 |
|
| 1258 |
-
#
|
| 1259 |
-
r'(?:^|\
|
| 1260 |
|
| 1261 |
-
#
|
| 1262 |
-
r'(?:^|\
|
| 1263 |
|
| 1264 |
-
#
|
| 1265 |
-
r'
|
| 1266 |
|
| 1267 |
-
#
|
| 1268 |
-
r'(?:^|\
|
| 1269 |
|
| 1270 |
-
#
|
| 1271 |
-
r'(?:^|\
|
| 1272 |
|
| 1273 |
-
#
|
| 1274 |
-
r'(?:^|\
|
| 1275 |
|
| 1276 |
-
#
|
| 1277 |
-
r'(?:^|\
|
| 1278 |
|
| 1279 |
-
#
|
| 1280 |
-
r'(?:^|\
|
| 1281 |
-
|
| 1282 |
-
# "varying sentence structures" 相關
|
| 1283 |
-
r'(?:^|\n).*?varying\s+sentence\s+structures.*?(?:\n|$)',
|
| 1284 |
-
|
| 1285 |
-
# "natural flow" 相關
|
| 1286 |
-
r'(?:^|\n).*?natural\s+flow.*?(?:\n|$)',
|
| 1287 |
-
|
| 1288 |
-
# 長句形式的規則說明
|
| 1289 |
-
r'(?:^|\n).*?(?:focused\s+on\s+describing|clear\s+and\s+concise\s+manner).*?(?:\n|$)'
|
| 1290 |
]
|
| 1291 |
|
| 1292 |
-
#
|
| 1293 |
-
|
| 1294 |
-
|
| 1295 |
-
|
| 1296 |
-
|
| 1297 |
-
|
| 1298 |
-
|
| 1299 |
-
|
| 1300 |
-
|
| 1301 |
-
|
| 1302 |
-
|
| 1303 |
-
|
| 1304 |
-
|
| 1305 |
-
|
| 1306 |
-
|
| 1307 |
-
|
| 1308 |
-
|
| 1309 |
-
r'\busing\s+transitional\s+phrases.*?\.?',
|
| 1310 |
-
r'\bcreate\s+a\s+natural\s+flow.*?\.?'
|
| 1311 |
-
]
|
| 1312 |
-
|
| 1313 |
-
for phrase in problematic_phrases:
|
| 1314 |
-
cleaned_text = re.sub(phrase, '', cleaned_text, flags=re.IGNORECASE)
|
| 1315 |
-
|
| 1316 |
-
# 清理多餘空格和標點
|
| 1317 |
-
cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
|
| 1318 |
-
cleaned_text = re.sub(r'\s*,\s*,\s*', ', ', cleaned_text)
|
| 1319 |
-
cleaned_text = re.sub(r'\s*\.\s*\.\s*', '. ', cleaned_text)
|
| 1320 |
-
|
| 1321 |
-
return cleaned_text.strip()
|
| 1322 |
|
| 1323 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1324 |
content_paragraphs = []
|
| 1325 |
-
|
| 1326 |
for paragraph in paragraphs:
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
# 檢查是否為解釋性段落
|
| 1330 |
-
for pattern in note_patterns:
|
| 1331 |
-
if re.search(pattern, paragraph, flags=re.IGNORECASE | re.MULTILINE):
|
| 1332 |
-
is_explanatory = True
|
| 1333 |
-
if debug:
|
| 1334 |
-
print(f"Removed explanatory paragraph: {paragraph[:50]}...")
|
| 1335 |
-
break
|
| 1336 |
-
|
| 1337 |
-
# 檢查常見的解釋性開頭
|
| 1338 |
-
explanatory_starters = [
|
| 1339 |
-
'note:', 'please note:', 'remember:', 'i have followed',
|
| 1340 |
-
'i have adhered', 'i have strictly', 'additionally, i',
|
| 1341 |
-
'this description follows', 'the enhanced description',
|
| 1342 |
-
'i\'ve maintained', 'i\'ve preserved', 'i\'ve ensured'
|
| 1343 |
-
]
|
| 1344 |
|
| 1345 |
-
|
| 1346 |
-
|
| 1347 |
-
|
| 1348 |
-
|
| 1349 |
-
print(f"Removed paragraph starting with '{starter}': {paragraph[:50]}...")
|
| 1350 |
break
|
| 1351 |
-
|
| 1352 |
-
#
|
| 1353 |
-
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
-
|
| 1357 |
-
if keyword_count >= 2: # 如果包含2個以上規則關鍵詞,視為解釋性段落
|
| 1358 |
-
is_explanatory = True
|
| 1359 |
-
if debug:
|
| 1360 |
-
print(f"Removed rule-heavy paragraph: {paragraph[:50]}...")
|
| 1361 |
-
|
| 1362 |
-
# 保留非解釋性段落
|
| 1363 |
-
if not is_explanatory:
|
| 1364 |
content_paragraphs.append(paragraph)
|
|
|
|
|
|
|
| 1365 |
|
| 1366 |
-
#
|
| 1367 |
-
if
|
| 1368 |
-
|
| 1369 |
-
|
| 1370 |
-
|
| 1371 |
-
|
| 1372 |
-
|
| 1373 |
-
|
| 1374 |
-
result = re.sub(r'\s+', ' ', result).strip()
|
| 1375 |
-
else:
|
| 1376 |
-
result = ""
|
| 1377 |
-
|
| 1378 |
-
# 階段6:最終清理
|
| 1379 |
-
if result:
|
| 1380 |
-
# 移除可能殘留的解釋性片段
|
| 1381 |
-
result = re.sub(r'\s*,?\s*avoiding\s+any\s+(?:assumptions|speculation).*?(?=\.|$)', '', result, flags=re.IGNORECASE)
|
| 1382 |
-
result = re.sub(r'\s*,?\s*using\s+(?:transitional\s+phrases|clear\s+and\s+concise).*?(?=\.|$)', '', result, flags=re.IGNORECASE)
|
| 1383 |
|
| 1384 |
-
|
| 1385 |
-
|
| 1386 |
-
result = re.sub(r'\s*([,.!?])\s*', r'\1 ', result)
|
| 1387 |
-
result = re.sub(r'\s+([,.!?])', r'\1', result)
|
| 1388 |
-
result = result.strip()
|
| 1389 |
-
|
| 1390 |
-
if debug and hasattr(self, 'logger'):
|
| 1391 |
-
self.logger.info(f"Cleaning completed. Original length: {len(original_response)}, Final length: {len(result)}")
|
| 1392 |
-
|
| 1393 |
-
return result if result else original_response
|
| 1394 |
-
|
| 1395 |
except Exception as e:
|
| 1396 |
-
|
| 1397 |
-
|
| 1398 |
-
self.logger.error(f"Error during cleaning: {str(e)}")
|
| 1399 |
-
return original_response
|
| 1400 |
|
| 1401 |
def get_processor_info(self) -> Dict[str, Any]:
|
| 1402 |
"""
|
|
|
|
| 1218 |
if not re.search(r'[.!?]', response):
|
| 1219 |
raise ResponseProcessingError("Response lacks proper sentence structure")
|
| 1220 |
|
| 1221 |
+
def remove_explanatory_notes(self, response: str) -> str:
    """
    Strip self-referential explanatory notes from a generated response.

    Two kinds of notes are removed:

    * Inline clauses such as "Note that I have ...", "I have strictly
      adhered to ...", "avoiding any assumptions ..." or "using
      transitional phrases ...". Each is removed from its opener up to
      (not including) the next period, the next capitalised word, or the
      end of the paragraph.
    * Whole lines that start with "Note:", "I have followed ...",
      "This description follows ..." or "The enhanced description
      maintains ...", and whole paragraphs that start with "note:",
      "please note:" or "remember:".

    Paragraphs (separated by blank lines) are processed independently and
    re-joined with a blank line; whitespace inside a paragraph is collapsed
    to single spaces. Punctuation repair is only applied to paragraphs from
    which an inline note was actually removed, so ordinary commas and
    ellipses in clean text are left alone.

    Args:
        response: The response text that may contain explanatory notes.

    Returns:
        str: The response with notes removed. If cleaning would leave fewer
        than 10 characters, the response with only the inline notes removed
        is returned instead. Blank input is returned as is, and the
        untouched input is returned if cleaning raises.
    """
    # Nothing to clean; this also keeps None / blank input off the regex path.
    if not response or not response.strip():
        return response

    try:
        # Tail shared by every inline pattern: consume lazily up to the next
        # sentence boundary. The lookahead must stay case-sensitive, so only
        # the opener is wrapped in a scoped (?i:...) group; a global
        # IGNORECASE would let "\s[A-Z]" match any following word and end
        # the match right after the opener.
        note_tail = r'.*?(?=\s[A-Z]|\.|$)'
        specific_note_openers = (
            # Note that I have...
            r'Note\s+that\s+I\s+have',
            # I have strictly adhered...
            r'I\s+have\s+strictly\s+adhered\s+to',
            # I have followed/ensured...
            r'I\s+have\s+(?:followed|ensured|also\s+followed)',
            # Additionally, I have...
            r'Additionally,?\s*I\s+have',
            # avoiding any assumptions...
            r'avoiding\s+any\s+(?:assumptions|inferences)',
            # object whitelist and detail accuracy rule
            r'(?:object\s+whitelist\s+and\s+detail\s+accuracy\s+rule|detail\s+accuracy\s+rule)',
            # using transitional phrases
            r'using\s+transitional\s+phrases',
            # create a natural flow
            r'(?:and\s+have\s+focused\s+on|focused\s+on)\s+describing.*?natural\s+flow',
            # critical adherence to input rule
            r'critical\s+adherence\s+to\s+input\s+rule',
        )
        # DOTALL lets a hard-wrapped note span line breaks; patterns are only
        # ever run on one paragraph at a time, so the reach stays bounded.
        specific_note_patterns = [
            re.compile(r'(?:^|\s)(?i:' + opener + r')' + note_tail, re.DOTALL)
            for opener in specific_note_openers
        ]

        # Traditional note lines, matched against the start of each line.
        traditional_note_line = re.compile(
            r'\s*(?:'
            r'Note:'
            r'|I have (?:followed|adhered to|ensured)'
            r'|This description (?:follows|adheres to|maintains)'
            r'|The enhanced description (?:maintains|preserves)'
            r')',
            re.IGNORECASE,
        )
        note_paragraph_starters = ('note:', 'please note:', 'remember:')

        def strip_specific_notes(text: str) -> str:
            """Remove every inline explanatory clause from *text*."""
            for pattern in specific_note_patterns:
                text = pattern.sub('', text)
            return text

        content_paragraphs = []
        # Split BEFORE collapsing whitespace so paragraph breaks survive.
        for paragraph in re.split(r'\n\s*\n', response):
            stripped = strip_specific_notes(paragraph)
            note_removed = stripped != paragraph

            # Drop traditional note lines while line breaks still exist.
            kept_lines = [
                line for line in stripped.split('\n')
                if not traditional_note_line.match(line)
            ]
            text = re.sub(r'\s+', ' ', ' '.join(kept_lines)).strip()

            if note_removed:
                # Repair punctuation left behind by the removed clause.
                text = re.sub(r'\s*,\s*,\s*', ', ', text)
                text = re.sub(r'\s*,\s*\.', '.', text)
                text = re.sub(r'\s*\.(?:\s*\.)+\s*', '. ', text)
                text = re.sub(r'^[\s.,;:]+', '', text)
                # A dangling comma where the clause used to be ends the sentence.
                text = re.sub(r'(\w)\s*,\s*$', r'\1.', text)
                text = re.sub(r'(\w)\s*,\s*([A-Z])', r'\1. \2', text)
                text = text.strip()

            # Skip paragraphs reduced to nothing or to bare punctuation.
            if not re.search(r'\w', text):
                continue
            # Skip paragraphs that open with a common note marker.
            if text.lower().startswith(note_paragraph_starters):
                continue

            content_paragraphs.append(text)

        result = '\n\n'.join(content_paragraphs)

        # Final check: never hand back an (almost) empty result.
        if len(result) < 10:
            fallback_result = re.sub(r'\s+', ' ', strip_specific_notes(response)).strip()
            return fallback_result if fallback_result else response

        return result

    except Exception as e:
        # Best-effort cleanup: on any failure keep the caller's text intact.
        self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
        return response
|
|
|
|
|
|
|
| 1327 |
|
| 1328 |
def get_processor_info(self) -> Dict[str, Any]:
|
| 1329 |
"""
|