Spaces:
Running
on
Zero
Running
on
Zero
fixed some description issues
Browse files
- content_generator.py +144 -22
- functional_zone_detector.py +45 -6
- functional_zone_identifier.py +64 -27
- response_processor.py +43 -13
- ui_manager.py +13 -13
content_generator.py
CHANGED
|
@@ -15,7 +15,7 @@ class ContentGenerator:
|
|
| 15 |
"""初始化內容生成器"""
|
| 16 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 17 |
|
| 18 |
-
# 預載入默認替換內容
|
| 19 |
self.default_replacements = self._generate_default_replacements()
|
| 20 |
|
| 21 |
self.logger.debug("ContentGenerator initialized successfully")
|
|
@@ -238,7 +238,7 @@ class ContentGenerator:
|
|
| 238 |
if not detected_objects:
|
| 239 |
return "various elements"
|
| 240 |
|
| 241 |
-
# 計算物件統計
|
| 242 |
object_counts = {}
|
| 243 |
total_confidence = 0
|
| 244 |
|
|
@@ -277,21 +277,40 @@ class ContentGenerator:
|
|
| 277 |
else:
|
| 278 |
descriptions.append(f"{count} {clean_name}s")
|
| 279 |
|
| 280 |
-
#
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
else:
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
|
| 288 |
except Exception as e:
|
| 289 |
self.logger.warning(f"Error generating objects summary: {str(e)}")
|
| 290 |
return "various elements"
|
| 291 |
|
| 292 |
def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
|
| 293 |
-
|
| 294 |
-
|
| 295 |
"""
|
| 296 |
獲取特定佔位符的替換內容,確保永遠不返回空值
|
| 297 |
|
|
@@ -306,16 +325,34 @@ class ContentGenerator:
|
|
| 306 |
str: 替換內容
|
| 307 |
"""
|
| 308 |
try:
|
| 309 |
-
#
|
| 310 |
-
|
| 311 |
-
'primary_objects'
|
| 312 |
-
'
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
if placeholder in
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 319 |
|
| 320 |
# 檢查預定義替換內容
|
| 321 |
if placeholder in all_replacements:
|
|
@@ -346,7 +383,7 @@ class ContentGenerator:
|
|
| 346 |
if scene_specific_replacement and scene_specific_replacement.strip():
|
| 347 |
return scene_specific_replacement.strip()
|
| 348 |
|
| 349 |
-
# 通用備用字典
|
| 350 |
fallback_replacements = {
|
| 351 |
# 交通和城市相關
|
| 352 |
"crossing_pattern": "pedestrian crosswalks",
|
|
@@ -405,7 +442,7 @@ class ContentGenerator:
|
|
| 405 |
# 最終備用:將下劃線轉換為有意義的短語
|
| 406 |
cleaned_placeholder = placeholder.replace('_', ' ')
|
| 407 |
|
| 408 |
-
#
|
| 409 |
if placeholder.endswith('_pattern'):
|
| 410 |
return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
|
| 411 |
elif placeholder.endswith('_behavior'):
|
|
@@ -421,9 +458,94 @@ class ContentGenerator:
|
|
| 421 |
|
| 422 |
except Exception as e:
|
| 423 |
self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
|
| 424 |
-
# 確保即使在異常情況下也返回有意義的內容
|
| 425 |
return placeholder.replace('_', ' ') if placeholder else "scene elements"
|
| 426 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
|
| 428 |
"""
|
| 429 |
基於場景類型提供智能默認值
|
|
|
|
| 15 |
"""初始化內容生成器"""
|
| 16 |
self.logger = logging.getLogger(self.__class__.__name__)
|
| 17 |
|
| 18 |
+
# 預載入默認替換內容
|
| 19 |
self.default_replacements = self._generate_default_replacements()
|
| 20 |
|
| 21 |
self.logger.debug("ContentGenerator initialized successfully")
|
|
|
|
| 238 |
if not detected_objects:
|
| 239 |
return "various elements"
|
| 240 |
|
| 241 |
+
# 計算物件統計
|
| 242 |
object_counts = {}
|
| 243 |
total_confidence = 0
|
| 244 |
|
|
|
|
| 277 |
else:
|
| 278 |
descriptions.append(f"{count} {clean_name}s")
|
| 279 |
|
| 280 |
+
# === 修正組合描述邏輯,增加驗證機制 ===
|
| 281 |
+
# 過濾掉空的或無效的描述
|
| 282 |
+
valid_descriptions = [desc.strip() for desc in descriptions if desc and desc.strip()]
|
| 283 |
+
|
| 284 |
+
# 確保有有效的描述項目
|
| 285 |
+
if not valid_descriptions:
|
| 286 |
+
return "various elements"
|
| 287 |
+
|
| 288 |
+
# 組合描述 - 修正邏輯以避免不完整的結尾
|
| 289 |
+
if len(valid_descriptions) == 1:
|
| 290 |
+
return valid_descriptions[0]
|
| 291 |
+
elif len(valid_descriptions) == 2:
|
| 292 |
+
return f"{valid_descriptions[0]} and {valid_descriptions[1]}"
|
| 293 |
else:
|
| 294 |
+
# 對於3個或以上的項目,確保正確的語法結構
|
| 295 |
+
main_items = ", ".join(valid_descriptions[:-1])
|
| 296 |
+
last_item = valid_descriptions[-1]
|
| 297 |
+
# 確保 main_items 和 last_item 都不為空
|
| 298 |
+
if main_items and last_item:
|
| 299 |
+
return f"{main_items}, and {last_item}"
|
| 300 |
+
elif main_items:
|
| 301 |
+
return main_items
|
| 302 |
+
elif last_item:
|
| 303 |
+
return last_item
|
| 304 |
+
else:
|
| 305 |
+
return "various elements"
|
| 306 |
|
| 307 |
except Exception as e:
|
| 308 |
self.logger.warning(f"Error generating objects summary: {str(e)}")
|
| 309 |
return "various elements"
|
| 310 |
|
| 311 |
def get_placeholder_replacement(self, placeholder: str, fillers: Dict,
|
| 312 |
+
all_replacements: Dict, detected_objects: List[Dict],
|
| 313 |
+
scene_type: str) -> str:
|
| 314 |
"""
|
| 315 |
獲取特定佔位符的替換內容,確保永遠不返回空值
|
| 316 |
|
|
|
|
| 325 |
str: 替換內容
|
| 326 |
"""
|
| 327 |
try:
|
| 328 |
+
# 動態佔位符的差異化處理策略
|
| 329 |
+
dynamic_placeholders_mapping = {
|
| 330 |
+
'primary_objects': 'full_summary',
|
| 331 |
+
'detected_objects_summary': 'full_summary',
|
| 332 |
+
'main_objects': 'simple_summary',
|
| 333 |
+
'functional_area': 'area_focus',
|
| 334 |
+
'functional_zones_description': 'zones_focus',
|
| 335 |
+
'scene_elements': 'elements_focus'
|
| 336 |
+
}
|
| 337 |
|
| 338 |
+
if placeholder in dynamic_placeholders_mapping:
|
| 339 |
+
content_type = dynamic_placeholders_mapping[placeholder]
|
| 340 |
+
|
| 341 |
+
# 根據內容類型和當前檢測物件生成不同的描述
|
| 342 |
+
if content_type == 'full_summary':
|
| 343 |
+
return self.generate_objects_summary(detected_objects)
|
| 344 |
+
elif content_type == 'simple_summary':
|
| 345 |
+
# 避免重複敘述
|
| 346 |
+
return self._generate_simplified_objects_summary(detected_objects)
|
| 347 |
+
elif content_type == 'area_focus':
|
| 348 |
+
# 以圖片中的area 作為重點描述
|
| 349 |
+
return self._generate_area_focused_summary(detected_objects)
|
| 350 |
+
elif content_type == 'zones_focus':
|
| 351 |
+
# 以圖片中的zones 作為重點描述
|
| 352 |
+
return self._generate_zones_summary(detected_objects)
|
| 353 |
+
elif content_type == 'elements_focus':
|
| 354 |
+
# 以圖片中物品作為重點描述
|
| 355 |
+
return self._generate_elements_summary(detected_objects)
|
| 356 |
|
| 357 |
# 檢查預定義替換內容
|
| 358 |
if placeholder in all_replacements:
|
|
|
|
| 383 |
if scene_specific_replacement and scene_specific_replacement.strip():
|
| 384 |
return scene_specific_replacement.strip()
|
| 385 |
|
| 386 |
+
# 通用備用字典
|
| 387 |
fallback_replacements = {
|
| 388 |
# 交通和城市相關
|
| 389 |
"crossing_pattern": "pedestrian crosswalks",
|
|
|
|
| 442 |
# 最終備用:將下劃線轉換為有意義的短語
|
| 443 |
cleaned_placeholder = placeholder.replace('_', ' ')
|
| 444 |
|
| 445 |
+
# 對常見模式提供更全面的default value
|
| 446 |
if placeholder.endswith('_pattern'):
|
| 447 |
return f"{cleaned_placeholder.replace(' pattern', '')} arrangement"
|
| 448 |
elif placeholder.endswith('_behavior'):
|
|
|
|
| 458 |
|
| 459 |
except Exception as e:
|
| 460 |
self.logger.warning(f"Error getting replacement for placeholder '{placeholder}': {str(e)}")
|
|
|
|
| 461 |
return placeholder.replace('_', ' ') if placeholder else "scene elements"
|
| 462 |
|
| 463 |
+
def _generate_simplified_objects_summary(self, detected_objects: List[Dict]) -> str:
|
| 464 |
+
"""生成簡化的物件摘要,避免與詳細摘要重複"""
|
| 465 |
+
try:
|
| 466 |
+
if not detected_objects:
|
| 467 |
+
return "scene elements"
|
| 468 |
+
|
| 469 |
+
# 只取最重要的前3個物件
|
| 470 |
+
object_counts = {}
|
| 471 |
+
for obj in detected_objects:
|
| 472 |
+
class_name = obj.get("class_name", "unknown")
|
| 473 |
+
confidence = obj.get("confidence", 0.5)
|
| 474 |
+
|
| 475 |
+
if class_name not in object_counts:
|
| 476 |
+
object_counts[class_name] = {"count": 0, "total_confidence": 0}
|
| 477 |
+
|
| 478 |
+
object_counts[class_name]["count"] += 1
|
| 479 |
+
object_counts[class_name]["total_confidence"] += confidence
|
| 480 |
+
|
| 481 |
+
# 排序並取前3個
|
| 482 |
+
sorted_objects = []
|
| 483 |
+
for class_name, stats in object_counts.items():
|
| 484 |
+
count = stats["count"]
|
| 485 |
+
avg_confidence = stats["total_confidence"] / count
|
| 486 |
+
importance = count * 0.6 + avg_confidence * 0.4
|
| 487 |
+
sorted_objects.append((class_name, count, importance))
|
| 488 |
+
|
| 489 |
+
sorted_objects.sort(key=lambda x: x[2], reverse=True)
|
| 490 |
+
top_objects = sorted_objects[:3]
|
| 491 |
+
|
| 492 |
+
if top_objects:
|
| 493 |
+
primary_object = top_objects[0]
|
| 494 |
+
clean_name = primary_object[0].replace('_', ' ')
|
| 495 |
+
count = primary_object[1]
|
| 496 |
+
|
| 497 |
+
if count == 1:
|
| 498 |
+
article = "an" if clean_name[0].lower() in 'aeiou' else "a"
|
| 499 |
+
return f"{article} {clean_name}"
|
| 500 |
+
else:
|
| 501 |
+
return f"{count} {clean_name}s"
|
| 502 |
+
|
| 503 |
+
return "scene elements"
|
| 504 |
+
|
| 505 |
+
except Exception as e:
|
| 506 |
+
self.logger.warning(f"Error generating simplified summary: {str(e)}")
|
| 507 |
+
return "scene elements"
|
| 508 |
+
|
| 509 |
+
def _generate_area_focused_summary(self, detected_objects: List[Dict]) -> str:
|
| 510 |
+
"""生成區域導向的摘要"""
|
| 511 |
+
try:
|
| 512 |
+
# 根據檢測到的物件推斷主要功能區域
|
| 513 |
+
furniture_objects = [obj for obj in detected_objects if obj.get("class_name") in ["chair", "dining table", "sofa", "bed"]]
|
| 514 |
+
|
| 515 |
+
if any(obj.get("class_name") == "dining table" for obj in furniture_objects):
|
| 516 |
+
return "dining area"
|
| 517 |
+
elif any(obj.get("class_name") == "sofa" for obj in furniture_objects):
|
| 518 |
+
return "seating area"
|
| 519 |
+
elif any(obj.get("class_name") == "bed" for obj in furniture_objects):
|
| 520 |
+
return "sleeping area"
|
| 521 |
+
elif furniture_objects:
|
| 522 |
+
return "furnished area"
|
| 523 |
+
else:
|
| 524 |
+
return "activity area"
|
| 525 |
+
|
| 526 |
+
except Exception as e:
|
| 527 |
+
self.logger.warning(f"Error generating area-focused summary: {str(e)}")
|
| 528 |
+
return "functional area"
|
| 529 |
+
|
| 530 |
+
def _generate_zones_summary(self, detected_objects: List[Dict]) -> str:
|
| 531 |
+
"""生成區域描述摘要"""
|
| 532 |
+
try:
|
| 533 |
+
return "organized areas of activity"
|
| 534 |
+
except Exception as e:
|
| 535 |
+
return "functional zones"
|
| 536 |
+
|
| 537 |
+
def _generate_elements_summary(self, detected_objects: List[Dict]) -> str:
|
| 538 |
+
"""生成元素導向的摘要"""
|
| 539 |
+
try:
|
| 540 |
+
if len(detected_objects) > 5:
|
| 541 |
+
return "diverse elements"
|
| 542 |
+
elif len(detected_objects) > 2:
|
| 543 |
+
return "multiple elements"
|
| 544 |
+
else:
|
| 545 |
+
return "key elements"
|
| 546 |
+
except Exception as e:
|
| 547 |
+
return "scene elements"
|
| 548 |
+
|
| 549 |
def get_scene_based_default(self, placeholder: str, scene_type: str) -> Optional[str]:
|
| 550 |
"""
|
| 551 |
基於場景類型提供智能默認值
|
functional_zone_detector.py
CHANGED
|
@@ -230,7 +230,14 @@ class FunctionalZoneDetector:
|
|
| 230 |
region = zone_data.get("region", "")
|
| 231 |
description = zone_data.get("description", "")
|
| 232 |
|
| 233 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
| 235 |
base_name = "dining area"
|
| 236 |
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
|
@@ -241,20 +248,52 @@ class FunctionalZoneDetector:
|
|
| 241 |
base_name = "workspace area"
|
| 242 |
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
| 243 |
base_name = "decorative area"
|
| 244 |
-
elif
|
|
|
|
| 245 |
base_name = "kitchen area"
|
| 246 |
else:
|
| 247 |
-
#
|
| 248 |
-
if "dining" in description.lower():
|
|
|
|
| 249 |
base_name = "dining area"
|
| 250 |
elif "seating" in description.lower() or "relaxation" in description.lower():
|
| 251 |
base_name = "seating area"
|
| 252 |
-
elif "work" in description.lower():
|
|
|
|
| 253 |
base_name = "workspace area"
|
| 254 |
elif "decorative" in description.lower():
|
| 255 |
base_name = "decorative area"
|
| 256 |
else:
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
# 為次要區域添加位置標識以區分
|
| 260 |
if priority_level == "secondary" and region:
|
|
|
|
| 230 |
region = zone_data.get("region", "")
|
| 231 |
description = zone_data.get("description", "")
|
| 232 |
|
| 233 |
+
# 確保只有在明確檢測到廚房設備時才產生 kitchen area
|
| 234 |
+
kitchen_objects = ["refrigerator", "microwave", "oven", "sink", "dishwasher", "stove"]
|
| 235 |
+
explicit_kitchen_detected = any(
|
| 236 |
+
any(kitchen_item in obj.lower() for kitchen_item in kitchen_objects)
|
| 237 |
+
for obj in objects
|
| 238 |
+
)
|
| 239 |
+
|
| 240 |
+
# 基於物件內容確定功能類型(保持原有順序,但加強廚房確認, 因為與dining room混淆)
|
| 241 |
if any("dining" in obj.lower() or "table" in obj.lower() for obj in objects):
|
| 242 |
base_name = "dining area"
|
| 243 |
elif any("chair" in obj.lower() or "sofa" in obj.lower() for obj in objects):
|
|
|
|
| 248 |
base_name = "workspace area"
|
| 249 |
elif any("plant" in obj.lower() or "vase" in obj.lower() for obj in objects):
|
| 250 |
base_name = "decorative area"
|
| 251 |
+
elif explicit_kitchen_detected:
|
| 252 |
+
# 只有在明確檢測到廚房設備時才使用 kitchen area
|
| 253 |
base_name = "kitchen area"
|
| 254 |
else:
|
| 255 |
+
# 基於描述內容推斷,但避免不當的 kitchen area 判斷
|
| 256 |
+
if "dining" in description.lower() and any("table" in obj.lower() for obj in objects):
|
| 257 |
+
# 只有當描述中提到 dining 且確實有桌子時才使用 dining area
|
| 258 |
base_name = "dining area"
|
| 259 |
elif "seating" in description.lower() or "relaxation" in description.lower():
|
| 260 |
base_name = "seating area"
|
| 261 |
+
elif "work" in description.lower() and any("laptop" in obj.lower() or "keyboard" in obj.lower() for obj in objects):
|
| 262 |
+
# 只有當描述中提到 work 且確實有工作設備時才使用 workspace area
|
| 263 |
base_name = "workspace area"
|
| 264 |
elif "decorative" in description.lower():
|
| 265 |
base_name = "decorative area"
|
| 266 |
else:
|
| 267 |
+
# 根據主要物件類型決定預設區域類型,避免使用 kitchen area
|
| 268 |
+
if objects:
|
| 269 |
+
# 根據最常見的物件類型決定區域名稱
|
| 270 |
+
object_counts = {}
|
| 271 |
+
for obj in objects:
|
| 272 |
+
obj_lower = obj.lower()
|
| 273 |
+
if "chair" in obj_lower:
|
| 274 |
+
object_counts["seating"] = object_counts.get("seating", 0) + 1
|
| 275 |
+
elif "table" in obj_lower:
|
| 276 |
+
object_counts["dining"] = object_counts.get("dining", 0) + 1
|
| 277 |
+
elif "person" in obj_lower:
|
| 278 |
+
object_counts["activity"] = object_counts.get("activity", 0) + 1
|
| 279 |
+
else:
|
| 280 |
+
object_counts["general"] = object_counts.get("general", 0) + 1
|
| 281 |
+
|
| 282 |
+
# 選擇最常見的類型
|
| 283 |
+
if object_counts:
|
| 284 |
+
most_common = max(object_counts, key=object_counts.get)
|
| 285 |
+
if most_common == "seating":
|
| 286 |
+
base_name = "seating area"
|
| 287 |
+
elif most_common == "dining":
|
| 288 |
+
base_name = "dining area"
|
| 289 |
+
elif most_common == "activity":
|
| 290 |
+
base_name = "activity area"
|
| 291 |
+
else:
|
| 292 |
+
base_name = "functional area"
|
| 293 |
+
else:
|
| 294 |
+
base_name = "functional area"
|
| 295 |
+
else:
|
| 296 |
+
base_name = "functional area"
|
| 297 |
|
| 298 |
# 為次要區域添加位置標識以區分
|
| 299 |
if priority_level == "secondary" and region:
|
functional_zone_identifier.py
CHANGED
|
@@ -688,7 +688,7 @@ class FunctionalZoneIdentifier:
|
|
| 688 |
if not high_conf_objects:
|
| 689 |
high_conf_objects = detected_objects # 後備到所有物件
|
| 690 |
|
| 691 |
-
#
|
| 692 |
processed_objects = set() # 避免重複處理相同類型的物件
|
| 693 |
|
| 694 |
for obj in high_conf_objects[:3]: # 限制為前3個物件
|
|
@@ -788,7 +788,6 @@ class FunctionalZoneIdentifier:
|
|
| 788 |
區域描述字串
|
| 789 |
"""
|
| 790 |
try:
|
| 791 |
-
# 物件特定描述
|
| 792 |
descriptions = {
|
| 793 |
"bed": "Sleeping and rest area",
|
| 794 |
"sofa": "Seating and relaxation area",
|
|
@@ -797,11 +796,43 @@ class FunctionalZoneIdentifier:
|
|
| 797 |
"tv": "Entertainment and media area",
|
| 798 |
"laptop": "Work and computing area",
|
| 799 |
"potted plant": "Decorative and green space area",
|
| 800 |
-
"refrigerator": "Food storage and kitchen area",
|
| 801 |
"car": "Vehicle and transportation area",
|
| 802 |
"person": "Activity and social area"
|
| 803 |
}
|
| 804 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 805 |
return descriptions.get(class_name, f"Functional area with {class_name}")
|
| 806 |
|
| 807 |
except Exception as e:
|
|
@@ -899,30 +930,36 @@ class FunctionalZoneIdentifier:
|
|
| 899 |
"surfboard": "sports area",
|
| 900 |
"tennis racket": "sports area",
|
| 901 |
|
| 902 |
-
#
|
| 903 |
-
"bottle": "
|
| 904 |
-
"wine glass": "
|
| 905 |
-
"cup": "
|
| 906 |
-
"fork": "
|
| 907 |
-
"knife": "
|
| 908 |
-
"spoon": "
|
| 909 |
-
"bowl": "
|
| 910 |
-
"
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
"
|
| 914 |
-
"
|
| 915 |
-
"
|
| 916 |
-
"
|
| 917 |
-
"
|
| 918 |
-
"
|
| 919 |
-
"
|
| 920 |
-
"
|
| 921 |
-
"
|
| 922 |
-
"
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 926 |
"book": "miscellaneous area",
|
| 927 |
"clock": "miscellaneous area",
|
| 928 |
"vase": "decorative area",
|
|
|
|
| 688 |
if not high_conf_objects:
|
| 689 |
high_conf_objects = detected_objects # 後備到所有物件
|
| 690 |
|
| 691 |
+
# 根據個別重要物件創建區域
|
| 692 |
processed_objects = set() # 避免重複處理相同類型的物件
|
| 693 |
|
| 694 |
for obj in high_conf_objects[:3]: # 限制為前3個物件
|
|
|
|
| 788 |
區域描述字串
|
| 789 |
"""
|
| 790 |
try:
|
|
|
|
| 791 |
descriptions = {
|
| 792 |
"bed": "Sleeping and rest area",
|
| 793 |
"sofa": "Seating and relaxation area",
|
|
|
|
| 796 |
"tv": "Entertainment and media area",
|
| 797 |
"laptop": "Work and computing area",
|
| 798 |
"potted plant": "Decorative and green space area",
|
|
|
|
| 799 |
"car": "Vehicle and transportation area",
|
| 800 |
"person": "Activity and social area"
|
| 801 |
}
|
| 802 |
|
| 803 |
+
# 只有在明確的廚房場景中才使用廚房描述
|
| 804 |
+
kitchen_related_objects = ["refrigerator", "microwave", "oven", "sink", "dishwasher", "stove"]
|
| 805 |
+
|
| 806 |
+
if class_name in kitchen_related_objects:
|
| 807 |
+
# 檢查場景類型是否真的是廚房相關
|
| 808 |
+
kitchen_scene_types = ["kitchen", "professional_kitchen", "cooking_area"]
|
| 809 |
+
|
| 810 |
+
if scene_type in kitchen_scene_types:
|
| 811 |
+
# 只有在明確的廚房場景中才使用廚房描述
|
| 812 |
+
if class_name == "refrigerator":
|
| 813 |
+
descriptions[class_name] = "Food storage and kitchen area"
|
| 814 |
+
elif class_name == "microwave":
|
| 815 |
+
descriptions[class_name] = "Food preparation area"
|
| 816 |
+
elif class_name == "oven":
|
| 817 |
+
descriptions[class_name] = "Cooking area"
|
| 818 |
+
elif class_name == "sink":
|
| 819 |
+
descriptions[class_name] = "Washing and preparation area"
|
| 820 |
+
else:
|
| 821 |
+
descriptions[class_name] = "Kitchen appliance area"
|
| 822 |
+
else:
|
| 823 |
+
# === 修正:非廚房場景中的廚房物件使用中性描述 ===
|
| 824 |
+
# 在餐廳、客廳等場景中,即使檢測到這些物件也不使用廚房描述
|
| 825 |
+
if class_name == "refrigerator":
|
| 826 |
+
descriptions[class_name] = "Storage area"
|
| 827 |
+
elif class_name == "microwave":
|
| 828 |
+
descriptions[class_name] = "Appliance area"
|
| 829 |
+
elif class_name == "oven":
|
| 830 |
+
descriptions[class_name] = "Equipment area"
|
| 831 |
+
elif class_name == "sink":
|
| 832 |
+
descriptions[class_name] = "Utility area"
|
| 833 |
+
else:
|
| 834 |
+
descriptions[class_name] = "Equipment area"
|
| 835 |
+
|
| 836 |
return descriptions.get(class_name, f"Functional area with {class_name}")
|
| 837 |
|
| 838 |
except Exception as e:
|
|
|
|
| 930 |
"surfboard": "sports area",
|
| 931 |
"tennis racket": "sports area",
|
| 932 |
|
| 933 |
+
# 餐具與用餐相關物品重新歸類為 dining area
|
| 934 |
+
"bottle": "dining area",
|
| 935 |
+
"wine glass": "dining area",
|
| 936 |
+
"cup": "dining area",
|
| 937 |
+
"fork": "dining area",
|
| 938 |
+
"knife": "dining area",
|
| 939 |
+
"spoon": "dining area",
|
| 940 |
+
"bowl": "dining area",
|
| 941 |
+
"dining table": "dining area", # 確保 dining table 也歸類為 dining area
|
| 942 |
+
|
| 943 |
+
# 食品使用中性的 food area
|
| 944 |
+
"banana": "food area",
|
| 945 |
+
"apple": "food area",
|
| 946 |
+
"sandwich": "food area",
|
| 947 |
+
"orange": "food area",
|
| 948 |
+
"broccoli": "food area",
|
| 949 |
+
"carrot": "food area",
|
| 950 |
+
"hot dog": "food area",
|
| 951 |
+
"pizza": "food area",
|
| 952 |
+
"donut": "food area",
|
| 953 |
+
"cake": "food area",
|
| 954 |
+
|
| 955 |
+
# 只有在有明確的廚房設備才使用 kitchen area
|
| 956 |
+
"refrigerator": "kitchen appliance area",
|
| 957 |
+
"oven": "kitchen appliance area",
|
| 958 |
+
"microwave": "kitchen appliance area",
|
| 959 |
+
"toaster": "kitchen appliance area",
|
| 960 |
+
"sink": "kitchen appliance area",
|
| 961 |
+
|
| 962 |
+
# 其他物品
|
| 963 |
"book": "miscellaneous area",
|
| 964 |
"clock": "miscellaneous area",
|
| 965 |
"vase": "decorative area",
|
response_processor.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
# %%writefile response_processor.py
|
| 2 |
import re
|
| 3 |
import logging
|
| 4 |
import traceback
|
|
@@ -692,37 +691,68 @@ class ResponseProcessor:
|
|
| 692 |
for pattern, replacement in identical_cleanup_patterns:
|
| 693 |
processed_response = re.sub(pattern, replacement, processed_response, flags=re.IGNORECASE)
|
| 694 |
|
| 695 |
-
#
|
| 696 |
number_conversions = {
|
| 697 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
| 698 |
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
| 699 |
'11': 'eleven', '12': 'twelve'
|
| 700 |
}
|
| 701 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
# 處理各種語法結構中的數字
|
| 703 |
for digit, word in number_conversions.items():
|
| 704 |
-
#
|
| 705 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
| 706 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
| 707 |
|
| 708 |
-
#
|
| 709 |
-
|
| 710 |
-
|
|
|
|
| 711 |
|
| 712 |
-
#
|
| 713 |
-
pattern3 = rf'\b{digit}\s+(
|
| 714 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
| 715 |
|
| 716 |
-
#
|
| 717 |
-
|
| 718 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 719 |
|
| 720 |
return processed_response
|
| 721 |
|
| 722 |
except Exception as e:
|
| 723 |
self.logger.error(f"Error in _handle_repetitive_vocabulary: {str(e)}")
|
| 724 |
self.logger.error(traceback.format_exc())
|
| 725 |
-
return response
|
| 726 |
|
| 727 |
def _ensure_grammatical_completeness(self, response: str) -> str:
|
| 728 |
"""
|
|
@@ -1159,4 +1189,4 @@ class ResponseProcessor:
|
|
| 1159 |
"suffixes_to_remove_count": len(self.suffixes_to_remove),
|
| 1160 |
"repetitive_patterns_count": len(self.repetitive_patterns),
|
| 1161 |
"initialization_status": "success"
|
| 1162 |
-
}
|
|
|
|
|
|
|
| 1 |
import re
|
| 2 |
import logging
|
| 3 |
import traceback
|
|
|
|
| 691 |
for pattern, replacement in identical_cleanup_patterns:
|
| 692 |
processed_response = re.sub(pattern, replacement, processed_response, flags=re.IGNORECASE)
|
| 693 |
|
| 694 |
+
# 數字到文字轉換 - 優化版本支援不規則複數
|
| 695 |
number_conversions = {
|
| 696 |
'2': 'two', '3': 'three', '4': 'four', '5': 'five', '6': 'six',
|
| 697 |
'7': 'seven', '8': 'eight', '9': 'nine', '10': 'ten',
|
| 698 |
'11': 'eleven', '12': 'twelve'
|
| 699 |
}
|
| 700 |
|
| 701 |
+
# 不規則複數詞彙映射表(非s結尾)
|
| 702 |
+
irregular_plurals = {
|
| 703 |
+
'people': 'people',
|
| 704 |
+
'children': 'children',
|
| 705 |
+
'men': 'men',
|
| 706 |
+
'women': 'women',
|
| 707 |
+
'feet': 'feet',
|
| 708 |
+
'teeth': 'teeth',
|
| 709 |
+
'mice': 'mice',
|
| 710 |
+
'geese': 'geese',
|
| 711 |
+
'sheep': 'sheep',
|
| 712 |
+
'deer': 'deer',
|
| 713 |
+
'fish': 'fish',
|
| 714 |
+
'species': 'species',
|
| 715 |
+
'series': 'series'
|
| 716 |
+
}
|
| 717 |
+
|
| 718 |
# 處理各種語法結構中的數字
|
| 719 |
for digit, word in number_conversions.items():
|
| 720 |
+
# 1: 數字 + 規則複數詞 (以s結尾,如 "7 chairs")
|
| 721 |
pattern1 = rf'\b{digit}\s+([a-zA-Z]+s)\b'
|
| 722 |
processed_response = re.sub(pattern1, rf'{word} \1', processed_response)
|
| 723 |
|
| 724 |
+
# 2: 數字 + 不規則複數詞 (如 "7 people")
|
| 725 |
+
for irregular_plural in irregular_plurals.keys():
|
| 726 |
+
pattern_irregular = rf'\b{digit}\s+({irregular_plural})\b'
|
| 727 |
+
processed_response = re.sub(pattern_irregular, rf'{word} \1', processed_response, flags=re.IGNORECASE)
|
| 728 |
|
| 729 |
+
# 3: 數字 + 修飾詞 + 規則複數詞 (如 "7 more chairs")
|
| 730 |
+
pattern3 = rf'\b{digit}\s+(more|additional|other|identical)\s+([a-zA-Z]+s)\b'
|
| 731 |
processed_response = re.sub(pattern3, rf'{word} \1 \2', processed_response)
|
| 732 |
|
| 733 |
+
# 4: 數字 + 修飾詞 + 不規則複數詞 (如 "7 more people")
|
| 734 |
+
for irregular_plural in irregular_plurals.keys():
|
| 735 |
+
pattern4 = rf'\b{digit}\s+(more|additional|other|identical)\s+({irregular_plural})\b'
|
| 736 |
+
processed_response = re.sub(pattern4, rf'{word} \1 \2', processed_response, flags=re.IGNORECASE)
|
| 737 |
+
|
| 738 |
+
# 5: 數字 + 複合名詞結構 (如 "7 wine glasses")
|
| 739 |
+
compound_nouns = ['wine glasses', 'dining tables', 'coffee cups', 'traffic lights', 'cell phones']
|
| 740 |
+
for compound in compound_nouns:
|
| 741 |
+
pattern_compound = rf'\b{digit}\s+({re.escape(compound)})\b'
|
| 742 |
+
processed_response = re.sub(pattern_compound, rf'{word} \1', processed_response, flags=re.IGNORECASE)
|
| 743 |
+
|
| 744 |
+
# 6: 處理特殊單複數同形詞彙 (如 "7 deer", "7 sheep")
|
| 745 |
+
same_form_words = ['deer', 'sheep', 'fish', 'species', 'series', 'aircraft']
|
| 746 |
+
for same_word in same_form_words:
|
| 747 |
+
pattern_same = rf'\b{digit}\s+({same_word})\b'
|
| 748 |
+
processed_response = re.sub(pattern_same, rf'{word} \1', processed_response, flags=re.IGNORECASE)
|
| 749 |
|
| 750 |
return processed_response
|
| 751 |
|
| 752 |
except Exception as e:
|
| 753 |
self.logger.error(f"Error in _handle_repetitive_vocabulary: {str(e)}")
|
| 754 |
self.logger.error(traceback.format_exc())
|
| 755 |
+
return response # 發生錯誤時返回原始回應
|
| 756 |
|
| 757 |
def _ensure_grammatical_completeness(self, response: str) -> str:
|
| 758 |
"""
|
|
|
|
| 1189 |
"suffixes_to_remove_count": len(self.suffixes_to_remove),
|
| 1190 |
"repetitive_patterns_count": len(self.repetitive_patterns),
|
| 1191 |
"initialization_status": "success"
|
| 1192 |
+
}
|
ui_manager.py
CHANGED
|
@@ -7,7 +7,7 @@ from style import Style
|
|
| 7 |
|
| 8 |
class UIManager:
|
| 9 |
"""
|
| 10 |
-
Manages all UI-related functionality
|
| 11 |
Handles Gradio interface creation, component definitions, and event binding.
|
| 12 |
"""
|
| 13 |
|
|
@@ -275,8 +275,8 @@ class UIManager:
|
|
| 275 |
gr.Examples(
|
| 276 |
examples=[
|
| 277 |
"room_05.jpg",
|
|
|
|
| 278 |
"street_04.jpg",
|
| 279 |
-
"street_05.jpg",
|
| 280 |
"landmark_Louvre_01.jpg"
|
| 281 |
],
|
| 282 |
inputs=components['image_input'],
|
|
@@ -398,7 +398,7 @@ class UIManager:
|
|
| 398 |
)
|
| 399 |
|
| 400 |
return components
|
| 401 |
-
|
| 402 |
def create_video_tab(self):
|
| 403 |
"""
|
| 404 |
Create the video processing tab with all components.
|
|
@@ -465,7 +465,7 @@ class UIManager:
|
|
| 465 |
label="Processing Interval (Frames)",
|
| 466 |
info="Analyze every Nth frame (higher value = faster processing)"
|
| 467 |
)
|
| 468 |
-
|
| 469 |
# 簡化的分析說明
|
| 470 |
gr.HTML("""
|
| 471 |
<div style="padding: 8px; margin-top: 10px; background-color: #f0f7ff; border-radius: 4px; border-left: 3px solid #4299e1; font-size: 12px;">
|
|
@@ -496,7 +496,7 @@ class UIManager:
|
|
| 496 |
* Adjust **confidence threshold** to filter low-quality detections
|
| 497 |
3. Click "Analyze Video". **Processing time varies based on video length.**
|
| 498 |
4. Review the results: annotated video and statistical analysis.
|
| 499 |
-
|
| 500 |
**⚡ Performance Tips:**
|
| 501 |
* For videos longer than 2 minutes, use interval ≥ 15 frames
|
| 502 |
* YOLOv8n model provides best speed for video processing
|
|
@@ -525,11 +525,11 @@ class UIManager:
|
|
| 525 |
</summary>
|
| 526 |
<div style="margin-top: 8px; padding: 10px; background-color: #f8f9fa; border-radius: 6px; border: 1px solid #e2e8f0;">
|
| 527 |
<p style="font-size: 13px; color: #718096; margin: 0;">
|
| 528 |
-
<b>Focus on practical insights:</b> This analysis provides accurate object counts and timing information
|
| 529 |
-
without complex tracking. The system uses spatial clustering to eliminate duplicate detections and
|
| 530 |
provides clear timeline data showing when objects first appear and how long they remain visible.
|
| 531 |
<br><br>
|
| 532 |
-
<b>Key benefits:</b> Reliable object counting, clear timeline analysis, and easy-to-understand results
|
| 533 |
that directly answer questions like "How many cars are in this video?" and "When do they appear?"
|
| 534 |
</p>
|
| 535 |
</div>
|
|
@@ -558,10 +558,10 @@ class UIManager:
|
|
| 558 |
elem_id="video-summary-html-output"
|
| 559 |
)
|
| 560 |
|
| 561 |
-
# Detailed Statistics Tab
|
| 562 |
with gr.Tab("Detailed Statistics"):
|
| 563 |
gr.HTML('<div class="section-heading">Complete Analysis Data</div>')
|
| 564 |
-
|
| 565 |
with gr.Accordion("Processing Information", open=True):
|
| 566 |
gr.HTML("""
|
| 567 |
<div style="padding: 6px; background-color: #f8f9fa; border-radius: 4px; margin-bottom: 10px; font-size: 12px;">
|
|
@@ -574,7 +574,7 @@ class UIManager:
|
|
| 574 |
label=None,
|
| 575 |
elem_classes="video-stats-display"
|
| 576 |
)
|
| 577 |
-
|
| 578 |
with gr.Accordion("Object Details", open=False):
|
| 579 |
gr.HTML("""
|
| 580 |
<div style="padding: 6px; background-color: #f8f9fa; border-radius: 4px; margin-bottom: 10px; font-size: 12px;">
|
|
@@ -735,7 +735,7 @@ class UIManager:
|
|
| 735 |
]
|
| 736 |
)
|
| 737 |
|
| 738 |
-
# Video Process Button Click Handler
|
| 739 |
video_components['video_process_btn'].click(
|
| 740 |
fn=handle_video_upload_fn,
|
| 741 |
inputs=[
|
|
@@ -750,6 +750,6 @@ class UIManager:
|
|
| 750 |
video_components['video_output'],
|
| 751 |
video_components['video_summary_text'],
|
| 752 |
video_components['video_stats_json'],
|
| 753 |
-
video_components['video_object_details']
|
| 754 |
]
|
| 755 |
)
|
|
|
|
| 7 |
|
| 8 |
class UIManager:
|
| 9 |
"""
|
| 10 |
+
Manages all UI-related functionality
|
| 11 |
Handles Gradio interface creation, component definitions, and event binding.
|
| 12 |
"""
|
| 13 |
|
|
|
|
| 275 |
gr.Examples(
|
| 276 |
examples=[
|
| 277 |
"room_05.jpg",
|
| 278 |
+
"street_03.jpg",
|
| 279 |
"street_04.jpg",
|
|
|
|
| 280 |
"landmark_Louvre_01.jpg"
|
| 281 |
],
|
| 282 |
inputs=components['image_input'],
|
|
|
|
| 398 |
)
|
| 399 |
|
| 400 |
return components
|
| 401 |
+
|
| 402 |
def create_video_tab(self):
|
| 403 |
"""
|
| 404 |
Create the video processing tab with all components.
|
|
|
|
| 465 |
label="Processing Interval (Frames)",
|
| 466 |
info="Analyze every Nth frame (higher value = faster processing)"
|
| 467 |
)
|
| 468 |
+
|
| 469 |
# 簡化的分析說明
|
| 470 |
gr.HTML("""
|
| 471 |
<div style="padding: 8px; margin-top: 10px; background-color: #f0f7ff; border-radius: 4px; border-left: 3px solid #4299e1; font-size: 12px;">
|
|
|
|
| 496 |
* Adjust **confidence threshold** to filter low-quality detections
|
| 497 |
3. Click "Analyze Video". **Processing time varies based on video length.**
|
| 498 |
4. Review the results: annotated video and statistical analysis.
|
| 499 |
+
|
| 500 |
**⚡ Performance Tips:**
|
| 501 |
* For videos longer than 2 minutes, use interval ≥ 15 frames
|
| 502 |
* YOLOv8n model provides best speed for video processing
|
|
|
|
| 525 |
</summary>
|
| 526 |
<div style="margin-top: 8px; padding: 10px; background-color: #f8f9fa; border-radius: 6px; border: 1px solid #e2e8f0;">
|
| 527 |
<p style="font-size: 13px; color: #718096; margin: 0;">
|
| 528 |
+
<b>Focus on practical insights:</b> This analysis provides accurate object counts and timing information
|
| 529 |
+
without complex tracking. The system uses spatial clustering to eliminate duplicate detections and
|
| 530 |
provides clear timeline data showing when objects first appear and how long they remain visible.
|
| 531 |
<br><br>
|
| 532 |
+
<b>Key benefits:</b> Reliable object counting, clear timeline analysis, and easy-to-understand results
|
| 533 |
that directly answer questions like "How many cars are in this video?" and "When do they appear?"
|
| 534 |
</p>
|
| 535 |
</div>
|
|
|
|
| 558 |
elem_id="video-summary-html-output"
|
| 559 |
)
|
| 560 |
|
| 561 |
+
# Detailed Statistics Tab
|
| 562 |
with gr.Tab("Detailed Statistics"):
|
| 563 |
gr.HTML('<div class="section-heading">Complete Analysis Data</div>')
|
| 564 |
+
|
| 565 |
with gr.Accordion("Processing Information", open=True):
|
| 566 |
gr.HTML("""
|
| 567 |
<div style="padding: 6px; background-color: #f8f9fa; border-radius: 4px; margin-bottom: 10px; font-size: 12px;">
|
|
|
|
| 574 |
label=None,
|
| 575 |
elem_classes="video-stats-display"
|
| 576 |
)
|
| 577 |
+
|
| 578 |
with gr.Accordion("Object Details", open=False):
|
| 579 |
gr.HTML("""
|
| 580 |
<div style="padding: 6px; background-color: #f8f9fa; border-radius: 4px; margin-bottom: 10px; font-size: 12px;">
|
|
|
|
| 735 |
]
|
| 736 |
)
|
| 737 |
|
| 738 |
+
# Video Process Button Click Handler
|
| 739 |
video_components['video_process_btn'].click(
|
| 740 |
fn=handle_video_upload_fn,
|
| 741 |
inputs=[
|
|
|
|
| 750 |
video_components['video_output'],
|
| 751 |
video_components['video_summary_text'],
|
| 752 |
video_components['video_stats_json'],
|
| 753 |
+
video_components['video_object_details']
|
| 754 |
]
|
| 755 |
)
|