MLRC_Bench

Running

Armeddinosaur committed on Apr 7

Commit

fc0a17a

1 Parent(s): 74cd82e

Please work!!

Files changed (3) hide show

Assests/MLRC_Bench_overview.png CHANGED Viewed

src/components/tasks.py CHANGED Viewed

@@ -70,7 +70,8 @@ While current results suggest that LLM-based research agents still fall short of
         "Perception Temporal Action Loc": "https://ptchallenge-workshop.github.io",
         "Product Recommendation": "https://www.aicrowd.com/challenges/amazon-kdd-cup-23-multilingual-recommendation-challenge",
         "Meta Learning": "https://metalearning.chalearn.org/",
-        "Llm Merging": "https://llm-merging.github.io"
     }
     # Update links mapping to use display names as keys

         "Perception Temporal Action Loc": "https://ptchallenge-workshop.github.io",
         "Product Recommendation": "https://www.aicrowd.com/challenges/amazon-kdd-cup-23-multilingual-recommendation-challenge",
         "Meta Learning": "https://metalearning.chalearn.org/",
+        "Llm Merging": "https://llm-merging.github.io",
+        "Rainfall Prediction": "https://weather4cast.net/neurips-2023/"
     }
     # Update links mapping to use display names as keys

src/utils/config.py CHANGED Viewed

@@ -100,5 +100,7 @@ tasks_info = {
     task_display_names.get("Machine Unlearning", "Machine Unlearning"):
         "Evaluating how well models can 'unlearn' specific information when required.",
     task_display_names.get("Backdoor Trigger Recovery", "Backdoor Trigger Recovery"):
-        "Testing resilience against backdoor attacks and ability to recover from triggered behaviors."
 }

     task_display_names.get("Machine Unlearning", "Machine Unlearning"):
         "Evaluating how well models can 'unlearn' specific information when required.",
     task_display_names.get("Backdoor Trigger Recovery", "Backdoor Trigger Recovery"):
+        "Testing resilience against backdoor attacks and ability to recover from triggered behaviors.",
+    task_display_names.get("Rainfall Prediction", "Rainfall Prediction"):
+        "Testing the model's ability to predict rainfall based on historical data and weather patterns."
 }