dataset,judge_id,judge_name,f1,bacc,avg_latency,total_latency,count,correct TOXIGEN,qualifire-eval,Qualifire,0.9169139465875371,0.91892916729865,0.03062801801933432,624.4270441532135,689,633 TOXIGEN,meta-llama-3.1-70b-instruct-turbo,Meta Llama 3.1 70B Instruct,0.9529411764705882,0.9537008948885182,1.3404882590898755,923.5964105129242,689,657 TOXIGEN,meta-llama-3.1-405b-instruct-turbo,Meta Llama 3.1 405B Instruct,0.9410029498525073,0.9421103190252288,1.0220591648223611,704.1987645626068,689,649 TOXIGEN,meta-llama-4-scout-17B-16E-instruct,Meta Llama 4 Scout 17B 16E Instruct,0.9153515064562411,0.914332541247451,7.129768445633665,4912.4104590415955,689,630 TOXIGEN,meta-llama-3.3-70B-instruct-turbo,Meta Llama 3.3 70B Instruct,0.9484536082474226,0.949357062204021,2.427819664488683,1672.7677488327026,689,654 TOXIGEN,meta-llama-3.1-8b-instruct-turbo,Meta Llama 3.1 8B Instruct,0.9046242774566474,0.9042250198021471,1.6894216790081629,1164.0115368366241,689,623 TOXIGEN,gemma-2-27b-it,Gemma 2 27B,0.8939597315436242,0.884797849571094,0.7016154134913695,483.4130198955536,689,610 TOXIGEN,gemma-2-9b-it,Gemma 2 9B,0.8682170542635659,0.8511089202352664,0.6414801257788183,441.97980666160583,689,587 TOXIGEN,mistral-7b-instruct-v0.3,Mistral (7B) Instruct v0.3,0.9073033707865169,0.9040143586632287,0.5519450056881621,380.2901089191437,689,623 TOXIGEN,o3-mini,o3-mini,0.9101620029455081,0.9116192257781822,4.131221040066856,2846.411296606064,689,628 TOXIGEN,gpt-4.1,GPT-4.1,0.9180327868852459,0.9204122217166355,0.9109365798219368,627.6353034973145,689,634 TOXIGEN,gpt-4o,GPT-4o,0.9179856115107914,0.9172565178556382,0.8764240001213395,603.8561360836029,689,632 TOXIGEN,gpt-4-turbo,GPT-4 Turbo,0.9318181818181818,0.9302248175674537,1.0608411724577795,730.91956782341,689,641 TOXIGEN,gpt-3.5-turbo,GPT-3.5 Turbo,0.9143686502177069,0.9144168057030184,0.6816094952847684,469.62894225120544,689,630 TOXIGEN,claude-3-haiku-20240307,Claude 3 
Haiku,0.9037037037037037,0.9058555370173753,0.8256455647755084,568.8697941303253,689,624 TOXIGEN,claude-3-sonnet-20240229,Claude 3 Sonnet,0.9196428571428571,0.9218531439068371,0.8517552283747968,586.859352350235,689,635 TOXIGEN,claude-3-opus-latest,Claude 3 Opus,0.918918918918919,0.9171722534000708,1.702400786783249,1172.9541420936584,689,632 TOXIGEN,claude-3-5-sonnet-latest,Claude 3.5 Sonnet,0.9312320916905444,0.9302880159091291,1.478241823235167,1018.5086162090302,689,641 TOXIGEN,claude-3-5-haiku-latest,Claude 3.5 Haiku,0.92,0.9186553078180562,1.1753383869702654,809.8081486225128,689,633 TOXIGEN,qwen-2.5-72b-instruct-turbo,Qwen 2.5 72B Instruct,0.9164345403899722,0.9126598918044391,0.9204414866659223,634.1841843128204,689,629 TOXIGEN,qwen-2.5-7b-instruct-turbo,Qwen 2.5 7B Instruct,0.8779840848806366,0.8658341338456612,0.6011029206789457,414.1599123477936,689,597 TOXIGEN,deepseek-v3,DeepSeek V3,0.9202797202797203,0.9170458567167197,5.50724301455848,3794.4904370307922,689,632 TOXIGEN,deepseek-r1,DeepSeek R1,0.9269662921348315,0.9243347321232958,11.174386807553827,7699.152510404587,689,637