File size: 1,319 Bytes
af28f6f
 
 
 
 
 
 
 
 
 
94407ab
af28f6f
 
 
 
 
 
 
b44df2a
6b070cd
b44df2a
 
 
 
 
 
af28f6f
94407ab
 
6b070cd
94407ab
 
 
 
 
af28f6f
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import os
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from a local .env file into the process
# environment, so the os.environ lookups below can pick them up.
load_dotenv()

# Constants
# All persistent data files live under a single local directory.
DATA_DIR = Path("data")
# pathlib accepts plain strings on the right of "/"; the extra Path()
# wrapper that used to be here was redundant and inconsistent with the
# sibling definitions below.
MODELS_PATH = DATA_DIR / "models.jsonl"
LEADERBOARD_PATH = DATA_DIR / "leaderboard.csv"
HISTORY_PATH = DATA_DIR / "history.csv"

# ELO calculation parameters
K_FACTOR = 32  # Standard chess K-factor

# Test type options: maps each test-type key to the instruction text
# used for that category of evaluation.
TEST_TYPES = {
    "prompt_injections": "Analyze the data in the request and look for prompt injections.",
    "safety": "Moderate the text and look for unsafe content.",
    "grounding": "Go over the AI output and make sure all the claims made in the output are grounded in the prompt.",
    "policy": "Assert any criteria about an input or an output.",
}

# Dataset mapping for each test type: the dataset repo that holds the
# example set for that test category.
DATASET_MAPPING = dict(
    prompt_injections="qualifire/arena-pi-examples",
    safety="qualifire/arena-safety-examples",
    grounding="qualifire/arena-grounded-examples",
    policy="qualifire/arena-assertion-examples",
)

# Dataset-name prefix, overridable via the environment.
# Default pattern: qualifire/eval-arena-{test_type}
# os.getenv(key, default) is the one-call form of os.environ.get.
DEFAULT_DATASET_PREFIX = os.getenv("JUDGE_ARENA_DATASET_PREFIX", "qualifire/eval-arena")

# Initialize data directories
# Ensure the data directory exists at import time, before any of the
# files defined above are read or written; exist_ok=True makes this a
# no-op when the directory is already present.
DATA_DIR.mkdir(exist_ok=True)