Where are the weights?
Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at zeroentropy/zerank-2 and are newly initialized: ['score.weight']
Same here!
>>> model = CrossEncoder("zeroentropy/zerank-2", trust_remote_code=True)
Loading weights: 1%| | 2/398 [00:00<00:00, 3876.44it/s, Materializing param=model.layers.0.input_layernorm.weight]
Loading weights: 100%|██████████████████████████████████| 398/398 [00:00<00:00, 5486.03it/s, Materializing param=model.norm.weight]
Qwen3ForSequenceClassification LOAD REPORT from: zeroentropy/zerank-2
Key          | Status  |
-------------+---------+-
score.weight | MISSING |
Notes:
- MISSING: those params were newly initialized because they were missing from the checkpoint. Consider training on your downstream task.
So the scoring head gets a random init on every load...
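You can confirm this without training anything. A minimal sketch (it loads the checkpoint twice, so it briefly holds two copies in memory): if score.weight actually lived in the checkpoint, both loads would be bit-identical.

import torch
from sentence_transformers import CrossEncoder

# Load the same checkpoint twice; a head stored in the checkpoint
# would be identical across loads.
m1 = CrossEncoder("zeroentropy/zerank-2", trust_remote_code=True)
m2 = CrossEncoder("zeroentropy/zerank-2", trust_remote_code=True)

# CrossEncoder keeps the underlying HF model in .model, and
# Qwen3ForSequenceClassification keeps its classification head in .score.
print(torch.equal(m1.model.score.weight, m2.model.score.weight))
# False -> score.weight is re-drawn at random on every load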
Had the same issue.
I pointed Codex (gpt-5.3-high) at it, and this is what it came up with:
I identified a real model-loading bug behind your logs (score.weight gets randomly initialized each cold start). I'm patching scripts/modal_reranker.py to load ZeroEntropy models through their native modeling_zeranker.py path instead of CrossEncoder sequence-classification mode, and I'll also make smoke output explicit about expected shutdown.
This is the code it created to run this model on Modal; you might need to adapt it for your usage:
import os
import types
from importlib import util as importlib_util
from typing import Any

import modal

APP_NAME = os.getenv("MODAL_RERANK_APP_NAME", "latent-papers-rerank")
DEFAULT_MODEL = os.getenv("MODAL_RERANK_DEFAULT_MODEL", "zeroentropy/zerank-2")
GPU_TYPE = os.getenv("MODAL_RERANK_GPU", "L4")
MODEL_CACHE_DIR = "/root/.cache/huggingface"

_default_batch_size = int(os.getenv("MODAL_RERANK_BATCH_SIZE", "16"))
if _default_batch_size <= 0:
    raise ValueError("MODAL_RERANK_BATCH_SIZE must be >= 1")

_default_max_length = int(os.getenv("MODAL_RERANK_MAX_LENGTH", "1024"))
if _default_max_length <= 0:
    raise ValueError("MODAL_RERANK_MAX_LENGTH must be >= 1")

HF_CACHE_VOLUME = modal.Volume.from_name("latent-papers-hf-cache", create_if_missing=True)

IMAGE = modal.Image.debian_slim(python_version="3.12").pip_install(
    "sentence-transformers>=5.2.2",
    "torch>=2.10.0",
    "accelerate>=1.12.0",
)

app = modal.App(APP_NAME)


@app.cls(
    image=IMAGE,
    gpu=GPU_TYPE,
    volumes={MODEL_CACHE_DIR: HF_CACHE_VOLUME},
    max_containers=8,
    scaledown_window=300,
    timeout=3600,
)
@modal.concurrent(max_inputs=8)
class ZerankReranker:
    def _is_zeroentropy_reranker(self, model_name: str) -> bool:
        normalized = (model_name or "").strip().lower()
        return normalized.startswith("zeroentropy/zerank-")

    def _load_zeroentropy_model(self, model_name: str) -> Any:
        from huggingface_hub import hf_hub_download

        # Import the checkpoint's own modeling_zeranker.py so scoring goes
        # through the model's native code path, not the seq-cls head.
        module_path = hf_hub_download(model_name, "modeling_zeranker.py")
        spec = importlib_util.spec_from_file_location("modeling_zeranker", module_path)
        if spec is None or spec.loader is None:
            raise RuntimeError(f"Failed to load {module_path} as a Python module.")
        module = importlib_util.module_from_spec(spec)
        spec.loader.exec_module(module)

        # The upstream helper hardcodes MODEL_PATH/global_device.
        module.MODEL_PATH = model_name
        model_device = module.torch.device("cuda")
        module.global_device = model_device

        tokenizer, model = module.load_model(model_device)
        model.eval()
        yes_token_id = tokenizer.encode("Yes", add_special_tokens=False)[0]
        state = types.SimpleNamespace(
            inner_tokenizer=tokenizer,
            inner_model=model,
            inner_yes_token_id=yes_token_id,
        )

        # Thin wrapper so callers can use the same .predict() interface
        # as a sentence-transformers CrossEncoder.
        class _Predictor:
            def predict(self, pairs: list[tuple[str, str]], **_: Any) -> list[float]:
                with module.torch.inference_mode():
                    return module.predict(state, query_documents=pairs)

        return _Predictor()

    def _load_model(self, model_name: str) -> None:
        self.model_name = model_name
        if self._is_zeroentropy_reranker(model_name):
            self.model = self._load_zeroentropy_model(model_name)
            print(f"Loaded ZeroEntropy reranker using native runtime: {model_name}")
            return

        from sentence_transformers import CrossEncoder

        self.model = CrossEncoder(
            model_name,
            device="cuda",
            trust_remote_code=True,
            max_length=_default_max_length,
        )
        print(f"Loaded CrossEncoder reranker: {model_name}")

    @modal.enter()
    def setup(self) -> None:
        self._load_model(DEFAULT_MODEL)

    @modal.method()
    def rerank(self, *, query: str, documents: list[str], model: str | None = None) -> dict[str, Any]:
        if not isinstance(query, str):
            raise TypeError("query must be a string")
        if not isinstance(documents, list):
            raise TypeError("documents must be a list of strings")

        selected_model = (model or "").strip() or DEFAULT_MODEL
        if selected_model != self.model_name:
            self._load_model(selected_model)

        if not documents:
            return {"scores": []}

        pairs = [(query, str(doc or "")) for doc in documents]
        scores = self.model.predict(
            pairs,
            batch_size=_default_batch_size,
            show_progress_bar=False,
        )
        return {"scores": [float(score) for score in scores]}


@app.local_entrypoint()
def smoke(
    query: str = "How can retrieval augmentation improve reasoning?",
    document: str = "This paper introduces a reranking method for query-document relevance.",
) -> None:
    model = ZerankReranker()
    result = model.rerank.remote(
        query=query,
        documents=[document],
        model=DEFAULT_MODEL,
    )
    print(result)
    print("Smoke test completed. The app is expected to shut down after the local entrypoint returns.")