Spaces:
Sleeping
Sleeping
Kyryll Kochkin
committed on
Commit
·
b08494f
1
Parent(s):
320f8e1
Temporarily disable default models for Vercel sizing test
Browse files
- README.md +5 -8
- app/core/model_registry.py +6 -6
- tests/test_model_registry.py +19 -5
- tests/test_openai_compat.py +2 -1
README.md
CHANGED
|
@@ -45,14 +45,11 @@ All configuration is driven via environment variables (see `app/core/settings.py
|
|
| 45 |
| `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
|
| 46 |
| `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
|
| 47 |
|
| 48 |
-
The default in-memory registry (see `app/core/model_registry.py`) currently exposes
|
| 49 |
-
|
| 50 |
-
- `GPT3-dev`
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
serverless bundle under Vercel's 250 MB ceiling. Uncomment the relevant `ModelSpec` definitions or supply a custom registry file
|
| 54 |
-
if you need to re-enable them. Use `MODEL_ALLOW_LIST` (for example, `MODEL_ALLOW_LIST=GPT3-dev`) to limit a deployment to a
|
| 55 |
-
smaller subset of models.
|
| 56 |
|
| 57 |
### Estimating Model Artifact Sizes
|
| 58 |
|
|
|
|
| 45 |
| `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
|
| 46 |
| `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
|
| 47 |
|
| 48 |
+
The default in-memory registry (see `app/core/model_registry.py`) currently exposes no models. All bundled `ModelSpec`
|
| 49 |
+
definitions are commented out so Vercel can build the project without downloading Hugging Face checkpoints while we diagnose
|
| 50 |
+
the serverless size regression. Uncomment the relevant entries (for example, the 17M-parameter `GPT3-dev` checkpoint) or supply
|
| 51 |
+
a custom registry file once you are ready to serve a model. Use `MODEL_ALLOW_LIST` to restrict a deployment to a smaller subset
|
| 52 |
+
of model IDs when re-enabling them.
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
### Estimating Model Artifact Sizes
|
| 55 |
|
app/core/model_registry.py
CHANGED
|
@@ -49,12 +49,12 @@ _DEFAULT_MODELS: List[ModelSpec] = [
|
|
| 49 |
# dtype="float16",
|
| 50 |
# device="auto",
|
| 51 |
# ),
|
| 52 |
-
ModelSpec(
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
),
|
| 58 |
# ModelSpec(
|
| 59 |
# name="GPT3-dev-125m",
|
| 60 |
# hf_repo="k050506koch/GPT3-dev-125m", # TODO confirm
|
|
|
|
| 49 |
# dtype="float16",
|
| 50 |
# device="auto",
|
| 51 |
# ),
|
| 52 |
+
# ModelSpec(
|
| 53 |
+
# name="GPT3-dev",
|
| 54 |
+
# hf_repo="k050506koch/GPT3-dev", # TODO confirm
|
| 55 |
+
# dtype="float16",
|
| 56 |
+
# device="auto",
|
| 57 |
+
# ),
|
| 58 |
# ModelSpec(
|
| 59 |
# name="GPT3-dev-125m",
|
| 60 |
# hf_repo="k050506koch/GPT3-dev-125m", # TODO confirm
|
tests/test_model_registry.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
"""Tests for the dynamic model registry filtering."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
|
|
|
| 4 |
import sys
|
| 5 |
import types
|
| 6 |
from pathlib import Path
|
|
@@ -56,13 +57,26 @@ def reset_registry(monkeypatch):
|
|
| 56 |
apply()
|
| 57 |
|
| 58 |
|
| 59 |
-
def
|
| 60 |
names = {spec.name for spec in model_registry.list_models()}
|
| 61 |
-
assert names ==
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
reset_registry(allow_list=["GPT3-dev"])
|
| 66 |
names = {spec.name for spec in model_registry.list_models()}
|
| 67 |
assert names == {"GPT3-dev"}
|
| 68 |
|
|
|
|
| 1 |
"""Tests for the dynamic model registry filtering."""
|
| 2 |
from __future__ import annotations
|
| 3 |
|
| 4 |
+
import json
|
| 5 |
import sys
|
| 6 |
import types
|
| 7 |
from pathlib import Path
|
|
|
|
| 57 |
apply()
|
| 58 |
|
| 59 |
|
| 60 |
+
def test_default_registry_is_empty(reset_registry):
|
| 61 |
names = {spec.name for spec in model_registry.list_models()}
|
| 62 |
+
assert names == set()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_model_allow_list_filters(reset_registry, tmp_path: Path):
|
| 66 |
+
registry_path = tmp_path / "registry.json"
|
| 67 |
+
registry_path.write_text(
|
| 68 |
+
json.dumps(
|
| 69 |
+
[
|
| 70 |
+
{"name": "GPT3-dev", "hf_repo": "dummy/dev"},
|
| 71 |
+
{"name": "Tiny", "hf_repo": "dummy/tiny"},
|
| 72 |
+
]
|
| 73 |
+
)
|
| 74 |
+
)
|
| 75 |
+
reset_registry(registry_path=str(registry_path))
|
| 76 |
+
names = {spec.name for spec in model_registry.list_models()}
|
| 77 |
+
assert names == {"GPT3-dev", "Tiny"}
|
| 78 |
|
| 79 |
+
reset_registry(allow_list=["GPT3-dev"], registry_path=str(registry_path))
|
|
|
|
| 80 |
names = {spec.name for spec in model_registry.list_models()}
|
| 81 |
assert names == {"GPT3-dev"}
|
| 82 |
|
tests/test_openai_compat.py
CHANGED
|
@@ -123,7 +123,7 @@ from app.schemas.completions import CompletionRequest
|
|
| 123 |
def test_list_models() -> None:
|
| 124 |
payload = models.list_available_models()
|
| 125 |
assert payload["object"] == "list"
|
| 126 |
-
assert
|
| 127 |
|
| 128 |
|
| 129 |
def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
@@ -135,6 +135,7 @@ def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
| 135 |
return DummyResult()
|
| 136 |
|
| 137 |
monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
|
|
|
|
| 138 |
payload = CompletionRequest.model_validate({
|
| 139 |
"model": "GPT3-dev",
|
| 140 |
"prompt": "Hello",
|
|
|
|
| 123 |
def test_list_models() -> None:
|
| 124 |
payload = models.list_available_models()
|
| 125 |
assert payload["object"] == "list"
|
| 126 |
+
assert payload["data"] == []
|
| 127 |
|
| 128 |
|
| 129 |
def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
|
|
|
|
| 135 |
return DummyResult()
|
| 136 |
|
| 137 |
monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
|
| 138 |
+
monkeypatch.setattr("app.routers.completions.get_model_spec", lambda model: None)
|
| 139 |
payload = CompletionRequest.model_validate({
|
| 140 |
"model": "GPT3-dev",
|
| 141 |
"prompt": "Hello",
|