Spaces:

k050506koch
/

gpt3-dev-api

Sleeping

Kyryll Kochkin committed on Oct 23

Commit

b08494f

1 Parent(s): 320f8e1

Temporarily disable default models for Vercel sizing test

Files changed (4) hide show

README.md CHANGED Viewed

@@ -45,14 +45,11 @@ All configuration is driven via environment variables (see `app/core/settings.py
 | `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
 | `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
-The default in-memory registry (see `app/core/model_registry.py`) currently exposes only the following model ID:
-- `GPT3-dev` (17M parameters)
-Legacy configurations for larger GPT3-dev checkpoints and GPT-2 remain in the source tree but are commented out to keep the
-serverless bundle under Vercel's 250 MB ceiling. Uncomment the relevant `ModelSpec` definitions or supply a custom registry file
-if you need to re-enable them. Use `MODEL_ALLOW_LIST` (for example, `MODEL_ALLOW_LIST=GPT3-dev`) to limit a deployment to a
-smaller subset of models.
 ### Estimating Model Artifact Sizes

 | `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
 | `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
+The default in-memory registry (see `app/core/model_registry.py`) currently exposes no models. All bundled `ModelSpec`
+definitions are commented out so Vercel can build the project without downloading Hugging Face checkpoints while we diagnose
+the serverless size regression. Uncomment the relevant entries (for example, the 17M-parameter `GPT3-dev` checkpoint) or supply
+a custom registry file once you are ready to serve a model. Use `MODEL_ALLOW_LIST` to restrict a deployment to a smaller subset
+of model IDs when re-enabling them.
 ### Estimating Model Artifact Sizes

app/core/model_registry.py CHANGED Viewed

@@ -49,12 +49,12 @@ _DEFAULT_MODELS: List[ModelSpec] = [
     #     dtype="float16",
     #     device="auto",
     # ),
-    ModelSpec(
-        name="GPT3-dev",
-        hf_repo="k050506koch/GPT3-dev",  # TODO confirm
-        dtype="float16",
-        device="auto",
-    ),
     # ModelSpec(
     #     name="GPT3-dev-125m",
     #     hf_repo="k050506koch/GPT3-dev-125m",  # TODO confirm

     #     dtype="float16",
     #     device="auto",
     # ),
+    # ModelSpec(
+    #     name="GPT3-dev",
+    #     hf_repo="k050506koch/GPT3-dev",  # TODO confirm
+    #     dtype="float16",
+    #     device="auto",
+    # ),
     # ModelSpec(
     #     name="GPT3-dev-125m",
     #     hf_repo="k050506koch/GPT3-dev-125m",  # TODO confirm

tests/test_model_registry.py CHANGED Viewed

@@ -1,6 +1,7 @@
 """Tests for the dynamic model registry filtering."""
 from __future__ import annotations
 import sys
 import types
 from pathlib import Path
@@ -56,13 +57,26 @@ def reset_registry(monkeypatch):
     apply()
-def test_default_registry_includes_all_models(reset_registry):
     names = {spec.name for spec in model_registry.list_models()}
-    assert names == {"GPT3-dev"}
-def test_model_allow_list_filters(reset_registry):
-    reset_registry(allow_list=["GPT3-dev"])
     names = {spec.name for spec in model_registry.list_models()}
     assert names == {"GPT3-dev"}

 """Tests for the dynamic model registry filtering."""
 from __future__ import annotations
+import json
 import sys
 import types
 from pathlib import Path
     apply()
+def test_default_registry_is_empty(reset_registry):
     names = {spec.name for spec in model_registry.list_models()}
+    assert names == set()
+def test_model_allow_list_filters(reset_registry, tmp_path: Path):
+    registry_path = tmp_path / "registry.json"
+    registry_path.write_text(
+        json.dumps(
+            [
+                {"name": "GPT3-dev", "hf_repo": "dummy/dev"},
+                {"name": "Tiny", "hf_repo": "dummy/tiny"},
+            ]
+        )
+    )
+    reset_registry(registry_path=str(registry_path))
+    names = {spec.name for spec in model_registry.list_models()}
+    assert names == {"GPT3-dev", "Tiny"}
+    reset_registry(allow_list=["GPT3-dev"], registry_path=str(registry_path))
     names = {spec.name for spec in model_registry.list_models()}
     assert names == {"GPT3-dev"}

tests/test_openai_compat.py CHANGED Viewed

@@ -123,7 +123,7 @@ from app.schemas.completions import CompletionRequest
 def test_list_models() -> None:
     payload = models.list_available_models()
     assert payload["object"] == "list"
-    assert any(model["id"] == "GPT3-dev" for model in payload["data"])
 def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
@@ -135,6 +135,7 @@ def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
         return DummyResult()
     monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
     payload = CompletionRequest.model_validate({
         "model": "GPT3-dev",
         "prompt": "Hello",

 def test_list_models() -> None:
     payload = models.list_available_models()
     assert payload["object"] == "list"
+    assert payload["data"] == []
 def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
         return DummyResult()
     monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
+    monkeypatch.setattr("app.routers.completions.get_model_spec", lambda model: None)
     payload = CompletionRequest.model_validate({
         "model": "GPT3-dev",
         "prompt": "Hello",