Kyryll Kochkin committed on
Commit
b08494f
·
1 Parent(s): 320f8e1

Temporarily disable default models for Vercel sizing test

Browse files
README.md CHANGED
@@ -45,14 +45,11 @@ All configuration is driven via environment variables (see `app/core/settings.py
45
  | `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
46
  | `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
47
 
48
- The default in-memory registry (see `app/core/model_registry.py`) currently exposes only the following model ID:
49
-
50
- - `GPT3-dev` (17M parameters)
51
-
52
- Legacy configurations for larger GPT3-dev checkpoints and GPT-2 remain in the source tree but are commented out to keep the
53
- serverless bundle under Vercel's 250 MB ceiling. Uncomment the relevant `ModelSpec` definitions or supply a custom registry file
54
- if you need to re-enable them. Use `MODEL_ALLOW_LIST` (for example, `MODEL_ALLOW_LIST=GPT3-dev`) to limit a deployment to a
55
- smaller subset of models.
56
 
57
  ### Estimating Model Artifact Sizes
58
 
 
45
  | `ENABLE_EMBEDDINGS_BACKEND` | Enable embeddings backend (returns 501 when `False`) | `False` |
46
  | `CORS_ALLOW_ORIGINS` | Comma-separated list of allowed origins | empty |
47
 
48
+ The default in-memory registry (see `app/core/model_registry.py`) currently exposes no models. All bundled `ModelSpec`
49
+ definitions are commented out so Vercel can build the project without downloading Hugging Face checkpoints while we diagnose
50
+ the serverless size regression. Uncomment the relevant entries (for example, the 17M-parameter `GPT3-dev` checkpoint) or supply
51
+ a custom registry file once you are ready to serve a model. Use `MODEL_ALLOW_LIST` to restrict a deployment to a smaller subset
52
+ of model IDs when re-enabling them.
 
 
 
53
 
54
  ### Estimating Model Artifact Sizes
55
 
app/core/model_registry.py CHANGED
@@ -49,12 +49,12 @@ _DEFAULT_MODELS: List[ModelSpec] = [
49
  # dtype="float16",
50
  # device="auto",
51
  # ),
52
- ModelSpec(
53
- name="GPT3-dev",
54
- hf_repo="k050506koch/GPT3-dev", # TODO confirm
55
- dtype="float16",
56
- device="auto",
57
- ),
58
  # ModelSpec(
59
  # name="GPT3-dev-125m",
60
  # hf_repo="k050506koch/GPT3-dev-125m", # TODO confirm
 
49
  # dtype="float16",
50
  # device="auto",
51
  # ),
52
+ # ModelSpec(
53
+ # name="GPT3-dev",
54
+ # hf_repo="k050506koch/GPT3-dev", # TODO confirm
55
+ # dtype="float16",
56
+ # device="auto",
57
+ # ),
58
  # ModelSpec(
59
  # name="GPT3-dev-125m",
60
  # hf_repo="k050506koch/GPT3-dev-125m", # TODO confirm
tests/test_model_registry.py CHANGED
@@ -1,6 +1,7 @@
1
  """Tests for the dynamic model registry filtering."""
2
  from __future__ import annotations
3
 
 
4
  import sys
5
  import types
6
  from pathlib import Path
@@ -56,13 +57,26 @@ def reset_registry(monkeypatch):
56
  apply()
57
 
58
 
59
- def test_default_registry_includes_all_models(reset_registry):
60
  names = {spec.name for spec in model_registry.list_models()}
61
- assert names == {"GPT3-dev"}
62
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
- def test_model_allow_list_filters(reset_registry):
65
- reset_registry(allow_list=["GPT3-dev"])
66
  names = {spec.name for spec in model_registry.list_models()}
67
  assert names == {"GPT3-dev"}
68
 
 
1
  """Tests for the dynamic model registry filtering."""
2
  from __future__ import annotations
3
 
4
+ import json
5
  import sys
6
  import types
7
  from pathlib import Path
 
57
  apply()
58
 
59
 
60
+ def test_default_registry_is_empty(reset_registry):
61
  names = {spec.name for spec in model_registry.list_models()}
62
+ assert names == set()
63
+
64
+
65
+ def test_model_allow_list_filters(reset_registry, tmp_path: Path):
66
+ registry_path = tmp_path / "registry.json"
67
+ registry_path.write_text(
68
+ json.dumps(
69
+ [
70
+ {"name": "GPT3-dev", "hf_repo": "dummy/dev"},
71
+ {"name": "Tiny", "hf_repo": "dummy/tiny"},
72
+ ]
73
+ )
74
+ )
75
+ reset_registry(registry_path=str(registry_path))
76
+ names = {spec.name for spec in model_registry.list_models()}
77
+ assert names == {"GPT3-dev", "Tiny"}
78
 
79
+ reset_registry(allow_list=["GPT3-dev"], registry_path=str(registry_path))
 
80
  names = {spec.name for spec in model_registry.list_models()}
81
  assert names == {"GPT3-dev"}
82
 
tests/test_openai_compat.py CHANGED
@@ -123,7 +123,7 @@ from app.schemas.completions import CompletionRequest
123
  def test_list_models() -> None:
124
  payload = models.list_available_models()
125
  assert payload["object"] == "list"
126
- assert any(model["id"] == "GPT3-dev" for model in payload["data"])
127
 
128
 
129
  def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
@@ -135,6 +135,7 @@ def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
135
  return DummyResult()
136
 
137
  monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
 
138
  payload = CompletionRequest.model_validate({
139
  "model": "GPT3-dev",
140
  "prompt": "Hello",
 
123
  def test_list_models() -> None:
124
  payload = models.list_available_models()
125
  assert payload["object"] == "list"
126
+ assert payload["data"] == []
127
 
128
 
129
  def test_completions_non_stream(monkeypatch: pytest.MonkeyPatch) -> None:
 
135
  return DummyResult()
136
 
137
  monkeypatch.setattr("app.routers.completions.engine.generate", fake_generate)
138
+ monkeypatch.setattr("app.routers.completions.get_model_spec", lambda model: None)
139
  payload = CompletionRequest.model_validate({
140
  "model": "GPT3-dev",
141
  "prompt": "Hello",