rayymaxx committed on
Commit
516d1c9
·
1 Parent(s): cce707f

Made changes to app structure

Files changed (4)
  1. Dockerfile +6 -13
  2. README.md +13 -0
  3. app.py → app..py +29 -18
  4. requirements.txt +2 -2
Dockerfile CHANGED
@@ -1,18 +1,11 @@
-# Use Python 3.12
-FROM python:3.12-slim
+FROM python:3.10
 
-# Set working directory
-WORKDIR /app
+WORKDIR /code
 
-# Copy files
-COPY . .
-
-# Install dependencies
-RUN pip install --upgrade pip
-RUN pip install -r requirements.txt
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
 
-# Expose FastAPI port
-EXPOSE 7860
+COPY . .
 
-# Run FastAPI
+# Run FastAPI app with uvicorn
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
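The new layer order copies `requirements.txt` and installs dependencies before copying the rest of the source, so Docker's layer cache skips the pip install step on code-only changes. A minimal smoke-test sketch for a locally running container, assuming the image has been built and started with port 7860 published (the image tag and script are hypothetical, not part of the commit):

```python
# smoke_test.py - check a locally running container, e.g. started with:
#   docker build -t directed-ai . && docker run -p 7860:7860 directed-ai
# (hypothetical tag). Requires: pip install requests
import requests

resp = requests.post(
    "http://localhost:7860/generate",  # port bound by the uvicorn CMD above
    json={"prompt": "Say hello in one sentence."},
    timeout=600,  # generous: generation can be slow without a GPU
)
resp.raise_for_status()
print(resp.json()["response"])
```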
README.md CHANGED
@@ -8,3 +8,17 @@ pinned: false
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+# DirectEd-AI-LoRA-API 🚀
+
+This Hugging Face Space hosts my fine-tuned LoRA model using FastAPI.
+Endpoint: `POST /generate`
+
+Example request:
+```json
+{
+  "prompt": "Explain React in MERN stack.",
+  "max_new_tokens": 200,
+  "temperature": 0.7
+}
+```
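Since the README documents the request body but not a client call, here is a hedged Python sketch of invoking the endpoint; the Space URL follows the usual `<owner>-<space>.hf.space` pattern and is a guess, not something stated in the commit:

```python
# Call the Space's POST /generate endpoint with the README's example body.
import requests

SPACE_URL = "https://rayymaxx-directed-ai-lora.hf.space"  # hypothetical URL

payload = {
    "prompt": "Explain React in MERN stack.",
    "max_new_tokens": 200,
    "temperature": 0.7,
}
resp = requests.post(f"{SPACE_URL}/generate", json=payload, timeout=300)
resp.raise_for_status()
print(resp.json()["response"])
```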
app.py → app..py RENAMED
@@ -2,30 +2,41 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 from peft import PeftModel
+import torch
 
-# --- Config ---
-BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
-FINETUNED_ADAPTER = "rayymaxx/DirectEd-AI-LoRA"
-MAX_NEW_TOKENS = 200
+app = FastAPI()
 
-app = FastAPI(title="Directed AI FastAPI")
+# 👇 Replace with your actual repos
+BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
+ADAPTER_REPO = "rayymaxx/DirectEd-AI-LoRA"  # your adapter repo
 
-# --- Load model & tokenizer once at startup ---
+# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
-base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto")
-model = PeftModel.from_pretrained(base_model, FINETUNED_ADAPTER)
-text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
 
-class Prompt(BaseModel):
+# Load base model
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    device_map="auto",
+    torch_dtype=torch.float16,
+)
+
+# Load adapter
+model = PeftModel.from_pretrained(model, ADAPTER_REPO)
+
+# Pipeline
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+
+class Request(BaseModel):
     prompt: str
+    max_new_tokens: int = 150
+    temperature: float = 0.7
 
 @app.post("/generate")
-def generate_text(prompt_data: Prompt):
-    prompt_text = prompt_data.prompt
-    output = text_generator(prompt_text, max_new_tokens=MAX_NEW_TOKENS, do_sample=True, temperature=0.7)
+def generate(req: Request):
+    output = pipe(
+        req.prompt,
+        max_new_tokens=req.max_new_tokens,
+        temperature=req.temperature,
+        do_sample=True
+    )
     return {"response": output[0]["generated_text"]}
-
-
-@app.get("/")
-def greet_json():
-    return {"Hello": "World!"}
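Two caveats on the rewritten app. First, the rename to `app..py` (double dot) leaves no importable `app` module for the Dockerfile's `uvicorn app:app` import string, so the file likely needs to stay `app.py`. Second, a transformers text-generation pipeline returns the prompt concatenated with the completion in `generated_text` by default, so clients get their own prompt echoed back. A minimal sketch of returning only the new tokens via the pipeline's `return_full_text` flag, reusing `app`, `pipe`, and `Request` from the diff above (a possible refinement, not part of the commit):

```python
@app.post("/generate")
def generate(req: Request):
    output = pipe(
        req.prompt,
        max_new_tokens=req.max_new_tokens,
        temperature=req.temperature,
        do_sample=True,
        return_full_text=False,  # drop the echoed prompt from generated_text
    )
    return {"response": output[0]["generated_text"]}
```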
 
 
 
 
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
 fastapi
 uvicorn
-transformers>=4.35.0
+transformers
+accelerate
 torch
 peft
-trl
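Worth noting: the added `accelerate` is what makes `device_map="auto"` work in `from_pretrained`, which both the model load and the pipeline in app.py rely on, while the dropped `trl` is a training-time dependency that an inference-only server should not need.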