Update inference_fine_tune.py
inference_fine_tune.py  +7 -6
@@ -32,19 +32,18 @@ state = torch.load(model_path,map_location=torch.device('cpu'))
 model.load_state_dict(state['model_state_dict'])

 def generate_response(prompt: str):
-    print("Prompt:", prompt)
     input_tokens = tokenizer.encode(prompt).ids
     input_tokens = [user_token_id] + input_tokens + [ai_token_id]

     if len(input_tokens) > config['seq_len']:
-
+        yield gr.Textbox.update(value="Prompt too long.")
         return

-    input_tokens = torch.tensor(input_tokens).unsqueeze(0).to(device)
-
+    input_tokens = torch.tensor(input_tokens).unsqueeze(0).to(device)
     temperature = 0.7
     top_k = 50
     i = 0
+    generated_text = ""

     while input_tokens.shape[1] < 2000:
         out = model.decode(input_tokens)
@@ -55,8 +54,10 @@ def generate_response(prompt: str):
         next_token = torch.multinomial(probs, num_samples=1)
         next_token = top_k_indices.gather(-1, next_token)

-
-
+        word = tokenizer.decode([next_token.item()])
+        generated_text += word
+
+        yield gr.Textbox.update(value=generated_text)

         input_tokens = torch.cat([input_tokens, next_token], dim=1)
         if input_tokens.shape[1] > config['seq_len']:
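The hunks use `probs` and `top_k_indices` without defining them; they come from the sampling code between the two hunks (old lines 51-54, not shown in this diff). A minimal sketch of what that top-k/temperature step typically looks like, assuming `out` carries next-token logits of shape (batch, vocab); `sample_next_token` is a hypothetical name, not a function in this file:

# Sketch only: the real file computes probs/top_k_indices in lines elided
# from this diff, so the exact shapes and names here are assumptions.
import torch

def sample_next_token(logits, temperature=0.7, top_k=50):
    # Scale logits: a lower temperature sharpens the distribution.
    logits = logits / temperature
    # Keep only the k highest-scoring candidate tokens.
    top_k_logits, top_k_indices = torch.topk(logits, top_k, dim=-1)
    probs = torch.softmax(top_k_logits, dim=-1)
    # Sample a position among the k candidates...
    next_token = torch.multinomial(probs, num_samples=1)
    # ...then map it back to a vocabulary id, as the hunk does.
    return top_k_indices.gather(-1, next_token)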
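With the `print` calls replaced by `yield gr.Textbox.update(...)`, `generate_response` is now a generator that Gradio can stream into a textbox. The UI wiring is not part of this diff; a plausible Blocks setup for the Gradio 3.x API used here (component names are hypothetical) might be:

import gradio as gr

with gr.Blocks() as demo:
    prompt_box = gr.Textbox(label="Prompt")
    output_box = gr.Textbox(label="Response")
    # Each yielded Textbox.update overwrites output_box, so the
    # response appears to stream token by token.
    prompt_box.submit(generate_response, inputs=prompt_box, outputs=output_box)

# Generator callbacks require the queue in Gradio 3.x.
demo.queue().launch()

Note that `gr.Textbox.update` was removed in Gradio 4; there the generator would yield the new string (or a `gr.Textbox(value=...)` instance) directly.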