#!/usr/bin/env python3
"""
AnySecret Chat Assistant - HuggingFace Spaces Gradio Interface
A specialized AI assistant for AnySecret configuration management
"""
import os
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import logging
import sys
from huggingface_hub import login
# Configure logging so records show up in the HF Spaces log viewer.
# A single stdout handler is enough; registering handlers for both stdout
# and stderr would emit every record twice.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler(sys.stdout)]
)
logger = logging.getLogger(__name__)
# Model configuration
BASE_MODEL = "meta-llama/Llama-3.2-3B-Instruct"
PEFT_MODEL = "anysecret-io/anysecret-assistant"
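# The assistant is a LoRA adapter (PEFT_MODEL) layered on a gated Llama base
# model, so the Space needs an HF_TOKEN secret whose account has been granted
# access to the meta-llama repository.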
# Global variables for model and tokenizer
model = None
tokenizer = None
device = None
model_error = None
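# These globals are populated exactly once by load_model() at startup and then
# only read by the Gradio handlers; model_error carries the failure reason into
# the UI when loading does not succeed.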
def load_model():
    """Load the model and tokenizer with improved error handling"""
    global model, tokenizer, device, model_error
    try:
        logger.info("🚀 Starting model loading process...")

        # Check HuggingFace authentication
        hf_token = os.environ.get('HF_TOKEN')
        if hf_token:
            logger.info("🔑 HuggingFace token found, logging in...")
            login(token=hf_token)
        else:
            logger.warning("⚠️ No HF_TOKEN found in environment")

        # Determine device and log system info
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"🖥️ Using device: {device}")
        if torch.cuda.is_available():
            gpu_name = torch.cuda.get_device_name(0)
            total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
            logger.info(f"🎮 GPU: {gpu_name} ({total_memory:.1f}GB)")
        else:
            logger.info("💻 Running on CPU")

        # Load tokenizer first
        logger.info(f"📝 Loading tokenizer from {BASE_MODEL}...")
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                BASE_MODEL,
                use_fast=True,
                trust_remote_code=True
            )
            if tokenizer.pad_token is None:
                tokenizer.pad_token = tokenizer.eos_token
            tokenizer.padding_side = "left"
            logger.info("✅ Tokenizer loaded successfully")
        except Exception as e:
            logger.error(f"❌ Failed to load tokenizer: {e}")
            model_error = f"Tokenizer loading failed: {str(e)}"
            return False

        # Load base model with CPU optimizations
        logger.info(f"🤖 Loading base model from {BASE_MODEL}...")
        try:
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL,
                torch_dtype=torch.float32,  # float32 for better CPU compatibility
                device_map=None,  # don't use auto device mapping on CPU
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                use_cache=False  # disable KV cache to save memory
            )
            logger.info("✅ Base model loaded successfully")
        except Exception as e:
            logger.error(f"❌ Failed to load base model: {e}")
            model_error = f"Base model loading failed: {str(e)}"
            return False

        # Load LoRA adapter
        logger.info(f"🔗 Loading LoRA adapter from {PEFT_MODEL}...")
        try:
            model = PeftModel.from_pretrained(
                base_model,
                PEFT_MODEL,
                torch_dtype=torch.float32
            )
            logger.info("✅ LoRA adapter loaded successfully")
        except Exception as e:
            logger.error(f"❌ Failed to load LoRA adapter: {e}")
            model_error = f"LoRA adapter loading failed: {str(e)}"
            return False

        # Move to device and set eval mode
        try:
            model = model.to(device)
            model.eval()
            logger.info("✅ Model moved to device and set to eval mode")
        except Exception as e:
            logger.error(f"❌ Failed to move model to device: {e}")
            model_error = f"Device placement failed: {str(e)}"
            return False

        logger.info("🎉 Model loaded successfully!")
        return True

    except Exception as e:
        logger.error(f"💥 Critical error during model loading: {e}")
        model_error = f"Critical loading error: {str(e)}"
        import traceback
        traceback.print_exc()
        return False
def generate_response(message, history, max_new_tokens=256, temperature=0.1, top_p=0.9):
    """Generate response from the model"""
    if model is None or tokenizer is None:
        if model_error:
            return f"❌ Model loading failed: {model_error}"
        return "⏳ Model is still loading. Please try again in a moment."

    try:
        logger.info(f"💬 Generating response for: {message[:50]}...")

        # Format the conversation with proper prompt structure
        conversation = ""

        # Keep only the last 3 turns to bound prompt length and memory use
        recent_history = history[-3:]
        for user_msg, assistant_msg in recent_history:
            conversation += f"### Instruction:\n{user_msg}\n\n### Response:\n{assistant_msg}\n\n"

        # Add current message
        conversation += f"### Instruction:\n{message}\n\n### Response:\n"
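        # NOTE: this Alpaca-style "### Instruction / ### Response" template is
        # assumed to match the format used when fine-tuning the LoRA adapter; if
        # the adapter was trained on a different template (e.g. the Llama 3 chat
        # template), generations will be noticeably worse.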
        # Tokenize with length limits
        inputs = tokenizer(
            conversation,
            return_tensors="pt",
            truncation=True,
            max_length=512,  # reduced for memory efficiency
            padding=True
        ).to(device)
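        # NOTE: tokenizers truncate from the right by default, so an over-long
        # prompt would lose its trailing "### Response:\n" cue; setting
        # tokenizer.truncation_side = "left" at load time is a plausible guard,
        # though with history capped at 3 turns this rarely triggers.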
        # Generate with conservative settings
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=temperature > 0,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                repetition_penalty=1.1,
                no_repeat_ngram_size=3
            )
        # Decode response
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Extract just the new response
        if "### Response:\n" in full_response:
            response_parts = full_response.split("### Response:\n")
            response = response_parts[-1].strip()
        else:
            # Fallback: get text after the input
            input_text = tokenizer.decode(inputs['input_ids'][0], skip_special_tokens=True)
            response = full_response[len(input_text):].strip()

        # Clean up: keep only the text before any hallucinated follow-up instruction
        response = response.split("### Instruction:")[0].strip()

        if not response:
            response = "I apologize, but I couldn't generate a proper response. Could you rephrase your question?"

        logger.info(f"✅ Generated response: {response[:50]}...")
        return response

    except Exception as e:
        logger.error(f"💥 Error generating response: {e}")
        return f"Sorry, I encountered an error while generating a response: {str(e)}"
def chat_interface(message, history):
    """Main chat interface function for Gradio"""
    response = generate_response(message, history, max_new_tokens=256)
    return response
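# A minimal sketch of querying the Space programmatically once it is running
# (assumes the Space is public; gr.ChatInterface registers its handler under
# api_name="/chat"):
#
#   from gradio_client import Client
#   client = Client("anysecret-io/anysecret-chat")
#   print(client.predict("How do I configure AnySecret for AWS?", api_name="/chat"))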
# Custom CSS for AnySecret branding
css = """
.gradio-container {
    max-width: 1000px !important;
}

/* Increase chat window height */
.chatbot {
    min-height: 500px !important;
    max-height: 600px !important;
}

/* Style for GPU link */
.gpu-link {
    background: linear-gradient(135deg, #10b981 0%, #059669 100%);
    color: white;
    padding: 12px 20px;
    border-radius: 8px;
    text-decoration: none;
    font-weight: bold;
    display: inline-block;
    margin: 10px 5px;
    transition: transform 0.2s;
}
.gpu-link:hover {
    transform: translateY(-2px);
    color: white;
}
.header {
    text-align: center;
    padding: 20px 0;
    background: linear-gradient(135deg, #6366f1 0%, #818cf8 100%);
    color: white;
    margin-bottom: 20px;
    border-radius: 10px;
}
.header h1 {
    margin: 0;
    font-size: 2.5em;
    font-weight: bold;
}
.header p {
    margin: 10px 0 0 0;
    font-size: 1.1em;
    opacity: 0.9;
}
.error-container {
    background-color: #fee2e2;
    border: 1px solid #fecaca;
    border-radius: 8px;
    padding: 16px;
    margin: 16px 0;
    color: #dc2626;
}
.loading-container {
    background-color: #fef3c7;
    border: 1px solid #fde68a;
    border-radius: 8px;
    padding: 16px;
    margin: 16px 0;
    color: #d97706;
}
.footer {
    text-align: center;
    padding: 20px 0;
    color: #666;
    font-size: 0.9em;
}
.footer-section {
    margin: 15px 0;
}
.footer-section h4 {
    margin: 10px 0 5px 0;
    color: #333;
    font-size: 0.95em;
    font-weight: bold;
}
.model-links {
    display: flex;
    justify-content: center;
    flex-wrap: wrap;
    gap: 8px;
    margin: 8px 0;
}
.model-link {
    background-color: #f3f4f6;
    color: #374151;
    padding: 6px 12px;
    border-radius: 6px;
    text-decoration: none;
    font-size: 0.8em;
    transition: background-color 0.2s;
    display: inline-flex;
    align-items: center;
    gap: 4px;
}
.model-link:hover {
    background-color: #e5e7eb;
    color: #374151;
}
.model-link.disabled {
    opacity: 0.5;
    cursor: not-allowed;
}
.hf-icon::before {
    content: '🤗';
}
.replicate-icon::before {
    content: '🔁';
}
"""
# Start model loading
logger.info("🚀 Initializing AnySecret Chat Assistant...")
model_loaded = load_model()
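# Loading at import time (rather than lazily) means failures surface in the
# Spaces build/startup logs, and model_loaded gates which UI is rendered below.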
# Create Gradio interface
with gr.Blocks(css=css, title="AnySecret Chat Assistant") as demo:
    # Header
    gr.HTML("""
    <div class="header">
        <h1>🔐 AnySecret Chat Assistant</h1>
        <p>Your AI assistant for configuration management across any cloud provider</p>
    </div>
    """)
    if model_loaded:
        # Main chat interface
        chatbot = gr.ChatInterface(
            fn=chat_interface,
            title="",
            description="Ask me anything about AnySecret configuration management, CLI commands, cloud integrations, or best practices!",
            examples=[
                "How do I configure AnySecret for AWS?",
                "What's the difference between secrets and parameters?",
                "Show me a GitHub Actions workflow example",
                "How do I set up AnySecret with Kubernetes?",
                "What are best practices for production secrets?",
                "How do I migrate from AWS Parameter Store?"
            ],
            retry_btn="🔄 Retry",
            undo_btn="↩️ Undo",
            clear_btn="🗑️ Clear Chat",
            submit_btn="Send",
            stop_btn="⏹️ Stop"
        )
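        # NOTE: the retry_btn/undo_btn/clear_btn/submit_btn/stop_btn kwargs were
        # removed from gr.ChatInterface in Gradio 5; if requirements.txt pulls a
        # recent gradio, this call raises a TypeError at startup, so pinning
        # gradio<5 (or dropping these kwargs) may be needed.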
        # Status info and GPU link
        gr.HTML("""
        <div style="text-align: center; padding: 10px; background-color: #dcfce7; border-radius: 8px; margin: 10px 0;">
            <p style="color: #166534; margin: 0 0 10px 0;">
                ✅ Model loaded successfully! Running on CPU for optimal compatibility.
            </p>
            <a href="https://huggingface.co/anysecret-io/anysecret-assistant" target="_blank" class="gpu-link">
                🚀 Run the open AnySecret Assistant on your GPU
            </a>
        </div>
        """)
    else:
        # Error state with details
        error_html = f"""
        <div class="error-container">
            <h2>⚠️ Model Loading Failed</h2>
            <p><strong>Error:</strong> {model_error if model_error else 'Unknown error occurred'}</p>
            <p>This is likely due to:</p>
            <ul>
                <li>Memory constraints on the free tier</li>
                <li>Model access permissions</li>
                <li>Temporary HuggingFace issues</li>
            </ul>
            <p><strong>Solutions:</strong></p>
            <ul>
                <li>Try refreshing the page in a few minutes</li>
                <li>Check that both models exist and are accessible</li>
                <li>Contact support if the issue persists</li>
            </ul>
            <p style="font-size: 0.9em; margin-top: 15px;">
                <strong>Models:</strong><br>
                Base: {BASE_MODEL}<br>
                LoRA: {PEFT_MODEL}
            </p>
        </div>
        """
        gr.HTML(error_html)
    # Footer
    gr.HTML("""
    <div class="footer">
        <div class="footer-section">
            <p>
                Powered by <strong>AnySecret.io</strong> •
                <a href="https://anysecret.io" target="_blank">Website</a> •
                <a href="https://docs.anysecret.io" target="_blank">Documentation</a> •
                <a href="https://github.com/anysecret-io/anysecret-lib" target="_blank">GitHub</a>
            </p>
        </div>
        <div class="footer-section">
            <h4>🤖 LLM Assistant</h4>
            <div class="model-links">
                <a href="https://huggingface.co/anysecret-io/anysecret-assistant" target="_blank" class="model-link">
                    <span class="hf-icon"></span> 3B Model
                </a>
                <a href="#" class="model-link disabled" title="Coming Soon">
                    <span class="hf-icon"></span> 7B Model
                </a>
                <a href="#" class="model-link disabled" title="Coming Soon">
                    <span class="hf-icon"></span> 13B Model
                </a>
                <a href="https://huggingface.co/spaces/anysecret-io/anysecret-chat" target="_blank" class="model-link">
                    <span class="hf-icon"></span> Chat
                </a>
                <a href="#" class="model-link disabled" title="Coming Soon">
                    <span class="replicate-icon"></span> Replicate
                </a>
            </div>
        </div>
        <div class="footer-section">
            <p style="font-size: 0.8em; opacity: 0.7;">
                This assistant is trained on AnySecret documentation and best practices.
                For production support, please visit our official channels.
            </p>
        </div>
    </div>
    """)
# Launch configuration
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True,  # enable debug mode
        show_error=True,
        quiet=False
    )