# Image for the backend ASGI app (main:app), served by uvicorn on port 7860.
FROM python:3.10-slim

# Create the unprivileged runtime user while still root; the switch to it
# happens only after the root-only package installation below.
RUN useradd -m -u 1000 user

# NOTE(review): WORKDIR creates /app as root. Only the files copied later with
# --chown are owned by `user`, so the directory itself is not writable by the
# app. If the backend writes into /app at runtime, chown the directory here --
# confirm against the application code.
WORKDIR /app

# Install Tesseract OCR plus GLib/X11 libraries (presumably required by an
# imaging dependency listed in requirements.txt -- confirm).
# update + install + list cleanup share one layer so the apt index is never
# stale and never stored in the image.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        tesseract-ocr && \
    rm -rf /var/lib/apt/lists/*

# Everything from here on (build steps and the running container) is non-root.
USER user

# pip run as a non-root user installs packages and their console scripts
# (e.g. uvicorn) under ~/.local. Put that bin directory on PATH *before* the
# pip steps so the scripts are resolvable during the build as well as at
# runtime.
ENV PATH="/home/user/.local/bin:${PATH}"

# Copy only the dependency manifest first so the dependency layers stay cached
# until requirements.txt itself changes.
COPY --chown=user Backend/requirements.txt .

# Install Python dependencies (including nltk and langdetect).
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Reinstall nltk as the runtime user and download the punkt_tab tokenizer data
# so it lands in a location this user can read.
# NOTE(review): --force-reinstall also reinstalls nltk's dependencies and may
# override versions pinned in requirements.txt -- confirm this is still needed.
RUN pip install --no-cache-dir --upgrade --force-reinstall nltk && \
    python -m nltk.downloader punkt_tab

# Fail the build early if punkt_tab is missing; on success the resolved path
# is printed in the build log.
RUN python -c "import nltk; print(nltk.data.find('tokenizers/punkt_tab'))"

# Copy the rest of the backend code last: it changes most often.
COPY --chown=user Backend/ .

# Documentation only: the port uvicorn binds to below.
EXPOSE 7860

# Exec form so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]