Portfolio-DE / build_index.py
KSuhas's picture
Upload folder using huggingface_hub
9e2d0a3 verified
import os
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, Settings
from llama_index.embeddings.fastembed import FastEmbedEmbedding
# ----- 1) Load Data -----
# Source documents for the portfolio index. Paths are relative to the
# current working directory, so run this script from the folder that
# contains these files.
files_to_index = [
    "./Lebenslauf_SuhasKamuni.pdf",
    "./about_me.txt",
    "./more_about_me.txt",
]

try:
    documents = SimpleDirectoryReader(input_files=files_to_index).load_data()
except (FileNotFoundError, ValueError) as e:
    # SimpleDirectoryReader signals a missing entry in `input_files` with
    # ValueError; FileNotFoundError is still caught for errors raised while
    # the files are actually being read.
    print(f"Error: could not load the input files {files_to_index}: {e}")
    # Exit with a non-zero status so shells/CI see the failure. `exit()` is
    # an interactive helper injected by the `site` module and returns 0.
    raise SystemExit(1)

print(f"Loaded {len(documents)} document(s) from {len(files_to_index)} input file(s).")
# ----- 2) Configure Embedding Model -----
# Register a FastEmbed model as the global LlamaIndex embedding model so the
# index built below uses it.
# NOTE(review): the model name suggests an English-only model while the input
# file names look German - confirm this is intended. Presumably whatever loads
# the persisted index must configure the same embedding model; verify.
try:
    embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
except Exception as e:
    # Top-level boundary: report whatever went wrong while constructing the
    # model and stop with a non-zero status (`exit()` would report success).
    print(f"Error configuring Hugging Face model: {e}")
    raise SystemExit(1)

Settings.embed_model = embed_model
print("✅ Hugging Face embedding model configured.")
# ----- 3) Build and Store Index -----
# Build a vector index over the loaded documents (with a progress display),
# then write it to ./index_storage.
index = VectorStoreIndex.from_documents(
    documents,
    show_progress=True,
)

# Persist through the storage context that the index itself holds.
storage = index.storage_context
storage.persist(persist_dir="./index_storage")

print("\n✅ Portfolio index built.")