|
|
from pymongo.mongo_client import MongoClient
|
|
|
from pymongo.server_api import ServerApi
|
|
|
import datetime
|
|
|
import os
|
|
|
import uuid
|
|
|
from dotenv import load_dotenv
|
|
|
|
|
|
|
|
|
# Load variables from a local .env file into the process environment so that
# MONGO_URL can be read below.
load_dotenv()

# os.getenv returns None when MONGO_URL is not set.
# NOTE(review): confirm how MongoClient should behave in that case.
uri = os.getenv("MONGO_URL")

# Single module-wide client, created once at import time.
client = MongoClient(uri, server_api=ServerApi('1'))

# Import-time connectivity check. A failure is reported but deliberately not
# re-raised, so importing this module does not abort when the server is down.
try:
    client.admin.command('ping')
    # The status markers below had been corrupted by a bad decode of their
    # UTF-8 bytes; they are restored to the intended characters.
    print("✅ Connected to MongoDB!")
except Exception as e:
    print("❌ MongoDB connection failed:", e)

# Database and collection that insert_into_mongo() writes to.
db = client["ReasoningData"]
collection = db["formatted_data"]

# IDs of the documents successfully inserted by insert_into_mongo().
global_question_list = []

# Sequence number consumed and advanced by generate_unique_id().
counter = 0
|
|
|
|
|
|
|
|
|
def generate_unique_id():
    """
    Build the next question ID.

    The ID has the form ODR_<YYYYmmddHHMMSS>_<NNNNN>: the fixed prefix "ODR",
    the current time from datetime.datetime.now() down to the second, and the
    module-level ``counter`` zero-padded to five digits.

    Example: ODR_20250822123456_00042

    Side effect: advances the module-level ``counter`` by one, wrapping back
    to 0 after 99999.

    NOTE(review): ``counter`` lives in this module only, so the ID is unique
    within one running process; two processes (or a restart) inside the same
    second would start from the same counter value - confirm this is
    acceptable for callers that use the ID as a database key.
    """
    global counter

    # Format spec on a datetime delegates to strftime.
    stamp = f"{datetime.datetime.now():%Y%m%d%H%M%S}"

    sequence = counter
    new_id = "_".join(("ODR", stamp, f"{sequence:05d}"))

    # Advance only after the ID has been built from the current value.
    counter = (sequence + 1) % 100000
    return new_id
|
|
|
|
|
|
|
|
|
def convert_into_mongo_document(parsed_json):
    """
    Converts parsed Pydantic object (or dict) into Mongo document.

    Args:
        parsed_json: Either a mapping exposing ``.get()``, or a model object
            exposing ``.model_dump()`` (Pydantic v2) or ``.dict()``
            (Pydantic v1), which is first converted to a dict.

    Returns:
        A new dict with a freshly generated "question_id" plus the keys
        "question_content", "answer_language", "reasoning_content" and
        "answer_content". Keys missing from the input are stored as None.
    """
    # Pydantic v2 deprecates .dict() in favour of .model_dump(); prefer the
    # new name when present and fall back to the old one for v1 models.
    # Plain dicts have neither attribute and pass through unchanged.
    if hasattr(parsed_json, "model_dump"):
        parsed_json = parsed_json.model_dump()
    elif hasattr(parsed_json, "dict"):
        parsed_json = parsed_json.dict()

    data = {
        "question_id": generate_unique_id(),
        "question_content": parsed_json.get("question_content"),
        "answer_language": parsed_json.get("answer_language"),
        "reasoning_content": parsed_json.get("reasoning_content"),
        "answer_content": parsed_json.get("answer_content"),
    }
    return data
|
|
|
|
|
|
|
|
|
def insert_into_mongo(data):
    """
    Inserts a document into MongoDB.

    The document's "question_id" value is copied into "_id" before the
    insert, so ``data`` is modified in place. On success the inserted ID is
    appended to ``global_question_list`` and printed.

    Args:
        data: Document dict; must contain a "question_id" key.

    Returns:
        None. Any exception raised while preparing or inserting the document
        (for example a missing "question_id" key or an error from
        ``insert_one``) is printed and swallowed, so the caller is not
        interrupted.
    """
    try:
        data["_id"] = data["question_id"]
        result = collection.insert_one(data)
        global_question_list.append(result.inserted_id)
        # The status markers had been corrupted by a bad decode of their
        # UTF-8 bytes; they are restored to the intended characters.
        print("✅ Inserted document ID:", result.inserted_id)
    except Exception as e:
        print("❌ Error inserting document:", e)
|
|
|
|
|
|
|
|
|
def save_in_db(parsed_json):
    """
    Full pipeline: convert -> insert.

    Passes ``parsed_json`` through convert_into_mongo_document() and hands
    the resulting document to insert_into_mongo(). Returns None.
    """
    insert_into_mongo(convert_into_mongo_document(parsed_json))
|
|
|
|
|
|
|