Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,8 @@ from datetime import datetime
|
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Dict, Any, Tuple, Optional
|
| 6 |
from contextlib import contextmanager
|
|
|
|
|
|
|
| 7 |
|
| 8 |
import duckdb
|
| 9 |
import pandas as pd
|
|
@@ -36,6 +38,52 @@ EXPORT_DIR.mkdir(exist_ok=True)
|
|
| 36 |
# Query timeout in seconds
|
| 37 |
QUERY_TIMEOUT_SECONDS = 30
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
# -------------------------------------------------------------------
|
| 40 |
# Data Models
|
| 41 |
# -------------------------------------------------------------------
|
|
@@ -81,8 +129,8 @@ def get_db_connection():
|
|
| 81 |
logger.info("Establishing MotherDuck connection")
|
| 82 |
conn = duckdb.connect(f"md:?motherduck_token={token}")
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
|
| 87 |
yield conn
|
| 88 |
|
|
@@ -114,13 +162,21 @@ def execute_query(conn: duckdb.DuckDBPyConnection, query: str,
|
|
| 114 |
Raises:
|
| 115 |
Exception: For query execution errors
|
| 116 |
"""
|
|
|
|
| 117 |
try:
|
| 118 |
logger.info(f"Executing {description}")
|
| 119 |
result = conn.execute(query).df()
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
return result
|
| 122 |
except Exception as e:
|
| 123 |
-
|
|
|
|
| 124 |
raise Exception(f"Query execution failed for {description}: {str(e)}")
|
| 125 |
|
| 126 |
|
|
|
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Dict, Any, Tuple, Optional
|
| 6 |
from contextlib import contextmanager
|
| 7 |
+
import signal
|
| 8 |
+
from functools import wraps
|
| 9 |
|
| 10 |
import duckdb
|
| 11 |
import pandas as pd
|
|
|
|
| 38 |
# Query timeout in seconds
|
| 39 |
QUERY_TIMEOUT_SECONDS = 30
|
| 40 |
|
| 41 |
+
# -------------------------------------------------------------------
|
| 42 |
+
# Query Timeout Handler (since DuckDB doesn't support statement_timeout)
|
| 43 |
+
# -------------------------------------------------------------------
|
| 44 |
+
class QueryTimeoutError(Exception):
    """Signals that a query ran longer than the configured timeout limit."""
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def with_timeout(timeout_seconds: int = QUERY_TIMEOUT_SECONDS):
    """
    Decorator factory that times a call and flags it when it runs too long.

    The timeout is advisory only: the wrapped function always runs to
    completion and its result is returned unchanged. A call that takes longer
    than ``timeout_seconds`` produces a warning in the log; it is never
    interrupted and no timeout exception is raised here. Hard enforcement
    would require running the call in a separate thread or process (for
    example with concurrent.futures or multiprocessing).

    Args:
        timeout_seconds: Duration in seconds above which a completed call is
            logged as having exceeded the timeout.

    Returns:
        A decorator that wraps a function with duration logging. Exceptions
        raised by the wrapped function are logged and re-raised unchanged.
    """
    # Local import keeps this block self-contained; it runs once per
    # decorator creation, not once per decorated call.
    import time

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The limit is only reported, not enforced (the original author
            # notes that signal.alarm does not work well with threads and
            # that DuckDB has no native query timeout).
            logger.debug(
                "Starting %s with %ss timeout", func.__name__, timeout_seconds
            )
            # perf_counter is monotonic, so the measured duration cannot go
            # negative or jump when the system clock is adjusted, unlike
            # differences of datetime.now().
            started = time.perf_counter()

            try:
                result = func(*args, **kwargs)
            except Exception as e:
                elapsed = time.perf_counter() - started
                logger.error(
                    "%s failed after %.2fs: %s", func.__name__, elapsed, e
                )
                raise
            else:
                elapsed = time.perf_counter() - started
                if elapsed > timeout_seconds:
                    logger.warning(
                        "%s exceeded timeout: %.2fs > %ss",
                        func.__name__,
                        elapsed,
                        timeout_seconds,
                    )
                else:
                    logger.debug(
                        "%s completed in %.2fs", func.__name__, elapsed
                    )
                return result

        return wrapper
    return decorator
|
| 86 |
+
|
| 87 |
# -------------------------------------------------------------------
|
| 88 |
# Data Models
|
| 89 |
# -------------------------------------------------------------------
|
|
|
|
| 129 |
logger.info("Establishing MotherDuck connection")
|
| 130 |
conn = duckdb.connect(f"md:?motherduck_token={token}")
|
| 131 |
|
| 132 |
+
# Note: DuckDB/MotherDuck doesn't support statement_timeout like PostgreSQL
|
| 133 |
+
# Query timeouts should be handled at application level with threading/async
|
| 134 |
|
| 135 |
yield conn
|
| 136 |
|
|
|
|
| 162 |
Raises:
|
| 163 |
Exception: For query execution errors
|
| 164 |
"""
|
| 165 |
+
start_time = datetime.now()
|
| 166 |
try:
|
| 167 |
logger.info(f"Executing {description}")
|
| 168 |
result = conn.execute(query).df()
|
| 169 |
+
elapsed = (datetime.now() - start_time).total_seconds()
|
| 170 |
+
logger.info(f"{description} completed: {len(result)} rows in {elapsed:.2f}s")
|
| 171 |
+
|
| 172 |
+
# Warn if query is slow
|
| 173 |
+
if elapsed > QUERY_TIMEOUT_SECONDS:
|
| 174 |
+
logger.warning(f"{description} exceeded timeout threshold: {elapsed:.2f}s")
|
| 175 |
+
|
| 176 |
return result
|
| 177 |
except Exception as e:
|
| 178 |
+
elapsed = (datetime.now() - start_time).total_seconds()
|
| 179 |
+
logger.error(f"{description} failed after {elapsed:.2f}s: {str(e)}")
|
| 180 |
raise Exception(f"Query execution failed for {description}: {str(e)}")
|
| 181 |
|
| 182 |
|