AshenH committed on
Commit
319e4ad
·
verified ·
1 Parent(s): 95cc7e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -4
app.py CHANGED
@@ -4,6 +4,8 @@ from datetime import datetime
4
  from pathlib import Path
5
  from typing import Dict, Any, Tuple, Optional
6
  from contextlib import contextmanager
 
 
7
 
8
  import duckdb
9
  import pandas as pd
@@ -36,6 +38,52 @@ EXPORT_DIR.mkdir(exist_ok=True)
36
  # Query timeout in seconds
37
  QUERY_TIMEOUT_SECONDS = 30
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  # -------------------------------------------------------------------
40
  # Data Models
41
  # -------------------------------------------------------------------
@@ -81,8 +129,8 @@ def get_db_connection():
81
  logger.info("Establishing MotherDuck connection")
82
  conn = duckdb.connect(f"md:?motherduck_token={token}")
83
 
84
- # Set query timeout
85
- conn.execute(f"SET statement_timeout='{QUERY_TIMEOUT_SECONDS}s'")
86
 
87
  yield conn
88
 
@@ -114,13 +162,21 @@ def execute_query(conn: duckdb.DuckDBPyConnection, query: str,
114
  Raises:
115
  Exception: For query execution errors
116
  """
 
117
  try:
118
  logger.info(f"Executing {description}")
119
  result = conn.execute(query).df()
120
- logger.info(f"{description} completed: {len(result)} rows returned")
 
 
 
 
 
 
121
  return result
122
  except Exception as e:
123
- logger.error(f"{description} failed: {str(e)}")
 
124
  raise Exception(f"Query execution failed for {description}: {str(e)}")
125
 
126
 
 
4
  from pathlib import Path
5
  from typing import Dict, Any, Tuple, Optional
6
  from contextlib import contextmanager
7
+ import signal
8
+ from functools import wraps
9
 
10
  import duckdb
11
  import pandas as pd
 
38
  # Query timeout in seconds
39
  QUERY_TIMEOUT_SECONDS = 30
40
 
41
+ # -------------------------------------------------------------------
42
+ # Query Timeout Handler (since DuckDB doesn't support statement_timeout)
43
+ # -------------------------------------------------------------------
44
class QueryTimeoutError(Exception):
    """Raised when a query exceeds the timeout limit.

    NOTE(review): no raise site is visible in this excerpt; confirm whether
    any caller raises or catches this type before relying on it.
    """
47
+
48
+
49
def with_timeout(timeout_seconds: int = QUERY_TIMEOUT_SECONDS):
    """
    Decorator that times a query function and reports timeout overruns.

    The timeout is advisory only: the wrapped call is never interrupted.
    After the call returns, its duration is compared with ``timeout_seconds``
    and a warning is logged when the limit was exceeded. For real
    enforcement, consider concurrent.futures or multiprocessing.

    Args:
        timeout_seconds: Duration in seconds above which a completed call is
            logged as having exceeded the timeout.

    Returns:
        A decorator that wraps a function with timing and logging. The
        wrapper returns the wrapped function's result unchanged and re-raises
        any exception it raises after logging the elapsed time.
    """
    # Function-scope import keeps the block self-contained. A monotonic clock
    # is used because wall-clock time can jump (NTP sync, DST, manual change),
    # which would distort the measured duration.
    from time import monotonic

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The limit is logged but not enforced: signal.alarm doesn't work
            # well with threads, and per the original author's note no native
            # query timeout is available to rely on here.
            logger.debug(f"Starting {func.__name__} with {timeout_seconds}s timeout")
            start_time = monotonic()

            # Keep the try body to the wrapped call only, so that a failure
            # in the logging below is never misreported as a query failure.
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                elapsed = monotonic() - start_time
                logger.error(f"{func.__name__} failed after {elapsed:.2f}s: {str(e)}")
                raise

            elapsed = monotonic() - start_time
            if elapsed > timeout_seconds:
                logger.warning(
                    f"{func.__name__} exceeded timeout: {elapsed:.2f}s > {timeout_seconds}s"
                )
            else:
                logger.debug(f"{func.__name__} completed in {elapsed:.2f}s")

            return result

        return wrapper
    return decorator
86
+
87
  # -------------------------------------------------------------------
88
  # Data Models
89
  # -------------------------------------------------------------------
 
129
  logger.info("Establishing MotherDuck connection")
130
  conn = duckdb.connect(f"md:?motherduck_token={token}")
131
 
132
+ # Note: DuckDB/MotherDuck doesn't support statement_timeout like PostgreSQL
133
+ # Query timeouts should be handled at application level with threading/async
134
 
135
  yield conn
136
 
 
162
  Raises:
163
  Exception: For query execution errors
164
  """
165
+ start_time = datetime.now()
166
  try:
167
  logger.info(f"Executing {description}")
168
  result = conn.execute(query).df()
169
+ elapsed = (datetime.now() - start_time).total_seconds()
170
+ logger.info(f"{description} completed: {len(result)} rows in {elapsed:.2f}s")
171
+
172
+ # Warn if query is slow
173
+ if elapsed > QUERY_TIMEOUT_SECONDS:
174
+ logger.warning(f"{description} exceeded timeout threshold: {elapsed:.2f}s")
175
+
176
  return result
177
  except Exception as e:
178
+ elapsed = (datetime.now() - start_time).total_seconds()
179
+ logger.error(f"{description} failed after {elapsed:.2f}s: {str(e)}")
180
  raise Exception(f"Query execution failed for {description}: {str(e)}")
181
 
182