Spaces:
Sleeping
Sleeping
Update generate_indexes.py
Browse files
- generate_indexes.py +0 -7
generate_indexes.py
CHANGED
|
@@ -7,7 +7,6 @@ from typing import List, Dict
|
|
| 7 |
import numpy as np
|
| 8 |
import faiss
|
| 9 |
import pandas as pd
|
| 10 |
-
import tabula
|
| 11 |
from sentence_transformers import SentenceTransformer
|
| 12 |
from rank_bm25 import BM25Okapi
|
| 13 |
|
|
@@ -84,12 +83,6 @@ def read_pdf_tables(pdf_path, pages="all"):
|
|
| 84 |
|
| 85 |
def extract_tables_from_pdf(pdf_path: str, pages="all") -> List[Dict]:
|
| 86 |
"""Extract tables from financial PDF into structured row-year-value dicts."""
|
| 87 |
-
# tables = tabula.read_pdf(
|
| 88 |
-
# pdf_path,
|
| 89 |
-
# pages=pages,
|
| 90 |
-
# multiple_tables=True,
|
| 91 |
-
# pandas_options={'dtype': str}
|
| 92 |
-
# )
|
| 93 |
tables = read_pdf_tables(pdf_path)
|
| 94 |
|
| 95 |
table_rows = []
|
|
|
|
| 7 |
import numpy as np
|
| 8 |
import faiss
|
| 9 |
import pandas as pd
|
|
|
|
| 10 |
from sentence_transformers import SentenceTransformer
|
| 11 |
from rank_bm25 import BM25Okapi
|
| 12 |
|
|
|
|
| 83 |
|
| 84 |
def extract_tables_from_pdf(pdf_path: str, pages="all") -> List[Dict]:
|
| 85 |
"""Extract tables from financial PDF into structured row-year-value dicts."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
tables = read_pdf_tables(pdf_path)
|
| 87 |
|
| 88 |
table_rows = []
|