rishabhsetiya committed on
Commit
a523b60
·
verified ·
1 Parent(s): 9db2d7f

Update generate_indexes.py

Browse files
Files changed (1) hide show
  1. generate_indexes.py +0 -7
generate_indexes.py CHANGED
@@ -7,7 +7,6 @@ from typing import List, Dict
7
  import numpy as np
8
  import faiss
9
  import pandas as pd
10
- import tabula
11
  from sentence_transformers import SentenceTransformer
12
  from rank_bm25 import BM25Okapi
13
 
@@ -84,12 +83,6 @@ def read_pdf_tables(pdf_path, pages="all"):
84
 
85
  def extract_tables_from_pdf(pdf_path: str, pages="all") -> List[Dict]:
86
  """Extract tables from financial PDF into structured row-year-value dicts."""
87
- # tables = tabula.read_pdf(
88
- # pdf_path,
89
- # pages=pages,
90
- # multiple_tables=True,
91
- # pandas_options={'dtype': str}
92
- # )
93
  tables = read_pdf_tables(pdf_path)
94
 
95
  table_rows = []
 
7
  import numpy as np
8
  import faiss
9
  import pandas as pd
 
10
  from sentence_transformers import SentenceTransformer
11
  from rank_bm25 import BM25Okapi
12
 
 
83
 
84
  def extract_tables_from_pdf(pdf_path: str, pages="all") -> List[Dict]:
85
  """Extract tables from financial PDF into structured row-year-value dicts."""
 
 
 
 
 
 
86
  tables = read_pdf_tables(pdf_path)
87
 
88
  table_rows = []