buzzCraft committed · Commit be5af2d · 1 Parent(s): 681cecd

Created setup.py and updated readme
Files changed:
- .gitignore +1 -0
- README.md +13 -0
- setup.py +40 -0
- src/database.py +6 -2
- src/extractor.py +1 -1
.gitignore CHANGED
@@ -11,3 +11,4 @@
 .idea/vcs.xml
 extractor.log
 data/games.db
+/data/dataset/SoccerNet
README.md CHANGED
@@ -12,6 +12,19 @@ Rename .env_demo to .env and fill in the required fields.
 
 ## Setting up the database
 
+By running
+````bash
+python setup.py
+````
+from the project root, all files will be downloaded and the database will be set up.
+Before running the setup, make sure to fill in the required fields in the .env file, and run
+````bash
+pip install soccernet
+````
+as this package is not in the requirements.txt file.
+Expected setup time is around 10 minutes.
+
+If you want to download the data and set up the database manually, you can do so by following the instructions below.
 ### Required data
 The data required to run the code is not included in this repository.
 The data can be downloaded from the [Soccernet](https://www.soccer-net.org/data).
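The new README instructions assume the .env file is already filled in. The only variable this commit actually reads is DATABASE_PATH (see src/database.py below), so a minimal pre-flight check might look like the sketch here; the example path data/games.db is an assumption taken from .gitignore, not something the commit specifies.

````python
# Hypothetical pre-flight check, not part of the commit: confirm the .env
# provides DATABASE_PATH (the only field src/database.py reads in this diff).
import os
import dotenv

dotenv.load_dotenv()
if not os.getenv("DATABASE_PATH"):
    raise SystemExit("DATABASE_PATH missing from .env, e.g. DATABASE_PATH=data/games.db (assumed path)")
print("DATABASE_PATH =", os.getenv("DATABASE_PATH"))
````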
setup.py ADDED
@@ -0,0 +1,40 @@
+# Try to import the SoccerNet downloader; if it fails, ask the user to install the package
+try:
+    from SoccerNet.Downloader import SoccerNetDownloader
+except ImportError:
+    print("SoccerNet package not found. Please install it by running 'pip install soccernet'")
+    exit(1)
+
+from src.database import process_json_files, fill_Augmented_Team, fill_Augmented_League
+import threading
+
+mySoccerNetDownloader = SoccerNetDownloader(LocalDirectory="data/dataset/SoccerNet")
+
+# Download function
+def download_labels(file_name):
+    try:
+        mySoccerNetDownloader.downloadGames(files=[file_name], split=["train", "valid", "test"])
+    except Exception as e:
+        print(f"Error downloading {file_name}: {e}")
+
+
+# Create threads for downloading different sets of labels
+thread_v2 = threading.Thread(target=download_labels, args=("Labels-v2.json",))
+thread_caption = threading.Thread(target=download_labels, args=("Labels-caption.json",))
+
+# Start the threads
+thread_v2.start()
+thread_caption.start()
+
+# Wait for both threads to complete
+thread_v2.join()
+thread_caption.join()
+
+print("All files downloaded successfully!")
+print("Creating database..")
+
+
+
+process_json_files("data/dataset/SoccerNet/")
+fill_Augmented_Team("data/Dataset/augmented_teams.csv")
+fill_Augmented_League("data/Dataset/augmented_leagues.csv")
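Not part of the commit, but a quick way to sanity-check the result of `python setup.py`: confirm the SQLite file that DATABASE_PATH points at exists and list the tables that were created. No table names are assumed; whatever process_json_files built is simply printed.

````python
# Post-setup sanity check (illustrative, not part of the commit): verify the
# SQLite file exists and list the tables setup.py created via src/database.py.
import os
import dotenv
from sqlalchemy import create_engine, inspect

dotenv.load_dotenv()
db_path = os.getenv("DATABASE_PATH")  # e.g. data/games.db -- assumed, not specified by the commit
if not db_path or not os.path.exists(db_path):
    raise SystemExit("Database file not found; run 'python setup.py' first")

engine = create_engine(f"sqlite:///{db_path}", echo=False)
print("Tables:", inspect(engine).get_table_names())
````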
src/database.py CHANGED
@@ -3,8 +3,12 @@ from sqlalchemy.orm import declarative_base, sessionmaker
 import pandas as pd
 import os
 import json
+import dotenv
+dotenv.load_dotenv()
 
-
+db_uri = os.getenv('DATABASE_PATH')
+db_uri = f"sqlite:///{db_uri}"
+engine = create_engine(db_uri, echo=False)
 Base = declarative_base()
 
 
@@ -445,7 +449,7 @@ def fill_Augmented_League(file_path):
 
 if __name__ == "__main__":
     # Example directory path
-    process_json_files('../data/Dataset/
+    process_json_files('../data/Dataset/SoccerNet/')
     fill_Augmented_Team('../data/Dataset/augmented_teams.csv')
     fill_Augmented_League('../data/Dataset/augmented_leagues.csv')
     # Rename the event/annotation table to something more descriptive. Events are fucking everything else over
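With the engine now created at module level, other modules can bind a session to it directly. A minimal sketch, assuming callers import `engine` from `src.database` (the sessionmaker import is already present in the hunk header above); the query only counts schema objects, since the table layout isn't shown in this diff.

````python
# Illustrative only: how downstream code might use the new module-level engine.
from sqlalchemy import text
from sqlalchemy.orm import sessionmaker

from src.database import engine  # assumes src is importable from the project root, as in setup.py

Session = sessionmaker(bind=engine)
with Session() as session:
    # The schema itself is defined in src/database.py; here we only count objects in it.
    n = session.execute(text("SELECT COUNT(*) FROM sqlite_master")).scalar()
    print(f"{n} objects in the SQLite schema")
````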
src/extractor.py CHANGED
@@ -414,7 +414,7 @@ def update_prompt(prompt, properties, pk, properties_original, retrievers):
     if orig_value != updated_value and pk_value:
         update_statement = f"\n- {orig_value} (now referred to as {updated_value}) has a primary key: {pk_value}."
     elif orig_value != updated_value:
-        update_statement = f"\n- {orig_value} (now referred to as {updated_value}."
+        update_statement = f"\n- {orig_value} (now referred to as {updated_value}.)"
     elif pk_value:
         update_statement = f"\n- {orig_value} has a primary key: {pk_value}."
     elif orig_value == updated_value and pk_value: