import os
import io
import re
import time

import requests
import pdfplumber
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import folium
import gradio as gr

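# Local CSV cache and the official source page for the bathing-site data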
file_name = 'bathing_sites.csv'
url = 'https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html'

def extract_coordinates(url):
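    """Extract Web Mercator X/Y from a geoportail URL and convert them to WGS84 lon/lat."""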
    x_match = re.search(r'X=(\d+)', url)
    y_match = re.search(r'Y=(\d+)', url)

    x = int(x_match.group(1)) if x_match else 0
    y = int(y_match.group(1)) if y_match else 0

    # Inverse spherical Mercator (EPSG:3857): R is the projection's Earth radius in metres
    R = 6378137
    if x != 0:
        x = (x / R) * (180 / np.pi)
    if y != 0:
        y = (180 / np.pi) * (2 * np.arctan(np.exp(y / R)) - np.pi / 2)

    return pd.Series([x, y])


def get_coordinates(pdf_list):
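    """Download each bathing-site PDF and scrape the per-sector coordinate tables into a DataFrame."""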
    sites_list = []
    for lake in pdf_list:
        url_pdf = 'https:' + lake
        response_pdf = requests.get(url_pdf)
        bytes_io = io.BytesIO(response_pdf.content)

        with pdfplumber.open(bytes_io) as pdf:
            # The lake name sits on the second text line of the first page
            page = pdf.pages[0]
            text = page.extract_text()
            site = (text.split('\n')[1].split(' ')[-1].split('’')[-1]
                    .replace('-', ' ').title().replace('Sure', 'Sûre').strip())
            for page in pdf.pages:
                tables = page.extract_table()
                if tables and ('baignade' in tables[0][0]):
                    headers = tables[0][:3]
                    headers.append('Sector')
                    headers.append('Lake')
                    i = 1
                    for table in tables[1:]:
                        table = table[:3]
                        # Weiswampach and Remerschen number their zones; Echternach has a single one
                        if site in ('Weiswampach', 'Remerschen'):
                            table.append('Zone ' + str(i))
                        elif site == 'Echternach':
                            table.append('Designated Zone')
                        else:
                            table.append(table[0].split(' ')[1].strip())
                        table.append(site)
                        sites_list.append(table)
                        i += 1

    df = pd.DataFrame(sites_list, columns=headers)
    df = df.dropna()
    # Drop the first column and reverse the rest into Lake/Sector/Y/X order
    df = df.iloc[:, 1:]
    df = df.iloc[:, ::-1]
    df.columns = ['Lake', 'Sector', 'Y', 'X']
    df[['Y', 'X']] = df[['Y', 'X']].apply(pd.to_numeric, errors='coerce')
    df = df.drop_duplicates(subset=['Lake', 'Sector'], keep='last').reset_index(drop=True)
    return df


def file_download():
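    """Scrape the government page, merge in the PDF coordinates, and cache the result as CSV."""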
    # The first HTML table on the page holds the bathing-site overview
    df = pd.read_html(url)[0]

    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Pull the traffic-light images, geoportail links, and per-lake PDF links out of the table cells
    df['images'] = [tag.find("img")["src"] for tag in soup.select("td:has(img)")]
    df['URL coordinates'] = [tag.find("a")["href"] for tag in soup.select("td:has(a)") if 'geoportail' in tag.find("a")["href"]]
    pdf_list = [tag.find("a")["href"] for tag in soup.select("td:has(a)") if 'pdf' in tag.find("a")["href"]]
    df_coord = get_coordinates(pdf_list)

    df.columns = ['Lake', 'Sector', 'Water Quality', 'Swimming allowed', 'Reason for ban', 'Traffic lights', 'URL coordinates']

    # Translate the French quality labels and strip generic lake-name prefixes
    name_trim = ['Lac de la ', 'Lac de ', 'Etangs de ', "Lac d'"]
    quality_dict = {'Excellente': 'Excellent', 'Bonne': 'Good', 'Suffisante': 'Adequate', 'Insuffisante': 'Inadequate'}
    df['Water Quality'] = df['Water Quality'].map(quality_dict).fillna(df['Water Quality'])
    df['Lake'] = df['Lake'].str.replace('|'.join(name_trim), '', regex=True)
    df['Lake'] = df['Lake'].str.split('(').str[0].str.strip()
    df['Sector'] = df['Sector'].astype(str).apply(lambda x: 'Designated Zone' if 'baignade' in x else x)
    df['Reason for ban'] = df['Reason for ban'].astype(str).apply(lambda x: 'nan' if '* Les informations ' in x else x)
    df['Reason for ban'] = df['Reason for ban'].replace({'nan': 'No ban'})
    # Derive the swimming status from the traffic-light image names (greng = green, roud = red)
    df['Swimming allowed'] = df['Swimming allowed'].astype('string')
    df.loc[df['Traffic lights'].str.contains('greng'), 'Swimming allowed'] = 'Yes'
    df.loc[df['Traffic lights'].str.contains('roud'), 'Swimming allowed'] = 'No'
    df = df.fillna('N/A')

    df[['long', 'lat']] = df['URL coordinates'].apply(extract_coordinates)
    df[['long', 'lat']] = df[['long', 'lat']].apply(pd.to_numeric, errors='coerce')

    df = df.reset_index(drop=True)
    df = pd.merge(left=df, right=df_coord, how='left', on=['Lake', 'Sector'])

    # Treat zero coordinates as missing and fall back to the values scraped from the PDFs
    df.loc[df['long'] == 0, 'long'] = np.nan
    df.loc[df['lat'] == 0, 'lat'] = np.nan
    df['long'] = df['long'].fillna(df['X'])
    df['lat'] = df['lat'].fillna(df['Y'])

    df.drop(columns=['Traffic lights', 'URL coordinates', 'X', 'Y'], inplace=True)

    df.to_csv(file_name, index=False)
    return df


def load_data(force_refresh=False):
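    """Return the cached CSV, re-downloading when forced, missing, or more than an hour old."""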
    if force_refresh or (not os.path.exists(file_name)) or ((time.time() - os.path.getmtime(file_name)) > 3600):
        return file_download()
    return pd.read_csv(file_name)


def create_map(force_refresh=False):
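    """Build a folium map of all bathing sites and return its embeddable HTML."""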
    df = load_data(force_refresh)

    # Centre on Luxembourg if there is no data, otherwise on the mean of the coordinates
    if df.empty:
        m = folium.Map(location=[49.8153, 6.1296], zoom_start=9)
    else:
        m = folium.Map(location=[df['lat'].mean(), df['long'].mean()], zoom_start=9)

    for _, row in df.iterrows():
        # Skip sites whose coordinates could not be recovered from either source
        if pd.isna(row['lat']) or pd.isna(row['long']):
            continue

        color = 'green' if row['Swimming allowed'] == 'Yes' else \
                'red' if row['Swimming allowed'] == 'No' else 'gray'

        popup_text = f"""
        <b>Lake:</b> {row['Lake']}<br>
        <b>Sector:</b> {row['Sector']}<br>
        <b>Latitude:</b> {row['lat']:.6f}<br>
        <b>Longitude:</b> {row['long']:.6f}<br>
        <b>Water Quality:</b> {row['Water Quality']}<br>
        <b>Swimming allowed:</b> {row['Swimming allowed']}<br>
        <b>Reason for ban:</b> {row['Reason for ban']}
        """

        folium.CircleMarker(
            location=[row['lat'], row['long']],
            radius=8,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=folium.Popup(popup_text, max_width=300)
        ).add_to(m)

    folium.TileLayer('openstreetmap').add_to(m)

    # Hide the default Leaflet attribution control
    m.get_root().html.add_child(folium.Element("""
        <style>
        .leaflet-control-attribution {
            display: none !important;
        }
        </style>
    """))

    return m._repr_html_()


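# Gradio front end: a refresh button above the embedded folium map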
with gr.Blocks(title="LuxSplash") as app:
    gr.Markdown("# 🏊‍♂️ LuxSplash")
    gr.Markdown("[Freedom Luxembourg](https://freeletz.lu)")

    with gr.Row():
        refresh_btn = gr.Button("Refresh Data", variant="primary")

    map_html = gr.HTML()

    # Render from the cache on page load; the button forces a fresh download
    app.load(fn=lambda: create_map(False), inputs=None, outputs=map_html)

    refresh_btn.click(
        fn=lambda: create_map(True),
        inputs=None,
        outputs=map_html
    )

    gr.Markdown(
        "Data sourced from the official Luxembourg government website: "
        "[eau.gouvernement.lu](https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html)"
    )


if __name__ == "__main__":
    app.launch()