import os
import re
import sys
import time
import subprocess

import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import gradio as gr
import folium
from folium.plugins import FloatImage

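# When running inside a Hugging Face Space, install Playwright and its Chromium browser
# at startup so the geoportal links can be resolved in a headless browser.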
if "SPACE_ID" in os.environ: |
|
|
print("Running in Hugging Face Space - installing Playwright...") |
|
|
subprocess.run([sys.executable, "-m", "pip", "install", "playwright"], check=True) |
|
|
subprocess.run([sys.executable, "-m", "playwright", "install", "chromium"], check=True) |
|
|
|
|
|
|
|
|
from playwright.sync_api import sync_playwright |
|
|
|
|
|
|
|
|
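# Local CSV cache for the scraped data and the government page it comes from.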
file_name = 'bathing_sites.csv'
url = 'https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html'

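# Each site links to the national geoportal; the link resolves to a URL whose query string
# carries the site's X/Y map coordinates, so follow it in headless Chromium and read back
# the final URL.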
def get_final_url(url):
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        # Only open the page if the coordinates are not already in the URL.
        if ('&X=' not in url) or ('&Y=' not in url):
            page.goto(url, timeout=5000)
            page.wait_for_timeout(2000)
            url = page.url
        browser.close()
    return url

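# Pull the X= and Y= query parameters (Web Mercator metres) out of a resolved geoportal URL.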
def extract_coordinates(url):
    x_match = re.search(r'X=(\d+)', url)
    y_match = re.search(r'Y=(\d+)', url)

    x = x_match.group(1) if x_match else None
    y = y_match.group(1) if y_match else None

    return pd.Series([x, y])

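# Inverse spherical Mercator (EPSG:3857 -> WGS84): X/Y are metres on a sphere of radius R,
# so longitude is x/R converted to degrees and latitude is the Gudermannian of y/R.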
def web_mercator_to_wgs84(x, y):
    R = 6378137  # Earth radius used by the spherical Mercator projection, in metres
    lon = (x / R) * (180 / np.pi)
    lat = (180 / np.pi) * (2 * np.arctan(np.exp(y / R)) - np.pi / 2)
    return lat, lon

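# Scrape the bathing-sites table from the government page, attach the traffic-light image
# and geoportal link from each row, normalise the French labels to English, convert the
# coordinates to WGS84, and cache the result as a CSV.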
def file_download():
    df = pd.read_html(url)[0]

    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # pd.read_html keeps only the cell text, so pull the traffic-light image path
    # and the geoportal link out of the raw HTML separately.
    df['images'] = [tag.find("img")["src"] for tag in soup.select("td:has(img)")]
    df['geoport'] = [tag.find("a")["href"] for tag in soup.select("td:has(a)") if 'geoportail' in tag.find("a")["href"]]

    df['geoport'] = df['geoport'].apply(get_final_url)

    df.columns = ['Lake', 'Sector', 'Water Quality', 'Swimming allowed', 'Reason for ban', 'Traffic lights', 'URL coordinates']

    # Translate the French quality labels and tidy up the lake and sector names.
    name_trim = ['Lac de la ', 'Lac de ', 'Etangs de ', 'Lac d\'']
    quality_dict = {'Excellente': 'Excellent', 'Bonne': 'Good', 'Suffisante': 'Adequate', 'Insuffisante': 'Inadequate'}
    df['Water Quality'] = df['Water Quality'].map(quality_dict).fillna(df['Water Quality'])
    df['Lake'] = df['Lake'].str.replace('|'.join(name_trim), '', regex=True)
    df['Lake'] = df['Lake'].str.split('(').str[0].str.strip()
    df['Sector'] = df['Sector'].astype(str).apply(lambda x: 'Designated Zone' if 'baignade' in x else x)
    df['Reason for ban'] = df['Reason for ban'].astype(str).apply(lambda x: 'nan' if '* Les informations ' in x else x)
    df['Reason for ban'] = df['Reason for ban'].replace({'nan': 'No ban'})

    # The traffic-light image name is in Luxembourgish: 'greng' (green) allows swimming, 'roud' (red) bans it.
    df['Swimming allowed'] = df['Swimming allowed'].astype('string')
    df.loc[df['Traffic lights'].str.contains('greng'), 'Swimming allowed'] = 'Yes'
    df.loc[df['Traffic lights'].str.contains('roud'), 'Swimming allowed'] = 'No'
    df = df.fillna('N/A')

    # Convert the geoportal X/Y (Web Mercator) coordinates to WGS84 latitude/longitude.
    df[['X', 'Y']] = df['URL coordinates'].apply(extract_coordinates)
    df[['X', 'Y']] = df[['X', 'Y']].apply(pd.to_numeric, errors='coerce')
    df[['lat', 'long']] = df.apply(lambda row: web_mercator_to_wgs84(row['X'], row['Y']), axis=1, result_type='expand')
    df.drop(columns=['Traffic lights', 'URL coordinates', 'X', 'Y'], inplace=True)

    df.to_csv(file_name, index=False)
    return df

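# Reuse the cached CSV if it exists and is under an hour old; otherwise (or when a
# refresh is forced) scrape the page again.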
def load_data(force_refresh=False):
    if force_refresh or (not os.path.exists(file_name)) or ((time.time() - os.path.getmtime(file_name)) > 3600):
        return file_download()
    return pd.read_csv(file_name)

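# Build the folium map: one circle marker per site, coloured green/red/grey by swimming
# status, with a popup of the site details. Returns the map as embeddable HTML.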
def create_map(force_refresh=False):
    df = load_data(force_refresh)

    # Centre on Luxembourg if there is no data, otherwise on the mean site position.
    if df.empty:
        m = folium.Map(location=[49.8153, 6.1296], zoom_start=9)
    else:
        m = folium.Map(location=[df['lat'].mean(), df['long'].mean()], zoom_start=9)

    for _, row in df.iterrows():
        color = 'green' if row['Swimming allowed'] == 'Yes' else \
                'red' if row['Swimming allowed'] == 'No' else 'gray'

        popup_text = f"""
        <b>Lake:</b> {row['Lake']}<br>
        <b>Sector:</b> {row['Sector']}<br>
        <b>Latitude:</b> {row['lat']:.6f}<br>
        <b>Longitude:</b> {row['long']:.6f}<br>
        <b>Water Quality:</b> {row['Water Quality']}<br>
        <b>Swimming allowed:</b> {row['Swimming allowed']}<br>
        <b>Reason for ban:</b> {row['Reason for ban']}
        """

        folium.CircleMarker(
            location=[row['lat'], row['long']],
            radius=8,
            color=color,
            fill=True,
            fill_color=color,
            fill_opacity=0.7,
            popup=folium.Popup(popup_text, max_width=300)
        ).add_to(m)

    folium.TileLayer('openstreetmap').add_to(m)

    # Hide the Leaflet attribution control.
    m.get_root().html.add_child(folium.Element("""
    <style>
    .leaflet-control-attribution {
        display: none !important;
    }
    </style>
    """))

    return m._repr_html_()

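# Gradio UI: title, a refresh button that forces a re-scrape, and the map itself.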
with gr.Blocks(title="LuxSplash") as app:
    gr.Markdown("# 🏊‍♀️ LuxSplash")
    gr.Markdown("[Freedom Luxembourg](https://freeletz.lu)")

    with gr.Row():
        refresh_btn = gr.Button("Refresh Data", variant="primary")

    map_html = gr.HTML()

    # Draw the map on page load; the button forces a fresh scrape and redraws it.
    app.load(fn=lambda: create_map(False), inputs=None, outputs=map_html)

    refresh_btn.click(
        fn=lambda: create_map(True),
        inputs=None,
        outputs=map_html
    )

    gr.Markdown(
        "Data sourced from the official Luxembourg government website, the only authoritative source for bathing site information: "
        "[eau.gouvernement.lu](https://eau.gouvernement.lu/fr/domaines-activite/eauxbaignade/sites-de-baignade.html)"
    )

if __name__ == "__main__":
    app.launch()