Spaces:
Runtime error
Runtime error
| import requests | |
| import json | |
def _parse_language_rows(readme_text):
    """Collect the pipe-delimited table rows that follow "Language coverage".

    Scanning starts after the first line containing the text
    "Language coverage". From there on, every line that contains a ``|`` and
    splits into at least four pipe-separated parts yields one dict with the
    keys ``code``, ``identifier`` and ``name``. The table's label row and
    divider row are returned as well; the caller is expected to drop them.
    """
    rows = []
    in_table = False
    for line in readme_text.split('\n'):
        if "Language coverage" in line:
            in_table = True
            continue
        # Blank lines contain no pipe, so this one test skips them too.
        if not in_table or '|' not in line:
            continue
        parts = line.split('|')
        # A row "| a | b | c |" splits into ['', ' a ', ' b ', ' c ', ''];
        # indices 1..3 are read below, so at least four parts are required.
        # Shorter lines are skipped instead of raising IndexError.
        if len(parts) < 4:
            continue
        rows.append({
            # [1:-1] drops the first and last character of the cell --
            # presumably the backticks wrapping the value; TODO confirm
            # against the README's table formatting.
            "code": parts[1].strip()[1:-1],
            "identifier": parts[2].strip()[1:-1],
            "name": parts[3].strip(),
        })
    return rows


def generate_lang_code_file():
    """Download the Flores README and save its language table as JSON.

    The table rows found after the "Language coverage" line are written to
    ``flores_language_codes.json`` in the current working directory as a
    list of ``{"code", "identifier", "name"}`` objects, and a confirmation
    message is printed.

    Raises:
        requests.exceptions.RequestException: if the download fails, times
            out, or the server answers with an HTTP error status. In that
            case the JSON file is left untouched.
    """
    # URL of the Flores README containing the language codes
    url = 'https://raw.githubusercontent.com/openlanguagedata/flores/main/README.md'

    # Fetch the page content. The timeout keeps a stalled connection from
    # hanging forever; raise_for_status() prevents an error page from being
    # parsed and overwriting the file with an empty list.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Omit the labels and divider (the first two rows of the table)
    languages = _parse_language_rows(response.text)[2:]

    # Convert to JSON
    json_data = json.dumps(languages, indent=4)

    # Save the JSON data to a file
    file_path = 'flores_language_codes.json'
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(json_data)
    print(f"JSON data saved to {file_path}")
| # generate_lang_code_file() | |
def get_language_code(language_name,
                      json_file_path='flores_language_codes.json'):
    """Return the code of the language whose name matches *language_name*.

    Args:
        language_name: Name to look up. It is compared case-insensitively
            against each entry's ``name`` field. ``None`` is treated as
            "not found".
        json_file_path: Path to a JSON file holding a list of objects with
            at least the keys ``name`` and ``code``.

    Returns:
        The ``code`` value of the first matching entry, or ``None`` if no
        entry matches.

    Raises:
        FileNotFoundError: if *json_file_path* does not exist.
    """
    # Load the JSON data from the file. The encoding is given explicitly so
    # non-ASCII language names decode the same way on every platform
    # instead of depending on the locale's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    if language_name is None:
        return None

    # Lower-case the query once rather than on every iteration.
    wanted = language_name.lower()

    # Search for the language code by language name
    for language in languages:
        if language['name'].lower() == wanted:
            return language['code']
    return None  # Return None if the language name is not found
def get_language_list(
        json_file_path='flores_language_codes.json'):
    """Return the ``name`` of every entry in the language JSON file.

    Args:
        json_file_path: Path to a JSON file holding a list of objects that
            each have a ``name`` key.

    Returns:
        A list of the ``name`` values, in file order.

    Raises:
        FileNotFoundError: if *json_file_path* does not exist.
    """
    # Load the JSON data from the file. The encoding is given explicitly so
    # non-ASCII language names decode the same way on every platform
    # instead of depending on the locale's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        languages = json.load(file)

    # extract language name
    return [language['name'] for language in languages]