Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import plotly.express as px | |
| import pandas as pd | |
| import random | |
| import logging | |
| from sentence_transformers import SentenceTransformer, util | |
| from datasets import load_dataset | |
| def load_model(name): | |
| return SentenceTransformer(name) | |
| def load_words_dataset(): | |
| dataset = load_dataset("marksverdhei/wordnet-definitions-en-2021", split="train") | |
| return dataset["Word"] | |
| def choose_secret_word(): | |
| all_words = load_words_dataset() | |
| return random.choice(all_words) | |
| all_words = load_words_dataset() | |
| model_names = [ | |
| 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', | |
| 'BAAI/bge-small-en-v1.5' | |
| ] | |
| models = { | |
| name: load_model(name) for name in model_names | |
| } | |
| secret_word =choose_secret_word().lower().strip() | |
| secret_embedding = [models[name].encode(secret_word) for name in model_names] | |
| print("Secret word ", secret_word) | |
| if 'words' not in st.session_state: | |
| st.session_state['words'] = [] | |
| st.write('Try to guess a secret word by semantic similarity') | |
| word = st.text_input("Input a word") | |
| used_words = [w[0] for w in st.session_state['words']] | |
| if st.button("Guess") or word: | |
| if word not in used_words: | |
| word_embedding = [models[name].encode(word.lower().strip()) for name in model_names] | |
| similarities = [util.pytorch_cos_sim(secret_embedding[i], word_embedding[i]).cpu().numpy()[0][0] for i, name in enumerate(model_names)] | |
| st.session_state['words'].append([str(word)] + similarities) | |
| words_df = pd.DataFrame( | |
| st.session_state['words'], | |
| columns=["word"] + ["Similarity for " + name for name in model_names] | |
| ).sort_values(by=["Similarity for sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"], ascending=False) | |
| st.dataframe(words_df, use_container_width=True) | |