Spaces:
Runtime error
Runtime error
ncoop57
committed on
Commit
·
a2dd03e
1
Parent(s):
e0be252
Add additional checks
Browse files
- app.py +42 -5
- requirements.txt +1 -1
app.py
CHANGED
|
@@ -42,9 +42,12 @@ for name in dataset_names:
|
|
| 42 |
)
|
| 43 |
dataset_data[name] = {
|
| 44 |
"ds": ds,
|
| 45 |
-
"
|
| 46 |
"check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
|
| 47 |
"check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
|
|
|
|
|
|
|
|
|
|
| 48 |
}
|
| 49 |
|
| 50 |
def plt_plot(criteria, dataset, threshold):
|
|
@@ -83,6 +86,18 @@ def check_filtered(criteria, dataset, threshold):
|
|
| 83 |
with gr.Blocks() as demo:
|
| 84 |
dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
with gr.Tab("Character Repetition Criteria"):
|
| 87 |
# plot some random data
|
| 88 |
plot = gr.Plot()
|
|
@@ -95,18 +110,18 @@ with gr.Blocks() as demo:
|
|
| 95 |
check_fn = partial(check_filtered, "check_char_repetition_criteria")
|
| 96 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 97 |
|
| 98 |
-
with gr.Tab("Word
|
| 99 |
plot = gr.Plot()
|
| 100 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 101 |
calculate = gr.Button("Calculate")
|
| 102 |
check = gr.Button("Check Filtered Data")
|
| 103 |
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
| 104 |
-
plot_fn = partial(plt_plot, "
|
| 105 |
calculate.click(plot_fn, [dataset, threshold], plot)
|
| 106 |
-
check_fn = partial(check_filtered, "
|
| 107 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 108 |
|
| 109 |
-
with gr.Tab("Flagged Word Criteria")
|
| 110 |
plot = gr.Plot()
|
| 111 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 112 |
calculate = gr.Button("Calculate")
|
|
@@ -117,5 +132,27 @@ with gr.Blocks() as demo:
|
|
| 117 |
check_fn = partial(check_filtered, "check_flagged_words_criteria")
|
| 118 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
if __name__ == "__main__":
|
| 121 |
demo.launch()
|
|
|
|
| 42 |
)
|
| 43 |
dataset_data[name] = {
|
| 44 |
"ds": ds,
|
| 45 |
+
"check_word_number_criteria": np.array(ds["check_word_number_criteria"]),
|
| 46 |
"check_char_repetition_criteria": np.array(ds["check_char_repetition_criteria"]),
|
| 47 |
"check_flagged_words_criteria": np.array(ds["check_flagged_words_criteria"]),
|
| 48 |
+
"check_stop_word_ratio_criteria": np.array(ds["check_stop_word_ratio_criteria"]),
|
| 49 |
+
"check_perplexity_criteria": np.array(ds["check_perplexity_criteria"]),
|
| 50 |
+
"check_language_criteria": np.array(ds["check_language_criteria"]),
|
| 51 |
}
|
| 52 |
|
| 53 |
def plt_plot(criteria, dataset, threshold):
|
|
|
|
| 86 |
with gr.Blocks() as demo:
|
| 87 |
dataset = gr.Radio(dataset_names, label="Dataset", value="arXiv")
|
| 88 |
|
| 89 |
+
with gr.Tab("Number of Words Criteria"):
|
| 90 |
+
# plot some random data
|
| 91 |
+
plot = gr.Plot()
|
| 92 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 93 |
+
calculate = gr.Button("Calculate")
|
| 94 |
+
check = gr.Button("Check Filtered Data")
|
| 95 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
| 96 |
+
plot_fn = partial(plt_plot, "check_word_number_criteria")
|
| 97 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
| 98 |
+
check_fn = partial(check_filtered, "check_word_number_criteria")
|
| 99 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 100 |
+
|
| 101 |
with gr.Tab("Character Repetition Criteria"):
|
| 102 |
# plot some random data
|
| 103 |
plot = gr.Plot()
|
|
|
|
| 110 |
check_fn = partial(check_filtered, "check_char_repetition_criteria")
|
| 111 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 112 |
|
| 113 |
+
with gr.Tab("Stop Word Ratio Criteria"):
|
| 114 |
plot = gr.Plot()
|
| 115 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 116 |
calculate = gr.Button("Calculate")
|
| 117 |
check = gr.Button("Check Filtered Data")
|
| 118 |
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
| 119 |
+
plot_fn = partial(plt_plot, "check_stop_word_ratio_criteria")
|
| 120 |
calculate.click(plot_fn, [dataset, threshold], plot)
|
| 121 |
+
check_fn = partial(check_filtered, "check_stop_word_ratio_criteria")
|
| 122 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 123 |
|
| 124 |
+
with gr.Tab("Flagged Word Criteria"):
|
| 125 |
plot = gr.Plot()
|
| 126 |
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 127 |
calculate = gr.Button("Calculate")
|
|
|
|
| 132 |
check_fn = partial(check_filtered, "check_flagged_words_criteria")
|
| 133 |
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 134 |
|
| 135 |
+
with gr.Tab("Perplexity Criteria"):
|
| 136 |
+
plot = gr.Plot()
|
| 137 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 138 |
+
calculate = gr.Button("Calculate")
|
| 139 |
+
check = gr.Button("Check Filtered Data")
|
| 140 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
| 141 |
+
plot_fn = partial(plt_plot, "check_perplexity_criteria")
|
| 142 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
| 143 |
+
check_fn = partial(check_filtered, "check_perplexity_criteria")
|
| 144 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 145 |
+
|
| 146 |
+
with gr.Tab("Language Detection Criteria"):
|
| 147 |
+
plot = gr.Plot()
|
| 148 |
+
threshold = gr.Slider(minimum=0, maximum=1, label="Threshold")
|
| 149 |
+
calculate = gr.Button("Calculate")
|
| 150 |
+
check = gr.Button("Check Filtered Data")
|
| 151 |
+
filtered_data = gr.Textbox(lines=5, label="Filtered Data")
|
| 152 |
+
plot_fn = partial(plt_plot, "check_language_criteria")
|
| 153 |
+
calculate.click(plot_fn, [dataset, threshold], plot)
|
| 154 |
+
check_fn = partial(check_filtered, "check_language_criteria")
|
| 155 |
+
check.click(check_fn, [dataset, threshold], filtered_data)
|
| 156 |
+
|
| 157 |
if __name__ == "__main__":
|
| 158 |
demo.launch()
|
requirements.txt
CHANGED
|
@@ -1,2 +1,2 @@
|
|
| 1 |
scrubadub
|
| 2 |
-
squeakily
|
|
|
|
| 1 |
scrubadub
|
| 2 |
+
git+https://github.com/CarperAI/squeakily.git
|