Update app.py
app.py CHANGED
@@ -359,155 +359,71 @@ demo = gr.Interface(
 
 demo.launch()
 '''
+
 import gradio as gr
-from transformers import TFBertForSequenceClassification, BertTokenizer
-import tensorflow as tf
 import praw
-import
-import pytesseract
-from PIL import Image
-import numpy as np
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-from scipy.special import softmax
+import pandas as pd
 import plotly.graph_objs as go
+from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
+from tensorflow.nn import softmax
+import numpy as np
 
-#
-
-
-
-model = TFBertForSequenceClassification.from_pretrained("shrish191/sentiment-bert")
-tokenizer = BertTokenizer.from_pretrained("shrish191/sentiment-bert")
-
-LABELS = {
-    0: "Neutral",
-    1: "Positive",
-    2: "Negative"
-}
-
-# Load fallback model
-fallback_model_name = "cardiffnlp/twitter-roberta-base-sentiment"
-fallback_tokenizer = AutoTokenizer.from_pretrained(fallback_model_name)
-fallback_model = AutoModelForSequenceClassification.from_pretrained(fallback_model_name)
-
-# Reddit API setup
-reddit = praw.Reddit(
-    client_id=os.getenv("REDDIT_CLIENT_ID"),
-    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
-    user_agent=os.getenv("REDDIT_USER_AGENT", "sentiment-classifier-ui")
-)
-
-def fetch_reddit_text(reddit_url):
-    try:
-        submission = reddit.submission(url=reddit_url)
-        return f"{submission.title}\n\n{submission.selftext}"
-    except Exception as e:
-        return f"Error fetching Reddit post: {str(e)}"
-
-def fetch_subreddit_texts(subreddit_name, limit=15):
-    texts = []
-    try:
-        subreddit = reddit.subreddit(subreddit_name)
-        for submission in subreddit.hot(limit=limit):
-            combined = f"{submission.title} {submission.selftext}".strip()
-            if combined:
-                texts.append(combined)
-        return texts
-    except Exception as e:
-        return [f"Error fetching subreddit: {str(e)}"]
+# Load model and tokenizer
+model_name = "shrish191/sentiment-bert"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
 
-def
-
-
-
-    scores = softmax(output.logits.numpy()[0])
+def classify_sentiment(text):
+    inputs = tokenizer(text, return_tensors="tf", padding=True, truncation=True)
+    outputs = model(inputs)
+    scores = softmax(outputs.logits, axis=1).numpy()[0]
     labels = ['Negative', 'Neutral', 'Positive']
-
-
-
-    counts = {"Positive": 0, "Neutral": 0, "Negative": 0}
-    for text in texts:
-        try:
-            inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
-            outputs = model(inputs)
-            probs = tf.nn.softmax(outputs.logits, axis=1)
-            confidence = float(tf.reduce_max(probs).numpy())
-            pred_label = tf.argmax(probs, axis=1).numpy()[0]
-
-            if confidence < 0.5:
-                label = fallback_classifier(text).split(":")[-1].strip()
-            else:
-                label = LABELS[pred_label]
-
-            counts[label] += 1
-        except:
-            continue
-    return counts
-
-def sentiment_pie_chart(counts):
-    labels = list(counts.keys())
-    values = list(counts.values())
-    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
-    fig.update_layout(title_text="Sentiment Distribution in Subreddit")
-    return fig
-
-def classify_sentiment(text_input, reddit_url, image, subreddit_name):
-    # Subreddit Dashboard has priority
-    if subreddit_name.strip():
-        texts = fetch_subreddit_texts(subreddit_name)
-        if "Error" in texts[0]:
-            return texts[0]
-        counts = classify_multiple_sentiments(texts)
-        return sentiment_pie_chart(counts)
-
-    if reddit_url.strip():
-        text = fetch_reddit_text(reddit_url)
-    elif image is not None:
-        try:
-            img_array = np.array(image)
-            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-            thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
-            text = pytesseract.image_to_string(thresh)
-        except Exception as e:
-            return f"[!] OCR failed: {str(e)}"
-    elif text_input.strip():
-        text = text_input
-    else:
-        return "[!] Please enter some text, upload an image, or provide a Reddit URL."
-
-    if text.lower().startswith("error") or "Unable to extract" in text:
-        return f"[!] {text}"
-
-    try:
-        inputs = tokenizer(text, return_tensors="tf", truncation=True, padding=True)
-        outputs = model(inputs)
-        probs = tf.nn.softmax(outputs.logits, axis=1)
-        confidence = float(tf.reduce_max(probs).numpy())
-        pred_label = tf.argmax(probs, axis=1).numpy()[0]
-
-        if confidence < 0.5:
-            return fallback_classifier(text)
+    sentiment = labels[np.argmax(scores)]
+    confidence = round(float(np.max(scores)) * 100, 2)
+    return sentiment, confidence
 
-
-
-
-
-
-demo = gr.Interface(
-    fn=classify_sentiment,
-    inputs=[
-        gr.Textbox(label="Text Input (can be tweet or any content)", placeholder="Paste tweet or type any content here...", lines=4),
-        gr.Textbox(label="Reddit Post URL", placeholder="Paste a Reddit post URL (optional)", lines=1),
-        gr.Image(label="Upload Image (optional)", type="pil"),
-        gr.Textbox(label="Subreddit Name", placeholder="e.g. AskReddit (optional)", lines=1),
-    ],
-    outputs=gr.outputs.Component(label="Result"),
-    title="Sentiment Analyzer with Dashboard",
-    description="🔍 Paste any text, Reddit post URL, upload an image, or enter a subreddit name to analyze sentiment."
+# Reddit sentiment dashboard
+reddit = praw.Reddit(
+    client_id="YOUR_CLIENT_ID",
+    client_secret="YOUR_CLIENT_SECRET",
+    user_agent="YOUR_USER_AGENT"
 )
 
-
-
+def analyze_subreddit(subreddit_name, num_posts):
+    posts = []
+    for submission in reddit.subreddit(subreddit_name).hot(limit=num_posts):
+        if not submission.stickied:
+            sentiment, confidence = classify_sentiment(submission.title)
+            posts.append({"title": submission.title, "sentiment": sentiment, "confidence": confidence})
+
+    df = pd.DataFrame(posts)
+    sentiment_counts = df['sentiment'].value_counts().reindex(['Positive', 'Neutral', 'Negative'], fill_value=0)
+    total = sentiment_counts.sum()
+    sentiment_percentages = (sentiment_counts / total * 100).round(2)
+
+    fig = go.Figure(data=[
+        go.Pie(labels=sentiment_percentages.index, values=sentiment_percentages.values, hole=.4)
+    ])
+    fig.update_layout(title="Sentiment Distribution in r/{} ({} posts)".format(subreddit_name, num_posts))
+
+    return df, fig
+
+with gr.Blocks() as demo:
+    gr.Markdown("## Reddit Subreddit Sentiment Dashboard")
+    subreddit_input = gr.Textbox(label="Enter Subreddit (without r/)", placeholder="e.g., technology")
+    num_posts_input = gr.Slider(10, 100, step=10, value=30, label="Number of Posts to Analyze")
+    analyze_button = gr.Button("Analyze")
+    sentiment_table = gr.Dataframe(label="Post Sentiments")
+    sentiment_chart = gr.Plot(label="Sentiment Pie Chart")
+
+    analyze_button.click(
+        analyze_subreddit,
+        inputs=[subreddit_input, num_posts_input],
+        outputs=[sentiment_table, sentiment_chart]
+    )
+
+if __name__ == "__main__":
+    demo.launch()
 
 
 
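Note on the added Reddit client setup: the new praw.Reddit(...) call carries literal placeholder credentials ("YOUR_CLIENT_ID" and so on), while the lines being removed read the same values from environment variables. Below is a minimal sketch of that environment-variable pattern; it reuses the REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, and REDDIT_USER_AGENT names from the removed code, and it assumes the Space actually defines those secrets.

import os
import praw

# Sketch only: the env-var names mirror the os.getenv() calls in the removed
# revision; whether this Space has those secrets configured is an assumption.
reddit = praw.Reddit(
    client_id=os.getenv("REDDIT_CLIENT_ID"),
    client_secret=os.getenv("REDDIT_CLIENT_SECRET"),
    user_agent=os.getenv("REDDIT_USER_AGENT", "sentiment-classifier-ui"),
)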