import pandas as pd
import numpy as np
import gradio as gr
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline
from collections import Counter
import re
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from wordcloud import WordCloud
import warnings
import os

warnings.filterwarnings('ignore')

# Global plotting style
plt.style.use('default')
sns.set_palette("husl")

# Expected schema of scraped_articles.csv — declared once so both error
# branches of load_data() return the same well-formed empty frame.
_ARTICLE_COLUMNS = ['id', 'source', 'keyword', 'date', 'title', 'content',
                    'author', 'thumbnail', 'url', 'hash', 'created_at']


# 1. Load the scraped-articles CSV
def load_data():
    """Load ``scraped_articles.csv`` into a DataFrame.

    Returns
    -------
    pandas.DataFrame
        Articles with the ``date`` column parsed to datetime; rows whose
        date cannot be parsed are dropped. On any read failure an empty
        DataFrame with the expected columns is returned so callers can
        always rely on the schema.
    """
    try:
        df = pd.read_csv('scraped_articles.csv')
        # Invalid dates become NaT and are removed below
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])
        return df
    except FileNotFoundError:
        print("File scraped_articles.csv tidak ditemukan")
        return pd.DataFrame(columns=_ARTICLE_COLUMNS)
    except Exception as e:
        # Broad catch is deliberate: a corrupt CSV must not crash the UI
        print(f"Error loading data: {e}")
        return pd.DataFrame(columns=_ARTICLE_COLUMNS)
# 2. Sentiment prediction with a model suited to Indonesian text
class SentimentAnalyzer:
    """Wraps a HuggingFace sentiment pipeline for Indonesian articles.

    Tries a dedicated Indonesian RoBERTa classifier first, then a
    multilingual fallback; if both fail, ``sentiment_pipeline`` stays
    ``None`` and predictions degrade to a neutral default.
    """

    def __init__(self):
        try:
            print("Memuat model sentiment analysis...")
            model_name = "w11wo/indonesian-roberta-base-sentiment-classifier"
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model=model_name,
                tokenizer=model_name,
                max_length=512,
                truncation=True,
            )
            print("Model sentiment analysis berhasil dimuat")
        except Exception as e:
            print(f"Error loading sentiment model: {e}")
            print("Menggunakan model fallback...")
            try:
                # Multilingual star-rating model as a fallback
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="nlptown/bert-base-multilingual-uncased-sentiment",
                    max_length=512,
                    truncation=True,
                )
            except Exception as e2:
                print(f"Error loading fallback model: {e2}")
                self.sentiment_pipeline = None

    def predict_sentiment(self, text):
        """Return ``(label, confidence)`` for *text*.

        Label is one of ``"POSITIF"``, ``"NEGATIF"``, ``"NETRAL"``.
        Empty/NaN input, a missing model, or a pipeline error all yield
        the neutral default ``("NETRAL", 0.5)``.
        """
        if self.sentiment_pipeline is None:
            return "NETRAL", 0.5
        if pd.isna(text) or text == "" or str(text).strip() == "":
            return "NETRAL", 0.5
        try:
            # Cap input length to keep the tokenizer well within limits
            snippet = str(text).strip()[:1000]
            prediction = self.sentiment_pipeline(snippet)[0]
            raw_label = prediction['label'].upper()
            score = prediction['score']

            # Normalize model-specific labels (Indonesian model, the
            # multilingual star model, and generic LABEL_n outputs)
            is_positive = ('POSITIF' in raw_label or 'POSITIVE' in raw_label
                           or raw_label in ('5', '4', 'LABEL_2'))
            if is_positive:
                return "POSITIF", score
            is_negative = ('NEGATIF' in raw_label or 'NEGATIVE' in raw_label
                           or raw_label in ('1', '2', 'LABEL_0'))
            if is_negative:
                return "NEGATIF", score
            return "NETRAL", score
        except Exception as e:
            print(f"Error predicting sentiment: {e}")
            return "NETRAL", 0.5
# 3. Keyword extraction with Indonesian stopword removal
class KeywordExtractor:
    """Extracts frequent keywords from article text.

    Uses Sastrawi's stopword remover plus a manual Indonesian stopword
    list; words of length <= 2 are always discarded. If Sastrawi fails to
    initialize, only basic text cleaning is applied.
    """

    def __init__(self):
        try:
            self.factory = StopWordRemoverFactory()
            self.stopword = self.factory.create_stop_word_remover()
            # Manual additions not covered by Sastrawi's default list
            self.additional_stopwords = {
                'yang', 'dan', 'di', 'dari', 'dalam', 'untuk', 'pada', 'ke',
                'dengan', 'ini', 'itu', 'saya', 'kamu', 'kami', 'kita',
                'mereka', 'adalah', 'ada', 'akan', 'atau', 'juga', 'tidak',
                'bukan', 'sudah', 'belum', 'saja', 'bisa', 'dapat', 'harus',
                'perlu', 'ingin', 'lagi', 'lalu', 'kemudian', 'saat',
                'ketika', 'karena', 'sebab', 'jika', 'apabila', 'meski',
                'walaupun', 'agar', 'supaya', 'sehingga', 'maka', 'oleh',
                'kepada', 'terhadap', 'atas', 'bawah', 'depan', 'belakang',
                'samping', 'antara', 'demi', 'hingga', 'sampai', 'sejak',
                'selama', 'tentang', 'mengenai', 'menurut', 'situ', 'sana',
                'sini', 'mana', 'apa', 'siapa', 'kapan', 'dimana',
                'bagaimana', 'mengapa', 'berapa',
            }
        except Exception as e:
            print(f"Error initializing keyword extractor: {e}")
            self.stopword = None

    def clean_text(self, text):
        """Lowercase *text*, keep latin letters only, collapse whitespace."""
        if pd.isna(text):
            return ""
        lowered = str(text).lower()
        letters_only = re.sub(r'[^a-zA-Z\s]', ' ', lowered)
        return re.sub(r'\s+', ' ', letters_only).strip()

    def remove_stopwords(self, text):
        """Return cleaned *text* with Sastrawi + manual stopwords removed."""
        if self.stopword is None:
            # Sastrawi unavailable — fall back to basic cleaning only
            return self.clean_text(text)
        without_common = self.stopword.remove(self.clean_text(text))
        kept = [token for token in without_common.split()
                if token not in self.additional_stopwords and len(token) > 2]
        return ' '.join(kept)

    def extract_keywords(self, texts, top_n=20):
        """Return the *top_n* most frequent keywords as (word, count) pairs."""
        frequencies = Counter()
        for text in texts:
            if pd.isna(text):
                continue
            tokens = self.remove_stopwords(text).split()
            frequencies.update(token for token in tokens if len(token) > 2)
        return frequencies.most_common(top_n)
# Helper: check whether a previously saved analysis exists and is valid
def check_existing_analysis(sources):
    """Check for previously saved analysis result files.

    Parameters
    ----------
    sources : str
        Selected source name, or "Semua" for all sources.

    Returns
    -------
    tuple
        ``(exists, filtered_df, keywords, min_date, max_date)``. When no
        valid analysis is found, everything except ``exists`` is ``None``.
    """
    try:
        main_file = 'articles_with_sentiment.csv'
        if not os.path.exists(main_file):
            print("File analisis tidak ditemukan, perlu analisis ulang")
            return False, None, None, None, None

        existing_df = pd.read_csv(main_file)
        if existing_df.empty:
            print("File analisis kosong, perlu analisis ulang")
            return False, None, None, None, None

        existing_df['date'] = pd.to_datetime(existing_df['date'], errors='coerce')

        # Full date range of the stored analysis (used to prefill the filter UI)
        min_date = existing_df['date'].min()
        max_date = existing_df['date'].max()

        # Restrict to the selected source unless "Semua" (all) is chosen
        if sources != "Semua":
            filtered_df = existing_df[existing_df['source'] == sources].copy()
        else:
            filtered_df = existing_df.copy()

        if filtered_df.empty:
            print(f"Tidak ada data untuk sumber {sources}, perlu analisis ulang")
            return False, None, None, None, None

        # All companion files must also exist for the cache to be usable
        support_files = [
            'sentiment_trend_daily.csv',
            'sentiment_by_source.csv',
            'sentiment_distribution.csv',
            'extracted_keywords.csv',
        ]
        for file in support_files:
            if not os.path.exists(file):
                print(f"File pendukung {file} tidak ditemukan, perlu analisis ulang")
                return False, None, None, None, None

        keywords_df = pd.read_csv('extracted_keywords.csv')
        keywords = list(zip(keywords_df['keyword'], keywords_df['frequency']))

        print("File analisis ditemukan dan valid, menggunakan data yang ada")
        return True, filtered_df, keywords, min_date, max_date

    except Exception as e:
        print(f"Error checking existing analysis: {e}")
        return False, None, None, None, None


# Helper: filter a DataFrame by date range
def filter_by_date(df, start_date, end_date):
    """Filter *df* by an inclusive date range on the ``date`` column.

    Accepts datetime objects, pandas Timestamps, or parseable strings (as
    supplied by the Gradio widgets). A missing/unparseable bound falls back
    to the data's min/max date; a reversed range is swapped rather than
    returning nothing. The input frame is never mutated.
    """
    try:
        if df.empty or 'date' not in df.columns:
            print("⚠️ Data kosong atau kolom 'date' tidak ditemukan.")
            return pd.DataFrame()

        # Work on a copy so the caller's frame is not mutated
        df = df.copy()
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])

        def parse_dt(value):
            # Normalize any supported input into a Timestamp (or None)
            if value is None or value == "":
                return None
            if isinstance(value, (datetime, pd.Timestamp)):
                return pd.Timestamp(value)
            try:
                return pd.to_datetime(str(value))
            except Exception:
                return None

        start_dt = parse_dt(start_date)
        end_dt = parse_dt(end_date)

        if start_dt is None:
            start_dt = df['date'].min()
        if end_dt is None:
            end_dt = df['date'].max()

        # Swap a reversed range instead of producing an empty result
        if start_dt > end_dt:
            start_dt, end_dt = end_dt, start_dt

        print(f"📅 Memfilter dari {start_dt:%Y-%m-%d} hingga {end_dt:%Y-%m-%d}")

        mask = (df['date'] >= start_dt) & (df['date'] <= end_dt)
        filtered_df = df.loc[mask].copy()
        print(f"✅ {len(filtered_df)} baris setelah filter tanggal.")
        return filtered_df

    except Exception as e:
        print("❌ ERROR filter_by_date:", e)
        import traceback
        traceback.print_exc()
        return df


# Helper: re-run analysis on an already-filtered subset
def reanalyze_filtered_data(filtered_df):
    """Re-run keyword extraction on a filtered DataFrame.

    Sentiment labels are intentionally NOT recomputed here: the filtered
    rows already carry ``sentiment``/``confidence`` from the stored
    analysis, so only the keyword statistics need refreshing for the
    new subset.
    """
    try:
        keyword_extractor = KeywordExtractor()
        print("Memproses ekstraksi keyword ulang...")
        keywords = keyword_extractor.extract_keywords(filtered_df['content'])
        return filtered_df, keywords
    except Exception as e:
        print(f"Error in reanalysis: {e}")
        return filtered_df, []


# Helper: persist analysis results to CSV files
def save_analysis_results(filtered_df, keywords):
    """Write the analysis outputs to the CSV files read by
    :func:`check_existing_analysis`.

    NOTE(review): this function is not invoked anywhere in the visible UI
    flow, yet the caching logic expects these files to exist — confirm the
    intended call site (likely after a fresh analysis in analyze_articles).
    """
    try:
        # 1. Articles with their predicted sentiment
        articles_with_sentiment = filtered_df[
            ['id', 'source', 'keyword', 'date', 'title', 'content',
             'author', 'url', 'sentiment', 'confidence']
        ].copy()
        articles_with_sentiment['date'] = articles_with_sentiment['date'].dt.strftime('%Y-%m-%d')
        articles_with_sentiment.to_csv('articles_with_sentiment.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: articles_with_sentiment.csv")

        # 2. Daily sentiment counts
        daily_sentiment = filtered_df.groupby(
            [filtered_df['date'].dt.date, 'sentiment']
        ).size().unstack(fill_value=0)
        daily_sentiment.reset_index(inplace=True)
        daily_sentiment.rename(columns={'date': 'tanggal'}, inplace=True)
        daily_sentiment.to_csv('sentiment_trend_daily.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_trend_daily.csv")

        # 3. Sentiment counts per source
        source_sentiment = filtered_df.groupby(['source', 'sentiment']).size().unstack(fill_value=0)
        source_sentiment.reset_index(inplace=True)
        source_sentiment.to_csv('sentiment_by_source.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_by_source.csv")

        # 4. Overall sentiment distribution with percentages
        sentiment_distribution = filtered_df['sentiment'].value_counts().reset_index()
        sentiment_distribution.columns = ['sentiment', 'count']
        sentiment_distribution['percentage'] = (
            sentiment_distribution['count'] / len(filtered_df) * 100
        ).round(2)
        sentiment_distribution.to_csv('sentiment_distribution.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_distribution.csv")

        # 5. Extracted keywords and their frequencies
        keywords_df = pd.DataFrame(keywords, columns=['keyword', 'frequency'])
        keywords_df.to_csv('extracted_keywords.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: extracted_keywords.csv")

        # 6. Confidence statistics per sentiment class
        confidence_stats = filtered_df.groupby('sentiment')['confidence'].agg(
            ['mean', 'std', 'min', 'max']
        ).reset_index()
        confidence_stats.columns = ['sentiment', 'confidence_mean', 'confidence_std',
                                    'confidence_min', 'confidence_max']
        confidence_stats.to_csv('confidence_statistics.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: confidence_statistics.csv")

        return True
    except Exception as e:
        print(f"Error saving analysis results: {e}")
        return False


# Visualization: stacked bars of sentiment per day
def create_sentiment_time_plot(filtered_df):
    """Stacked bar chart of article counts per day, split by sentiment."""
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Group on the calendar date directly so the caller's frame is not
        # mutated (the previous version injected a 'date_only' column)
        daily_sentiment = filtered_df.groupby(
            [filtered_df['date'].dt.date, 'sentiment']
        ).size().unstack(fill_value=0)

        # Guarantee all three sentiment columns exist
        for sentiment in ['POSITIF', 'NEGATIF', 'NETRAL']:
            if sentiment not in daily_sentiment.columns:
                daily_sentiment[sentiment] = 0

        daily_sentiment = daily_sentiment.sort_index()

        if not daily_sentiment.empty:
            bottom = None
            colors = {'POSITIF': '#2ecc71', 'NEGATIF': '#e74c3c', 'NETRAL': '#95a5a6'}
            for sentiment, color in colors.items():
                if bottom is None:
                    ax.bar(daily_sentiment.index, daily_sentiment[sentiment],
                           label=sentiment, alpha=0.8, color=color)
                    bottom = daily_sentiment[sentiment]
                else:
                    ax.bar(daily_sentiment.index, daily_sentiment[sentiment],
                           bottom=bottom, label=sentiment, alpha=0.8, color=color)
                    bottom += daily_sentiment[sentiment]

        ax.set_xlabel('Tanggal')
        ax.set_ylabel('Jumlah Artikel')
        ax.set_title('Trend Sentimen Artikel per Tanggal')
        ax.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating time plot: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: word cloud of extracted keywords
def create_wordcloud_plot(keywords):
    """Word cloud built from (keyword, frequency) pairs."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if not keywords:
            ax.text(0.5, 0.5, 'Tidak ada data keyword',
                    ha='center', va='center', transform=ax.transAxes)
            ax.set_title('Word Cloud - Keyword Terpopuler')
            return fig

        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            colormap='viridis',
            max_words=50
        ).generate_from_frequencies(dict(keywords))

        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title('Word Cloud - Keyword Terpopuler')
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating wordcloud: {e}")
        ax.text(0.5, 0.5, 'Error creating wordcloud',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: overall sentiment distribution pie chart
def create_pie_chart(filtered_df):
    """Pie chart of the overall POSITIF/NEGATIF/NETRAL distribution."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sentiment_counts = filtered_df['sentiment'].value_counts()
        colors = ['#2ecc71', '#e74c3c', '#95a5a6']  # green, red, grey

        # Fixed label order: POSITIF, NEGATIF, NETRAL
        labels = ['POSITIF', 'NEGATIF', 'NETRAL']
        counts = [sentiment_counts.get(label, 0) for label in labels]

        # Only draw wedges for non-zero classes
        filtered_labels = []
        filtered_counts = []
        filtered_colors = []
        for label, count, color in zip(labels, counts, colors):
            if count > 0:
                filtered_labels.append(label)
                filtered_counts.append(count)
                filtered_colors.append(color)

        if filtered_counts:
            wedges, texts, autotexts = ax.pie(
                filtered_counts,
                labels=filtered_labels,
                autopct=lambda p: f'{p:.1f}%' if p > 0 else '',
                colors=filtered_colors,
                startangle=90
            )
            # Make the percentage labels readable on colored wedges
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')
        else:
            ax.text(0.5, 0.5, 'Tidak ada data',
                    ha='center', va='center', transform=ax.transAxes)

        ax.set_title('Distribusi Sentimen Artikel')
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating pie chart: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: per-sentiment pie charts of source proportions
def create_sentiment_source_pie_charts(filtered_df):
    """One pie chart per sentiment (POSITIF, NEGATIF, NETRAL), each showing
    the proportion of article sources within that sentiment class."""
    fig, axes = plt.subplots(1, 3, figsize=(18, 7))
    sentiments = ['POSITIF', 'NEGATIF', 'NETRAL']
    colors = {
        'POSITIF': '#2ecc71',  # green
        'NEGATIF': '#e74c3c',  # red
        'NETRAL': '#95a5a6'    # grey
    }
    try:
        # Both columns are required for this breakdown
        if 'sentiment' not in filtered_df.columns or 'source' not in filtered_df.columns:
            for ax in axes:
                ax.text(0.5, 0.5, 'Kolom "sentiment" atau "source" tidak ditemukan',
                        ha='center', va='center', transform=ax.transAxes)
            plt.tight_layout()
            return fig

        for i, sentiment in enumerate(sentiments):
            ax = axes[i]
            subset = filtered_df[filtered_df['sentiment'] == sentiment]

            if subset.empty:
                ax.text(0.5, 0.5, f'Tidak ada data {sentiment.lower()}',
                        ha='center', va='center', transform=ax.transAxes)
                ax.set_title(f'Sentimen {sentiment}', color=colors[sentiment])
                ax.axis('off')
                continue

            counts = subset['source'].value_counts()
            labels = counts.index
            sizes = counts.values

            wedges, texts, autotexts = ax.pie(
                sizes,
                labels=labels,
                autopct=lambda p: f'{p:.1f}%' if p > 0 else '',
                startangle=90,
                colors=plt.cm.Pastel2.colors,
                pctdistance=0.8
            )
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')

            ax.set_title(f"Proporsi Sumber ({sentiment})",
                         fontsize=12, color=colors[sentiment], weight='bold')
            ax.axis('equal')  # keep the pie circular

        plt.tight_layout()
        return fig
    except Exception as e:
        print(f"Error creating sentiment-source pie charts: {e}")
        for ax in axes:
            ax.text(0.5, 0.5, 'Error membuat chart',
                    ha='center', va='center', transform=ax.transAxes)
        plt.tight_layout()
        return fig


# Visualization: grouped bars of sentiment counts per source
def create_source_sentiment_plot(filtered_df):
    """Grouped (clustered) bar plot of sentiment distribution per source."""
    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        source_sentiment = filtered_df.groupby(['source', 'sentiment']).size().unstack(fill_value=0)

        if not source_sentiment.empty:
            # Guarantee all sentiment columns exist, in a fixed order
            for sentiment in ['POSITIF', 'NEGATIF', 'NETRAL']:
                if sentiment not in source_sentiment.columns:
                    source_sentiment[sentiment] = 0

            colors = {'POSITIF': '#2ecc71', 'NEGATIF': '#e74c3c', 'NETRAL': '#95a5a6'}
            source_sentiment = source_sentiment[['POSITIF', 'NEGATIF', 'NETRAL']]

            # Clustered (not stacked) bars
            source_sentiment.plot(
                kind='bar',
                ax=ax,
                color=[colors[c] for c in source_sentiment.columns],
                width=0.7,
                stacked=False
            )

            ax.set_xlabel('Sumber Artikel')
            ax.set_ylabel('Jumlah Artikel')
            ax.set_title('Distribusi Sentimen per Sumber Artikel')
            plt.xticks(rotation=45, ha='right')
            ax.legend(title='Sentimen')
            plt.grid(axis='y', linestyle='--', alpha=0.6)
            plt.tight_layout()
        else:
            ax.text(0.5, 0.5, 'Tidak ada data',
                    ha='center', va='center', transform=ax.transAxes)
    except Exception as e:
        print(f"Error creating source sentiment plot: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Main analysis entry point for the Gradio UI
def analyze_articles(sources):
    """Run (or reuse) the full analysis for the selected source.

    Returns 11 values matching the ``analyze_btn.click`` outputs:
    time plot, word cloud, sentiment pie, per-sentiment source pies,
    per-source bar plot, recap text, min/max dates, and three
    ``gr.update`` objects enabling the date-filter widgets.

    BUGFIX: the per-source bar plot and the per-sentiment source pies were
    previously returned in swapped positions, so each appeared under the
    other's label in the UI. The return order now matches the outputs list
    (pie_chart2 <- sentiment_source_pie_chart, source_plot <- source_plot).
    """
    try:
        # --- Reuse a stored analysis when one exists ---
        analysis_exists, existing_df, existing_keywords, min_date, max_date = \
            check_existing_analysis(sources)

        if analysis_exists:
            print("📦 Menggunakan data analisis yang sudah ada...")
            filtered_df = existing_df
            keywords = existing_keywords
            analysis_type = "Data Existing"
        else:
            print("🚀 Melakukan analisis baru...")
            df = load_data()
            if df.empty:
                return (
                    None, None, None, None, None,
                    "Data tidak ditemukan atau kosong",
                    None, None,
                    gr.update(interactive=False),
                    gr.update(interactive=False),
                    gr.update(interactive=False)
                )

            # Restrict to the selected source unless "Semua"
            if sources != "Semua":
                filtered_df = df[df['source'] == sources].copy()
            else:
                filtered_df = df.copy()

            if filtered_df.empty:
                return (
                    None, None, None, None, None,
                    "Tidak ada data untuk sumber dipilih",
                    None, None,
                    gr.update(interactive=False),
                    gr.update(interactive=False),
                    gr.update(interactive=False)
                )

            # Fresh sentiment prediction and keyword extraction
            sentiment_analyzer = SentimentAnalyzer()
            keyword_extractor = KeywordExtractor()

            sentiments, confidences = [], []
            for content in filtered_df['content']:
                sentiment, confidence = sentiment_analyzer.predict_sentiment(content)
                sentiments.append(sentiment)
                confidences.append(confidence)

            filtered_df['sentiment'] = sentiments
            filtered_df['confidence'] = confidences
            keywords = keyword_extractor.extract_keywords(filtered_df['content'])

            min_date = filtered_df['date'].min()
            max_date = filtered_df['date'].max()
            analysis_type = "Analisis Baru"

        # --- Build all visualizations ---
        print("📈 Membuat visualisasi...")
        time_plot = create_sentiment_time_plot(filtered_df)
        wordcloud_plot = create_wordcloud_plot(keywords)
        pie_chart = create_pie_chart(filtered_df)
        source_plot = create_source_sentiment_plot(filtered_df)
        sentiment_source_pie_chart = create_sentiment_source_pie_charts(filtered_df)

        # --- Recap text ---
        total = len(filtered_df)
        pos = (filtered_df['sentiment'] == 'POSITIF').sum()
        neg = (filtered_df['sentiment'] == 'NEGATIF').sum()
        neu = (filtered_df['sentiment'] == 'NETRAL').sum()
        avg_conf = filtered_df['confidence'].mean()

        recap_text = f"""
REKAP ANALISIS ARTIKEL:
────────────────────────
• Tipe Analisis : {analysis_type}
• Total Artikel : {total}
• Positif : {pos} ({(pos/total*100):.1f}%)
• Negatif : {neg} ({(neg/total*100):.1f}%)
• Netral : {neu} ({(neu/total*100):.1f}%)
• Confidence Rata2 : {avg_conf:.3f}
• Sumber : {sources}
• Keyword Unik : {len(keywords)} kata
"""

        # Enable the date-filter widgets now that an analysis exists
        filter_enabled = True

        # Return order matches the outputs list of analyze_btn.click:
        # [time_plot, wordcloud_plot, pie_chart, pie_chart2, source_plot,
        #  recap_output, min_date_state, max_date_state,
        #  start_date, end_date, apply_filter_btn]
        return (
            time_plot,                                # 1 time_plot
            wordcloud_plot,                           # 2 wordcloud_plot
            pie_chart,                                # 3 pie_chart
            sentiment_source_pie_chart,               # 4 pie_chart2 (source proportions)
            source_plot,                              # 5 source_plot (bars per source)
            recap_text,                               # 6 recap_output
            min_date,                                 # 7 min_date_state
            max_date,                                 # 8 max_date_state
            gr.update(interactive=filter_enabled),    # 9 start_date
            gr.update(interactive=filter_enabled),    # 10 end_date
            gr.update(interactive=filter_enabled)     # 11 apply_filter_btn
        )

    except Exception as e:
        print(f"❌ Error in analyze_articles: {e}")
        return (
            None, None, None, None, None,
            f"Terjadi error: {str(e)}",
            None, None,
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False)
        )


# Apply a date filter on top of the stored analysis
def apply_date_filter(start_date, end_date, sources):
    """Filter the stored analysis by date/source and refresh the plots.

    Returns 6 values matching ``apply_filter_btn.click`` outputs. Like
    :func:`analyze_articles`, the two per-source plots are returned in
    the corrected (previously swapped) order.
    """
    try:
        if not os.path.exists('articles_with_sentiment.csv'):
            return None, None, None, None, None, \
                "File analisis tidak ditemukan. Lakukan analisis terlebih dahulu."

        existing_df = pd.read_csv('articles_with_sentiment.csv')
        existing_df['date'] = pd.to_datetime(existing_df['date'], errors='coerce')

        if existing_df.empty:
            return None, None, None, None, None, \
                "Data analisis kosong. Lakukan analisis terlebih dahulu."

        # Filter by source, then by date range
        if sources != "Semua":
            filtered_df = existing_df[existing_df['source'] == sources].copy()
        else:
            filtered_df = existing_df.copy()

        filtered_df = filter_by_date(filtered_df, start_date, end_date)

        if filtered_df.empty:
            return None, None, None, None, None, \
                "Tidak ada data dalam rentang tanggal yang dipilih."

        # Re-run keyword extraction on the filtered subset
        print("🔁 Melakukan analisis ulang untuk data yang difilter...")
        reanalyzed_df, keywords = reanalyze_filtered_data(filtered_df)

        # Rebuild every visualization
        time_plot = create_sentiment_time_plot(reanalyzed_df)
        wordcloud_plot = create_wordcloud_plot(keywords)
        pie_chart = create_pie_chart(reanalyzed_df)
        source_plot = create_source_sentiment_plot(reanalyzed_df)
        sentiment_source_pie_chart = create_sentiment_source_pie_charts(reanalyzed_df)

        # Recap text
        total = len(reanalyzed_df)
        pos = (reanalyzed_df['sentiment'] == 'POSITIF').sum()
        neg = (reanalyzed_df['sentiment'] == 'NEGATIF').sum()
        neu = (reanalyzed_df['sentiment'] == 'NETRAL').sum()
        avg_conf = reanalyzed_df['confidence'].mean()

        recap_text = f"""
REKAP ANALISIS FILTER TANGGAL:
──────────────────────────────
• Rentang Tanggal : {start_date} hingga {end_date}
• Total Artikel : {total}
• POSITIF : {pos} ({(pos/total*100):.1f}%)
• NEGATIF : {neg} ({(neg/total*100):.1f}%)
• NETRAL : {neu} ({(neu/total*100):.1f}%)
• Confidence Rata2 : {avg_conf:.3f}
• Sumber : {sources}

✅ Data difilter & dianalisis ulang
"""

        # Order matches outputs: [time_plot, wordcloud_plot, pie_chart,
        # pie_chart2, source_plot, recap_output]
        return time_plot, wordcloud_plot, pie_chart, \
            sentiment_source_pie_chart, source_plot, recap_text

    except Exception as e:
        print(f"❌ Error in apply_date_filter: {e}")
        return None, None, None, None, None, \
            f"Terjadi error saat apply filter: {str(e)}"


# Build the Gradio interface
def setup_gradio_interface():
    """Construct and return the Gradio Blocks app."""
    # Load data once to populate the source dropdown and the date defaults
    df = load_data()
    if not df.empty:
        sources = ["Semua"] + sorted(list(df['source'].unique()))
        # Convert to datetime objects for the Gradio widgets
        min_date = datetime.combine(df['date'].min().date(), datetime.min.time())
        max_date = datetime.combine(df['date'].max().date(), datetime.min.time())
    else:
        sources = ["Semua"]
        min_date = datetime.now() - timedelta(days=30)
        max_date = datetime.now()

    with gr.Blocks(title="Analisis Sentimen Artikel") as demo:
        gr.Markdown("# 📰 Analisis Sentimen Artikel Berita")
        gr.Markdown("Pilih sumber dan tanggal untuk menganalisis sentimen berita.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ Pengaturan Analisis")
                source_dropdown = gr.Dropdown(
                    label="Sumber Artikel",
                    choices=sources,
                    value="Semua",
                    info="Pilih sumber artikel tertentu atau 'Semua'"
                )
                analyze_btn = gr.Button("🚀 Jalankan Analisis")

                gr.Markdown("### 📅 Filter Tanggal (Opsional)")
                start_date = gr.Textbox(
                    label="Tanggal Mulai",
                    value=min_date.strftime('%Y-%m-%d %H:%M') if min_date else "",
                    info="Masukkan tanggal mulai (format: YYYY-MM-DD)"
                )
                end_date = gr.Textbox(
                    label="Tanggal Akhir",
                    value=max_date.strftime('%Y-%m-%d %H:%M') if max_date else "",
                    info="Masukkan tanggal akhir (format: YYYY-MM-DD)"
                )
                # Disabled until an analysis has been run
                apply_filter_btn = gr.Button("🔍 Apply Filter", interactive=False)

            with gr.Column(scale=1):
                gr.Markdown("""
### Analisis Sentimen Artikel Berbasis AI

Aplikasi ini menganalisis sentimen artikel berita yang telah di-scrape sebelumnya.
Gunakan fitur analisis untuk memproses data artikel dan menghasilkan visualisasi
yang informatif. Sumber artikel dapat dipilih sesuai kebutuhan, dan filter tanggal
dapat diterapkan setelah analisis selesai. Adapun sumber artikel berasal dari
berbagai portal berita online terkemuka di Indonesia.

Teknis analisis meliputi prediksi sentimen menggunakan model AI yang telah dilatih
khusus untuk Bahasa Indonesia, serta ekstraksi keyword dengan menghilangkan stopword.
Algoritma yang digunakan memastikan hasil analisis yang akurat dan relevan.
Adapun algoritma tersebut meliputi:
- **Prediksi Sentimen:** Menggunakan model transformer berbasis Roberta yang dioptimalkan untuk Bahasa Indonesia.
- **Ekstraksi Keyword:** Menggunakan metode penghilangan stopword dengan daftar stopword khusus Bahasa Indonesia.

Secara visual, hasil analisis disajikan dalam berbagai bentuk grafik seperti
trend sentimen per tanggal, distribusi sentimen, sentimen per sumber, dan
word cloud keyword.

### 🔄 Logika Aplikasi:
- ✅ Jika file analisis sudah ada → tampilkan visualisasi langsung
- 🔄 Jika file analisis belum ada → lakukan analisis baru
- ⚠️ Filter tanggal hanya aktif setelah analisis selesai
- 🔍 Apply filter akan menganalisis ulang data yang difilter
""")

            with gr.Column(scale=1):
                recap_output = gr.Textbox(
                    label="📈 Rekap Analisis",
                    lines=23,
                    max_lines=25,
                    interactive=False
                )

        # State holding the analyzed data's date range
        min_date_state = gr.State(value=min_date)
        max_date_state = gr.State(value=max_date)

        with gr.Row():
            with gr.Column():
                wordcloud_plot = gr.Plot(label="☁️ Word Cloud Keyword")
            with gr.Column():
                pie_chart = gr.Plot(label="🥧 Distribusi Sentimen")
            with gr.Column():
                pie_chart2 = gr.Plot(label="🥧 Distribusi Sumber Sentimen")

        with gr.Row():
            with gr.Column():
                source_plot = gr.Plot(label="📊 Sentimen per Sumber")
            with gr.Column():
                time_plot = gr.Plot(label="📅 Trend Sentimen per Tanggal")

        # Event handlers — the analysis functions return values in exactly
        # this output order (see analyze_articles / apply_date_filter)
        analyze_btn.click(
            fn=analyze_articles,
            inputs=[source_dropdown],
            outputs=[
                time_plot, wordcloud_plot, pie_chart, pie_chart2, source_plot,
                recap_output, min_date_state, max_date_state,
                start_date, end_date, apply_filter_btn
            ]
        )

        apply_filter_btn.click(
            fn=apply_date_filter,
            inputs=[start_date, end_date, source_dropdown],
            outputs=[time_plot, wordcloud_plot, pie_chart, pie_chart2,
                     source_plot, recap_output]
        )

    return demo


if __name__ == "__main__":
    print("Menjalankan aplikasi Analisis Sentimen Artikel...")
    print("Pastikan file 'scraped_articles.csv' ada di direktori yang sama")
    print("Aplikasi akan tersedia di http://localhost:7860")
    try:
        demo = setup_gradio_interface()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True
        )
    except Exception as e:
        print(f"Error launching app: {e}")