import pandas as pd
import numpy as np
import gradio as gr
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import pipeline
from collections import Counter
import re
from Sastrawi.StopWordRemover.StopWordRemoverFactory import StopWordRemoverFactory
from wordcloud import WordCloud
import warnings
import os

warnings.filterwarnings('ignore')

# Global plotting style
plt.style.use('default')
sns.set_palette("husl")

# Expected schema of scraped_articles.csv — declared once so both error
# branches of load_data() return the same well-formed empty frame.
_ARTICLE_COLUMNS = ['id', 'source', 'keyword', 'date', 'title', 'content',
                    'author', 'thumbnail', 'url', 'hash', 'created_at']


# 1. Load the scraped-articles CSV
def load_data():
    """Load ``scraped_articles.csv`` into a DataFrame.

    Returns
    -------
    pandas.DataFrame
        Articles with the ``date`` column parsed to datetime; rows whose
        date cannot be parsed are dropped. On any read failure an empty
        DataFrame with the expected columns is returned so callers can
        always rely on the schema.
    """
    try:
        df = pd.read_csv('scraped_articles.csv')
        # Invalid dates become NaT and are removed below
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])
        return df
    except FileNotFoundError:
        print("File scraped_articles.csv tidak ditemukan")
        return pd.DataFrame(columns=_ARTICLE_COLUMNS)
    except Exception as e:
        # Broad catch is deliberate: a corrupt CSV must not crash the UI
        print(f"Error loading data: {e}")
        return pd.DataFrame(columns=_ARTICLE_COLUMNS)
# 2. Sentiment prediction with a model suited to Indonesian text
class SentimentAnalyzer:
    """Wraps a HuggingFace sentiment pipeline for Indonesian articles.

    Tries a dedicated Indonesian RoBERTa classifier first, then a
    multilingual fallback; if both fail, ``sentiment_pipeline`` stays
    ``None`` and predictions degrade to a neutral default.
    """

    def __init__(self):
        try:
            print("Memuat model sentiment analysis...")
            model_name = "w11wo/indonesian-roberta-base-sentiment-classifier"
            self.sentiment_pipeline = pipeline(
                "sentiment-analysis",
                model=model_name,
                tokenizer=model_name,
                max_length=512,
                truncation=True,
            )
            print("Model sentiment analysis berhasil dimuat")
        except Exception as e:
            print(f"Error loading sentiment model: {e}")
            print("Menggunakan model fallback...")
            try:
                # Multilingual star-rating model as a fallback
                self.sentiment_pipeline = pipeline(
                    "sentiment-analysis",
                    model="nlptown/bert-base-multilingual-uncased-sentiment",
                    max_length=512,
                    truncation=True,
                )
            except Exception as e2:
                print(f"Error loading fallback model: {e2}")
                self.sentiment_pipeline = None

    def predict_sentiment(self, text):
        """Return ``(label, confidence)`` for *text*.

        Label is one of ``"POSITIF"``, ``"NEGATIF"``, ``"NETRAL"``.
        Empty/NaN input, a missing model, or a pipeline error all yield
        the neutral default ``("NETRAL", 0.5)``.
        """
        if self.sentiment_pipeline is None:
            return "NETRAL", 0.5
        if pd.isna(text) or text == "" or str(text).strip() == "":
            return "NETRAL", 0.5
        try:
            # Cap input length to keep the tokenizer well within limits
            snippet = str(text).strip()[:1000]
            prediction = self.sentiment_pipeline(snippet)[0]
            raw_label = prediction['label'].upper()
            score = prediction['score']

            # Normalize model-specific labels (Indonesian model, the
            # multilingual star model, and generic LABEL_n outputs)
            is_positive = ('POSITIF' in raw_label or 'POSITIVE' in raw_label
                           or raw_label in ('5', '4', 'LABEL_2'))
            if is_positive:
                return "POSITIF", score
            is_negative = ('NEGATIF' in raw_label or 'NEGATIVE' in raw_label
                           or raw_label in ('1', '2', 'LABEL_0'))
            if is_negative:
                return "NEGATIF", score
            return "NETRAL", score
        except Exception as e:
            print(f"Error predicting sentiment: {e}")
            return "NETRAL", 0.5
# 3. Keyword extraction with Indonesian stopword removal
class KeywordExtractor:
    """Extracts frequent keywords from article text.

    Uses Sastrawi's stopword remover plus a manual Indonesian stopword
    list; words of length <= 2 are always discarded. If Sastrawi fails to
    initialize, only basic text cleaning is applied.
    """

    def __init__(self):
        try:
            self.factory = StopWordRemoverFactory()
            self.stopword = self.factory.create_stop_word_remover()
            # Manual additions not covered by Sastrawi's default list
            self.additional_stopwords = {
                'yang', 'dan', 'di', 'dari', 'dalam', 'untuk', 'pada', 'ke',
                'dengan', 'ini', 'itu', 'saya', 'kamu', 'kami', 'kita',
                'mereka', 'adalah', 'ada', 'akan', 'atau', 'juga', 'tidak',
                'bukan', 'sudah', 'belum', 'saja', 'bisa', 'dapat', 'harus',
                'perlu', 'ingin', 'lagi', 'lalu', 'kemudian', 'saat',
                'ketika', 'karena', 'sebab', 'jika', 'apabila', 'meski',
                'walaupun', 'agar', 'supaya', 'sehingga', 'maka', 'oleh',
                'kepada', 'terhadap', 'atas', 'bawah', 'depan', 'belakang',
                'samping', 'antara', 'demi', 'hingga', 'sampai', 'sejak',
                'selama', 'tentang', 'mengenai', 'menurut', 'situ', 'sana',
                'sini', 'mana', 'apa', 'siapa', 'kapan', 'dimana',
                'bagaimana', 'mengapa', 'berapa',
            }
        except Exception as e:
            print(f"Error initializing keyword extractor: {e}")
            self.stopword = None

    def clean_text(self, text):
        """Lowercase *text*, keep latin letters only, collapse whitespace."""
        if pd.isna(text):
            return ""
        lowered = str(text).lower()
        letters_only = re.sub(r'[^a-zA-Z\s]', ' ', lowered)
        return re.sub(r'\s+', ' ', letters_only).strip()

    def remove_stopwords(self, text):
        """Return cleaned *text* with Sastrawi + manual stopwords removed."""
        if self.stopword is None:
            # Sastrawi unavailable — fall back to basic cleaning only
            return self.clean_text(text)
        without_common = self.stopword.remove(self.clean_text(text))
        kept = [token for token in without_common.split()
                if token not in self.additional_stopwords and len(token) > 2]
        return ' '.join(kept)

    def extract_keywords(self, texts, top_n=20):
        """Return the *top_n* most frequent keywords as (word, count) pairs."""
        frequencies = Counter()
        for text in texts:
            if pd.isna(text):
                continue
            tokens = self.remove_stopwords(text).split()
            frequencies.update(token for token in tokens if len(token) > 2)
        return frequencies.most_common(top_n)
# Helper: check whether a previously saved analysis exists and is valid
def check_existing_analysis(sources):
    """Check for previously saved analysis result files.

    Parameters
    ----------
    sources : str
        Selected source name, or "Semua" for all sources.

    Returns
    -------
    tuple
        ``(exists, filtered_df, keywords, min_date, max_date)``. When no
        valid analysis is found, everything except ``exists`` is ``None``.
    """
    try:
        main_file = 'articles_with_sentiment.csv'
        if not os.path.exists(main_file):
            print("File analisis tidak ditemukan, perlu analisis ulang")
            return False, None, None, None, None

        existing_df = pd.read_csv(main_file)
        if existing_df.empty:
            print("File analisis kosong, perlu analisis ulang")
            return False, None, None, None, None

        existing_df['date'] = pd.to_datetime(existing_df['date'], errors='coerce')

        # Full date range of the stored analysis (used to prefill the filter UI)
        min_date = existing_df['date'].min()
        max_date = existing_df['date'].max()

        # Restrict to the selected source unless "Semua" (all) is chosen
        if sources != "Semua":
            filtered_df = existing_df[existing_df['source'] == sources].copy()
        else:
            filtered_df = existing_df.copy()

        if filtered_df.empty:
            print(f"Tidak ada data untuk sumber {sources}, perlu analisis ulang")
            return False, None, None, None, None

        # All companion files must also exist for the cache to be usable
        support_files = [
            'sentiment_trend_daily.csv',
            'sentiment_by_source.csv',
            'sentiment_distribution.csv',
            'extracted_keywords.csv',
        ]
        for file in support_files:
            if not os.path.exists(file):
                print(f"File pendukung {file} tidak ditemukan, perlu analisis ulang")
                return False, None, None, None, None

        keywords_df = pd.read_csv('extracted_keywords.csv')
        keywords = list(zip(keywords_df['keyword'], keywords_df['frequency']))

        print("File analisis ditemukan dan valid, menggunakan data yang ada")
        return True, filtered_df, keywords, min_date, max_date

    except Exception as e:
        print(f"Error checking existing analysis: {e}")
        return False, None, None, None, None


# Helper: filter a DataFrame by date range
def filter_by_date(df, start_date, end_date):
    """Filter *df* by an inclusive date range on the ``date`` column.

    Accepts datetime objects, pandas Timestamps, or parseable strings (as
    supplied by the Gradio widgets). A missing/unparseable bound falls back
    to the data's min/max date; a reversed range is swapped rather than
    returning nothing. The input frame is never mutated.
    """
    try:
        if df.empty or 'date' not in df.columns:
            print("⚠️ Data kosong atau kolom 'date' tidak ditemukan.")
            return pd.DataFrame()

        # Work on a copy so the caller's frame is not mutated
        df = df.copy()
        df['date'] = pd.to_datetime(df['date'], errors='coerce')
        df = df.dropna(subset=['date'])

        def parse_dt(value):
            # Normalize any supported input into a Timestamp (or None)
            if value is None or value == "":
                return None
            if isinstance(value, (datetime, pd.Timestamp)):
                return pd.Timestamp(value)
            try:
                return pd.to_datetime(str(value))
            except Exception:
                return None

        start_dt = parse_dt(start_date)
        end_dt = parse_dt(end_date)

        if start_dt is None:
            start_dt = df['date'].min()
        if end_dt is None:
            end_dt = df['date'].max()

        # Swap a reversed range instead of producing an empty result
        if start_dt > end_dt:
            start_dt, end_dt = end_dt, start_dt

        print(f"📅 Memfilter dari {start_dt:%Y-%m-%d} hingga {end_dt:%Y-%m-%d}")

        mask = (df['date'] >= start_dt) & (df['date'] <= end_dt)
        filtered_df = df.loc[mask].copy()
        print(f"✅ {len(filtered_df)} baris setelah filter tanggal.")
        return filtered_df

    except Exception as e:
        print("❌ ERROR filter_by_date:", e)
        import traceback
        traceback.print_exc()
        return df


# Helper: re-run analysis on an already-filtered subset
def reanalyze_filtered_data(filtered_df):
    """Re-run keyword extraction on a filtered DataFrame.

    Sentiment labels are intentionally NOT recomputed here: the filtered
    rows already carry ``sentiment``/``confidence`` from the stored
    analysis, so only the keyword statistics need refreshing for the
    new subset.
    """
    try:
        keyword_extractor = KeywordExtractor()
        print("Memproses ekstraksi keyword ulang...")
        keywords = keyword_extractor.extract_keywords(filtered_df['content'])
        return filtered_df, keywords
    except Exception as e:
        print(f"Error in reanalysis: {e}")
        return filtered_df, []


# Helper: persist analysis results to CSV files
def save_analysis_results(filtered_df, keywords):
    """Write the analysis outputs to the CSV files read by
    :func:`check_existing_analysis`.

    NOTE(review): this function is not invoked anywhere in the visible UI
    flow, yet the caching logic expects these files to exist — confirm the
    intended call site (likely after a fresh analysis in analyze_articles).
    """
    try:
        # 1. Articles with their predicted sentiment
        articles_with_sentiment = filtered_df[
            ['id', 'source', 'keyword', 'date', 'title', 'content',
             'author', 'url', 'sentiment', 'confidence']
        ].copy()
        articles_with_sentiment['date'] = articles_with_sentiment['date'].dt.strftime('%Y-%m-%d')
        articles_with_sentiment.to_csv('articles_with_sentiment.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: articles_with_sentiment.csv")

        # 2. Daily sentiment counts
        daily_sentiment = filtered_df.groupby(
            [filtered_df['date'].dt.date, 'sentiment']
        ).size().unstack(fill_value=0)
        daily_sentiment.reset_index(inplace=True)
        daily_sentiment.rename(columns={'date': 'tanggal'}, inplace=True)
        daily_sentiment.to_csv('sentiment_trend_daily.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_trend_daily.csv")

        # 3. Sentiment counts per source
        source_sentiment = filtered_df.groupby(['source', 'sentiment']).size().unstack(fill_value=0)
        source_sentiment.reset_index(inplace=True)
        source_sentiment.to_csv('sentiment_by_source.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_by_source.csv")

        # 4. Overall sentiment distribution with percentages
        sentiment_distribution = filtered_df['sentiment'].value_counts().reset_index()
        sentiment_distribution.columns = ['sentiment', 'count']
        sentiment_distribution['percentage'] = (
            sentiment_distribution['count'] / len(filtered_df) * 100
        ).round(2)
        sentiment_distribution.to_csv('sentiment_distribution.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: sentiment_distribution.csv")

        # 5. Extracted keywords and their frequencies
        keywords_df = pd.DataFrame(keywords, columns=['keyword', 'frequency'])
        keywords_df.to_csv('extracted_keywords.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: extracted_keywords.csv")

        # 6. Confidence statistics per sentiment class
        confidence_stats = filtered_df.groupby('sentiment')['confidence'].agg(
            ['mean', 'std', 'min', 'max']
        ).reset_index()
        confidence_stats.columns = ['sentiment', 'confidence_mean', 'confidence_std',
                                    'confidence_min', 'confidence_max']
        confidence_stats.to_csv('confidence_statistics.csv', index=False, encoding='utf-8')
        print("✓ Disimpan: confidence_statistics.csv")

        return True
    except Exception as e:
        print(f"Error saving analysis results: {e}")
        return False


# Visualization: stacked bars of sentiment per day
def create_sentiment_time_plot(filtered_df):
    """Stacked bar chart of article counts per day, split by sentiment."""
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Group on the calendar date directly so the caller's frame is not
        # mutated (the previous version injected a 'date_only' column)
        daily_sentiment = filtered_df.groupby(
            [filtered_df['date'].dt.date, 'sentiment']
        ).size().unstack(fill_value=0)

        # Guarantee all three sentiment columns exist
        for sentiment in ['POSITIF', 'NEGATIF', 'NETRAL']:
            if sentiment not in daily_sentiment.columns:
                daily_sentiment[sentiment] = 0

        daily_sentiment = daily_sentiment.sort_index()

        if not daily_sentiment.empty:
            bottom = None
            colors = {'POSITIF': '#2ecc71', 'NEGATIF': '#e74c3c', 'NETRAL': '#95a5a6'}
            for sentiment, color in colors.items():
                if bottom is None:
                    ax.bar(daily_sentiment.index, daily_sentiment[sentiment],
                           label=sentiment, alpha=0.8, color=color)
                    bottom = daily_sentiment[sentiment]
                else:
                    ax.bar(daily_sentiment.index, daily_sentiment[sentiment],
                           bottom=bottom, label=sentiment, alpha=0.8, color=color)
                    bottom += daily_sentiment[sentiment]

        ax.set_xlabel('Tanggal')
        ax.set_ylabel('Jumlah Artikel')
        ax.set_title('Trend Sentimen Artikel per Tanggal')
        ax.legend()
        plt.xticks(rotation=45)
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating time plot: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: word cloud of extracted keywords
def create_wordcloud_plot(keywords):
    """Word cloud built from (keyword, frequency) pairs."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if not keywords:
            ax.text(0.5, 0.5, 'Tidak ada data keyword',
                    ha='center', va='center', transform=ax.transAxes)
            ax.set_title('Word Cloud - Keyword Terpopuler')
            return fig

        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
            colormap='viridis',
            max_words=50
        ).generate_from_frequencies(dict(keywords))

        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')
        ax.set_title('Word Cloud - Keyword Terpopuler')
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating wordcloud: {e}")
        ax.text(0.5, 0.5, 'Error creating wordcloud',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: overall sentiment distribution pie chart
def create_pie_chart(filtered_df):
    """Pie chart of the overall POSITIF/NEGATIF/NETRAL distribution."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sentiment_counts = filtered_df['sentiment'].value_counts()
        colors = ['#2ecc71', '#e74c3c', '#95a5a6']  # green, red, grey

        # Fixed label order: POSITIF, NEGATIF, NETRAL
        labels = ['POSITIF', 'NEGATIF', 'NETRAL']
        counts = [sentiment_counts.get(label, 0) for label in labels]

        # Only draw wedges for non-zero classes
        filtered_labels = []
        filtered_counts = []
        filtered_colors = []
        for label, count, color in zip(labels, counts, colors):
            if count > 0:
                filtered_labels.append(label)
                filtered_counts.append(count)
                filtered_colors.append(color)

        if filtered_counts:
            wedges, texts, autotexts = ax.pie(
                filtered_counts,
                labels=filtered_labels,
                autopct=lambda p: f'{p:.1f}%' if p > 0 else '',
                colors=filtered_colors,
                startangle=90
            )
            # Make the percentage labels readable on colored wedges
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')
        else:
            ax.text(0.5, 0.5, 'Tidak ada data',
                    ha='center', va='center', transform=ax.transAxes)

        ax.set_title('Distribusi Sentimen Artikel')
        plt.tight_layout()
    except Exception as e:
        print(f"Error creating pie chart: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Visualization: per-sentiment pie charts of source proportions
def create_sentiment_source_pie_charts(filtered_df):
    """One pie chart per sentiment (POSITIF, NEGATIF, NETRAL), each showing
    the proportion of article sources within that sentiment class."""
    fig, axes = plt.subplots(1, 3, figsize=(18, 7))
    sentiments = ['POSITIF', 'NEGATIF', 'NETRAL']
    colors = {
        'POSITIF': '#2ecc71',  # green
        'NEGATIF': '#e74c3c',  # red
        'NETRAL': '#95a5a6'    # grey
    }
    try:
        # Both columns are required for this breakdown
        if 'sentiment' not in filtered_df.columns or 'source' not in filtered_df.columns:
            for ax in axes:
                ax.text(0.5, 0.5, 'Kolom "sentiment" atau "source" tidak ditemukan',
                        ha='center', va='center', transform=ax.transAxes)
            plt.tight_layout()
            return fig

        for i, sentiment in enumerate(sentiments):
            ax = axes[i]
            subset = filtered_df[filtered_df['sentiment'] == sentiment]

            if subset.empty:
                ax.text(0.5, 0.5, f'Tidak ada data {sentiment.lower()}',
                        ha='center', va='center', transform=ax.transAxes)
                ax.set_title(f'Sentimen {sentiment}', color=colors[sentiment])
                ax.axis('off')
                continue

            counts = subset['source'].value_counts()
            labels = counts.index
            sizes = counts.values

            wedges, texts, autotexts = ax.pie(
                sizes,
                labels=labels,
                autopct=lambda p: f'{p:.1f}%' if p > 0 else '',
                startangle=90,
                colors=plt.cm.Pastel2.colors,
                pctdistance=0.8
            )
            for autotext in autotexts:
                autotext.set_color('white')
                autotext.set_fontweight('bold')

            ax.set_title(f"Proporsi Sumber ({sentiment})",
                         fontsize=12, color=colors[sentiment], weight='bold')
            ax.axis('equal')  # keep the pie circular

        plt.tight_layout()
        return fig
    except Exception as e:
        print(f"Error creating sentiment-source pie charts: {e}")
        for ax in axes:
            ax.text(0.5, 0.5, 'Error membuat chart',
                    ha='center', va='center', transform=ax.transAxes)
        plt.tight_layout()
        return fig


# Visualization: grouped bars of sentiment counts per source
def create_source_sentiment_plot(filtered_df):
    """Grouped (clustered) bar plot of sentiment distribution per source."""
    fig, ax = plt.subplots(figsize=(12, 7))
    try:
        source_sentiment = filtered_df.groupby(['source', 'sentiment']).size().unstack(fill_value=0)

        if not source_sentiment.empty:
            # Guarantee all sentiment columns exist, in a fixed order
            for sentiment in ['POSITIF', 'NEGATIF', 'NETRAL']:
                if sentiment not in source_sentiment.columns:
                    source_sentiment[sentiment] = 0

            colors = {'POSITIF': '#2ecc71', 'NEGATIF': '#e74c3c', 'NETRAL': '#95a5a6'}
            source_sentiment = source_sentiment[['POSITIF', 'NEGATIF', 'NETRAL']]

            # Clustered (not stacked) bars
            source_sentiment.plot(
                kind='bar',
                ax=ax,
                color=[colors[c] for c in source_sentiment.columns],
                width=0.7,
                stacked=False
            )

            ax.set_xlabel('Sumber Artikel')
            ax.set_ylabel('Jumlah Artikel')
            ax.set_title('Distribusi Sentimen per Sumber Artikel')
            plt.xticks(rotation=45, ha='right')
            ax.legend(title='Sentimen')
            plt.grid(axis='y', linestyle='--', alpha=0.6)
            plt.tight_layout()
        else:
            ax.text(0.5, 0.5, 'Tidak ada data',
                    ha='center', va='center', transform=ax.transAxes)
    except Exception as e:
        print(f"Error creating source sentiment plot: {e}")
        ax.text(0.5, 0.5, 'Error creating chart',
                ha='center', va='center', transform=ax.transAxes)
    return fig


# Main analysis entry point for the Gradio UI
def analyze_articles(sources):
    """Run (or reuse) the full analysis for the selected source.

    Returns 11 values matching the ``analyze_btn.click`` outputs:
    time plot, word cloud, sentiment pie, per-sentiment source pies,
    per-source bar plot, recap text, min/max dates, and three
    ``gr.update`` objects enabling the date-filter widgets.

    BUGFIX: the per-source bar plot and the per-sentiment source pies were
    previously returned in swapped positions, so each appeared under the
    other's label in the UI. The return order now matches the outputs list
    (pie_chart2 <- sentiment_source_pie_chart, source_plot <- source_plot).
    """
    try:
        # --- Reuse a stored analysis when one exists ---
        analysis_exists, existing_df, existing_keywords, min_date, max_date = \
            check_existing_analysis(sources)

        if analysis_exists:
            print("📦 Menggunakan data analisis yang sudah ada...")
            filtered_df = existing_df
            keywords = existing_keywords
            analysis_type = "Data Existing"
        else:
            print("🚀 Melakukan analisis baru...")
            df = load_data()
            if df.empty:
                return (
                    None, None, None, None, None,
                    "Data tidak ditemukan atau kosong",
                    None, None,
                    gr.update(interactive=False),
                    gr.update(interactive=False),
                    gr.update(interactive=False)
                )

            # Restrict to the selected source unless "Semua"
            if sources != "Semua":
                filtered_df = df[df['source'] == sources].copy()
            else:
                filtered_df = df.copy()

            if filtered_df.empty:
                return (
                    None, None, None, None, None,
                    "Tidak ada data untuk sumber dipilih",
                    None, None,
                    gr.update(interactive=False),
                    gr.update(interactive=False),
                    gr.update(interactive=False)
                )

            # Fresh sentiment prediction and keyword extraction
            sentiment_analyzer = SentimentAnalyzer()
            keyword_extractor = KeywordExtractor()

            sentiments, confidences = [], []
            for content in filtered_df['content']:
                sentiment, confidence = sentiment_analyzer.predict_sentiment(content)
                sentiments.append(sentiment)
                confidences.append(confidence)

            filtered_df['sentiment'] = sentiments
            filtered_df['confidence'] = confidences
            keywords = keyword_extractor.extract_keywords(filtered_df['content'])

            min_date = filtered_df['date'].min()
            max_date = filtered_df['date'].max()
            analysis_type = "Analisis Baru"

        # --- Build all visualizations ---
        print("📈 Membuat visualisasi...")
        time_plot = create_sentiment_time_plot(filtered_df)
        wordcloud_plot = create_wordcloud_plot(keywords)
        pie_chart = create_pie_chart(filtered_df)
        source_plot = create_source_sentiment_plot(filtered_df)
        sentiment_source_pie_chart = create_sentiment_source_pie_charts(filtered_df)

        # --- Recap text ---
        total = len(filtered_df)
        pos = (filtered_df['sentiment'] == 'POSITIF').sum()
        neg = (filtered_df['sentiment'] == 'NEGATIF').sum()
        neu = (filtered_df['sentiment'] == 'NETRAL').sum()
        avg_conf = filtered_df['confidence'].mean()

        recap_text = f"""
REKAP ANALISIS ARTIKEL:
────────────────────────
• Tipe Analisis : {analysis_type}
• Total Artikel : {total}
• Positif : {pos} ({(pos/total*100):.1f}%)
• Negatif : {neg} ({(neg/total*100):.1f}%)
• Netral : {neu} ({(neu/total*100):.1f}%)
• Confidence Rata2 : {avg_conf:.3f}
• Sumber : {sources}
• Keyword Unik : {len(keywords)} kata
"""

        # Enable the date-filter widgets now that an analysis exists
        filter_enabled = True

        # Return order matches the outputs list of analyze_btn.click:
        # [time_plot, wordcloud_plot, pie_chart, pie_chart2, source_plot,
        #  recap_output, min_date_state, max_date_state,
        #  start_date, end_date, apply_filter_btn]
        return (
            time_plot,                                # 1 time_plot
            wordcloud_plot,                           # 2 wordcloud_plot
            pie_chart,                                # 3 pie_chart
            sentiment_source_pie_chart,               # 4 pie_chart2 (source proportions)
            source_plot,                              # 5 source_plot (bars per source)
            recap_text,                               # 6 recap_output
            min_date,                                 # 7 min_date_state
            max_date,                                 # 8 max_date_state
            gr.update(interactive=filter_enabled),    # 9 start_date
            gr.update(interactive=filter_enabled),    # 10 end_date
            gr.update(interactive=filter_enabled)     # 11 apply_filter_btn
        )

    except Exception as e:
        print(f"❌ Error in analyze_articles: {e}")
        return (
            None, None, None, None, None,
            f"Terjadi error: {str(e)}",
            None, None,
            gr.update(interactive=False),
            gr.update(interactive=False),
            gr.update(interactive=False)
        )


# Apply a date filter on top of the stored analysis
def apply_date_filter(start_date, end_date, sources):
    """Filter the stored analysis by date/source and refresh the plots.

    Returns 6 values matching ``apply_filter_btn.click`` outputs. Like
    :func:`analyze_articles`, the two per-source plots are returned in
    the corrected (previously swapped) order.
    """
    try:
        if not os.path.exists('articles_with_sentiment.csv'):
            return None, None, None, None, None, \
                "File analisis tidak ditemukan. Lakukan analisis terlebih dahulu."

        existing_df = pd.read_csv('articles_with_sentiment.csv')
        existing_df['date'] = pd.to_datetime(existing_df['date'], errors='coerce')

        if existing_df.empty:
            return None, None, None, None, None, \
                "Data analisis kosong. Lakukan analisis terlebih dahulu."

        # Filter by source, then by date range
        if sources != "Semua":
            filtered_df = existing_df[existing_df['source'] == sources].copy()
        else:
            filtered_df = existing_df.copy()

        filtered_df = filter_by_date(filtered_df, start_date, end_date)

        if filtered_df.empty:
            return None, None, None, None, None, \
                "Tidak ada data dalam rentang tanggal yang dipilih."

        # Re-run keyword extraction on the filtered subset
        print("🔁 Melakukan analisis ulang untuk data yang difilter...")
        reanalyzed_df, keywords = reanalyze_filtered_data(filtered_df)

        # Rebuild every visualization
        time_plot = create_sentiment_time_plot(reanalyzed_df)
        wordcloud_plot = create_wordcloud_plot(keywords)
        pie_chart = create_pie_chart(reanalyzed_df)
        source_plot = create_source_sentiment_plot(reanalyzed_df)
        sentiment_source_pie_chart = create_sentiment_source_pie_charts(reanalyzed_df)

        # Recap text
        total = len(reanalyzed_df)
        pos = (reanalyzed_df['sentiment'] == 'POSITIF').sum()
        neg = (reanalyzed_df['sentiment'] == 'NEGATIF').sum()
        neu = (reanalyzed_df['sentiment'] == 'NETRAL').sum()
        avg_conf = reanalyzed_df['confidence'].mean()

        recap_text = f"""
REKAP ANALISIS FILTER TANGGAL:
──────────────────────────────
• Rentang Tanggal : {start_date} hingga {end_date}
• Total Artikel : {total}
• POSITIF : {pos} ({(pos/total*100):.1f}%)
• NEGATIF : {neg} ({(neg/total*100):.1f}%)
• NETRAL : {neu} ({(neu/total*100):.1f}%)
• Confidence Rata2 : {avg_conf:.3f}
• Sumber : {sources}

✅ Data difilter & dianalisis ulang
"""

        # Order matches outputs: [time_plot, wordcloud_plot, pie_chart,
        # pie_chart2, source_plot, recap_output]
        return time_plot, wordcloud_plot, pie_chart, \
            sentiment_source_pie_chart, source_plot, recap_text

    except Exception as e:
        print(f"❌ Error in apply_date_filter: {e}")
        return None, None, None, None, None, \
            f"Terjadi error saat apply filter: {str(e)}"


# Build the Gradio interface
def setup_gradio_interface():
    """Construct and return the Gradio Blocks app."""
    # Load data once to populate the source dropdown and the date defaults
    df = load_data()
    if not df.empty:
        sources = ["Semua"] + sorted(list(df['source'].unique()))
        # Convert to datetime objects for the Gradio widgets
        min_date = datetime.combine(df['date'].min().date(), datetime.min.time())
        max_date = datetime.combine(df['date'].max().date(), datetime.min.time())
    else:
        sources = ["Semua"]
        min_date = datetime.now() - timedelta(days=30)
        max_date = datetime.now()

    with gr.Blocks(title="Analisis Sentimen Artikel") as demo:
        gr.Markdown("# 📰 Analisis Sentimen Artikel Berita")
        gr.Markdown("Pilih sumber dan tanggal untuk menganalisis sentimen berita.")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### ⚙️ Pengaturan Analisis")
                source_dropdown = gr.Dropdown(
                    label="Sumber Artikel",
                    choices=sources,
                    value="Semua",
                    info="Pilih sumber artikel tertentu atau 'Semua'"
                )
                analyze_btn = gr.Button("🚀 Jalankan Analisis")

                gr.Markdown("### 📅 Filter Tanggal (Opsional)")
                start_date = gr.Textbox(
                    label="Tanggal Mulai",
                    value=min_date.strftime('%Y-%m-%d %H:%M') if min_date else "",
                    info="Masukkan tanggal mulai (format: YYYY-MM-DD)"
                )
                end_date = gr.Textbox(
                    label="Tanggal Akhir",
                    value=max_date.strftime('%Y-%m-%d %H:%M') if max_date else "",
                    info="Masukkan tanggal akhir (format: YYYY-MM-DD)"
                )
                # Disabled until an analysis has been run
                apply_filter_btn = gr.Button("🔍 Apply Filter", interactive=False)

            with gr.Column(scale=1):
                gr.Markdown("""
### Analisis Sentimen Artikel Berbasis AI

Aplikasi ini menganalisis sentimen artikel berita yang telah di-scrape sebelumnya.
Gunakan fitur analisis untuk memproses data artikel dan menghasilkan visualisasi
yang informatif. Sumber artikel dapat dipilih sesuai kebutuhan, dan filter tanggal
dapat diterapkan setelah analisis selesai. Adapun sumber artikel berasal dari
berbagai portal berita online terkemuka di Indonesia.

Teknis analisis meliputi prediksi sentimen menggunakan model AI yang telah dilatih
khusus untuk Bahasa Indonesia, serta ekstraksi keyword dengan menghilangkan stopword.
Algoritma yang digunakan memastikan hasil analisis yang akurat dan relevan.
Adapun algoritma tersebut meliputi:
- **Prediksi Sentimen:** Menggunakan model transformer berbasis Roberta yang dioptimalkan untuk Bahasa Indonesia.
- **Ekstraksi Keyword:** Menggunakan metode penghilangan stopword dengan daftar stopword khusus Bahasa Indonesia.

Secara visual, hasil analisis disajikan dalam berbagai bentuk grafik seperti
trend sentimen per tanggal, distribusi sentimen, sentimen per sumber, dan
word cloud keyword.

### 🔄 Logika Aplikasi:
- ✅ Jika file analisis sudah ada → tampilkan visualisasi langsung
- 🔄 Jika file analisis belum ada → lakukan analisis baru
- ⚠️ Filter tanggal hanya aktif setelah analisis selesai
- 🔍 Apply filter akan menganalisis ulang data yang difilter
""")

            with gr.Column(scale=1):
                recap_output = gr.Textbox(
                    label="📈 Rekap Analisis",
                    lines=23,
                    max_lines=25,
                    interactive=False
                )

        # State holding the analyzed data's date range
        min_date_state = gr.State(value=min_date)
        max_date_state = gr.State(value=max_date)

        with gr.Row():
            with gr.Column():
                wordcloud_plot = gr.Plot(label="☁️ Word Cloud Keyword")
            with gr.Column():
                pie_chart = gr.Plot(label="🥧 Distribusi Sentimen")
            with gr.Column():
                pie_chart2 = gr.Plot(label="🥧 Distribusi Sumber Sentimen")

        with gr.Row():
            with gr.Column():
                source_plot = gr.Plot(label="📊 Sentimen per Sumber")
            with gr.Column():
                time_plot = gr.Plot(label="📅 Trend Sentimen per Tanggal")

        # Event handlers — the analysis functions return values in exactly
        # this output order (see analyze_articles / apply_date_filter)
        analyze_btn.click(
            fn=analyze_articles,
            inputs=[source_dropdown],
            outputs=[
                time_plot, wordcloud_plot, pie_chart, pie_chart2, source_plot,
                recap_output, min_date_state, max_date_state,
                start_date, end_date, apply_filter_btn
            ]
        )

        apply_filter_btn.click(
            fn=apply_date_filter,
            inputs=[start_date, end_date, source_dropdown],
            outputs=[time_plot, wordcloud_plot, pie_chart, pie_chart2,
                     source_plot, recap_output]
        )

    return demo


if __name__ == "__main__":
    print("Menjalankan aplikasi Analisis Sentimen Artikel...")
    print("Pastikan file 'scraped_articles.csv' ada di direktori yang sama")
    print("Aplikasi akan tersedia di http://localhost:7860")
    try:
        demo = setup_gradio_interface()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True
        )
    except Exception as e:
        print(f"Error launching app: {e}")