"""Gradio dashboard for media analysis backed by a local data cache.

Articles are loaded from the database once, stored in a local cache through
``data_manager``, and then reused by the table, analysis and dashboard tabs
without another database round trip.
"""

import os
from datetime import datetime, timedelta

import gradio as gr
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from analisis_media import media_analyzer
from data_manager import data_manager
from database_manager import db_manager

# Date format accepted by every date input in the UI.
DATE_FORMAT = '%Y-%m-%d'


# ----------------------------
# 🔹 Date validation
# ----------------------------
def validate_date(date_string):
    """Return True when *date_string* parses as a YYYY-MM-DD date.

    Non-string input (for example ``None``) is reported as invalid instead of
    raising, so callers can use the result directly in a condition.
    """
    try:
        datetime.strptime(date_string, DATE_FORMAT)
    except (ValueError, TypeError):
        return False
    return True


# ----------------------------
# 🔹 Available date range
# ----------------------------
def get_date_range_from_cache():
    """Return ``(min_date, max_date)`` strings used to pre-fill the date inputs.

    The filters of the most recent cache entry are used when present; any
    missing or empty bound falls back to "30 days ago" / "today".
    """
    now = datetime.now()
    default_min = (now - timedelta(days=30)).strftime(DATE_FORMAT)
    default_max = now.strftime(DATE_FORMAT)

    cache_info = data_manager.get_cached_data_info()
    if not cache_info:
        return default_min, default_max

    latest_cache = max(cache_info, key=lambda entry: entry['timestamp'])
    # 'filters' may be absent or falsy (show_cached_data_info guards for that
    # as well), so normalise it to a dict before reading from it.
    filters = latest_cache.get('filters') or {}
    min_date = filters.get('start_date') or default_min
    max_date = filters.get('end_date') or default_max
    return min_date, max_date


# ----------------------------
# 🔹 Gradio functions - data management
# ----------------------------
def load_and_cache_data(start_date, end_date):
    """Load articles for a date range, preferring the cache over the database.

    This is a generator so Gradio can stream progress updates. Every outcome,
    including validation errors and cache hits, is *yielded* as a
    ``(DataFrame, status_message)`` pair: a value passed to ``return`` inside
    a generator is never delivered to the consumer.

    Args:
        start_date: Start date as a ``YYYY-MM-DD`` string (may be empty).
        end_date: End date as a ``YYYY-MM-DD`` string (may be empty).

    Yields:
        Tuples of ``(DataFrame, status_message)``; the last one is the final
        result.
    """
    # Validate the date format.
    if start_date and not validate_date(start_date):
        yield (
            pd.DataFrame({"Error": [f"Format tanggal mulai tidak valid: {start_date}"]}),
            "❌ Format tanggal tidak valid",
        )
        return
    if end_date and not validate_date(end_date):
        yield (
            pd.DataFrame({"Error": [f"Format tanggal akhir tidak valid: {end_date}"]}),
            "❌ Format tanggal tidak valid",
        )
        return

    # Validate the date order. Compare parsed dates rather than raw strings:
    # strptime accepts non-zero-padded input such as "2024-9-30", for which a
    # lexicographic comparison gives the wrong answer.
    if start_date and end_date:
        parsed_start = datetime.strptime(start_date, DATE_FORMAT)
        parsed_end = datetime.strptime(end_date, DATE_FORMAT)
        if parsed_start > parsed_end:
            yield (
                pd.DataFrame({"Error": ["Tanggal mulai tidak boleh lebih besar dari tanggal akhir"]}),
                "❌ Tanggal tidak valid",
            )
            return

    active_filters = {'start_date': start_date, 'end_date': end_date}

    # Try the cache first.
    cached_data = data_manager.load_from_cache(start_date, end_date)
    if cached_data is not None:
        data_manager.current_data = cached_data
        data_manager.current_filters = active_filters
        yield cached_data, f"✅ Data loaded from cache ({len(cached_data)} rows)"
        return

    # Not cached: fall back to the database.
    yield pd.DataFrame(), "🔄 Connecting to database..."

    df = db_manager.load_articles(start_date, end_date)

    if "Error" in df.columns:
        yield df, "❌ Error loading data from database"
        return

    if len(df) == 0:
        yield df, "⚠️ No data found for the selected date range"
        return

    yield df, f"✅ Loaded {len(df)} rows from database. Saving to cache..."

    # Persist to the cache and remember the data for the other tabs.
    data_manager.save_to_cache(df, start_date, end_date)
    data_manager.current_data = df
    data_manager.current_filters = active_filters

    yield df, f"✅ Data loaded and cached successfully ({len(df)} rows)"


def load_data_only(start_date, end_date):
    """Run ``load_and_cache_data`` to completion and return its final result.

    Intended for event handlers that do not need intermediate progress
    updates.

    Returns:
        The last ``(DataFrame, status_message)`` pair produced by the loader.
    """
    # Defensive default in case the loader produces nothing at all.
    result = (pd.DataFrame(), "")
    for result in load_and_cache_data(start_date, end_date):
        pass
    return result


def show_cached_data_info():
    """Return a Markdown summary of every cache entry known to data_manager."""
    cache_info = data_manager.get_cached_data_info()

    if not cache_info:
        return "📭 No cached data available"

    parts = ["## 📂 Cached Data Information\n\n"]
    for i, cache in enumerate(cache_info, 1):
        parts.append(f"**Cache {i}:**\n")
        parts.append(f"- File: `{cache['filename']}`\n")
        parts.append(f"- Size: {cache['data_shape']}\n")
        parts.append(f"- Date: {cache['timestamp'].strftime('%Y-%m-%d %H:%M:%S')}\n")
        filters = cache['filters']
        if filters:
            parts.append(
                f"- Filters: {filters.get('start_date', 'None')} to {filters.get('end_date', 'None')}\n"
            )
        parts.append("\n")

    return "".join(parts)


def clear_all_cache():
    """Clear the cache via data_manager and return a status message."""
    if data_manager.clear_cache():
        return "✅ All cache cleared successfully"
    return "❌ Failed to clear cache"


# ----------------------------
# 🔹 Media analysis on cached data
# ----------------------------
def perform_media_analysis_cached():
    """Run the media analysis on the data currently held by data_manager.

    Returns:
        ``(analysis_df, stats_df, status_message)``. On failure the frames are
        empty (or the error frame itself) and the message explains why.
    """
    if data_manager.current_data is None:
        return pd.DataFrame(), pd.DataFrame(), "❌ No data available. Please load data first."

    # Broad catch on purpose: this is the UI boundary, and any failure should
    # surface as a status message instead of breaking the Gradio callback.
    try:
        df = data_manager.current_data

        if "Error" in df.columns:
            return df, pd.DataFrame(), "❌ Error in cached data"

        if len(df) == 0:
            return pd.DataFrame(), pd.DataFrame(), "❌ No data available for analysis"

        # Run the media analysis.
        analysis_df, stats_df = media_analyzer.analyze_media(df)

        message = f"✅ Analysis completed! Processed {len(analysis_df)} articles from cached data."
        return analysis_df, stats_df, message

    except Exception as e:
        return pd.DataFrame(), pd.DataFrame(), f"❌ Error in analysis: {str(e)}"


# ----------------------------
# 🔹 Dashboard on cached data
# ----------------------------
def _build_emotion_figure(analysis_df):
    """Return a pie chart of the 'emotion' column, or a placeholder figure."""
    if 'emotion' in analysis_df.columns:
        fig = px.pie(
            analysis_df,
            names='emotion',
            title='Distribusi Emosi dalam Berita',
            color_discrete_sequence=px.colors.qualitative.Set3
        )
        fig.update_layout(height=400)
        return fig

    fig = go.Figure()
    fig.add_annotation(text="No emotion data available", x=0.5, y=0.5, showarrow=False)
    return fig


def _build_timeline_figure(df):
    """Return a line chart of article counts per calendar day and source."""
    # Name the grouping key explicitly so the resulting column is 'date'.
    article_days = pd.to_datetime(df['date']).dt.date.rename('date')
    timeline_df = df.groupby([article_days, 'source']).size().reset_index(name='count')
    fig = px.line(
        timeline_df,
        x='date',
        y='count',
        color='source',
        title='Timeline Artikel per Sumber Berita',
        labels={'date': 'Tanggal', 'count': 'Jumlah Artikel'}
    )
    fig.update_layout(height=400)
    return fig


def _build_source_figure(df):
    """Return a bar chart with the number of articles per source."""
    # Set the column names explicitly: value_counts().reset_index() names its
    # columns differently before and after pandas 2.0.
    source_counts = (
        df['source']
        .value_counts()
        .rename_axis('source')
        .reset_index(name='count')
    )
    fig = px.bar(
        source_counts,
        x='source',
        y='count',
        title='Distribusi Sumber Berita',
        labels={'source': 'Sumber Berita', 'count': 'Jumlah Artikel'}
    )
    fig.update_layout(height=400)
    return fig


def create_dashboard_cached():
    """Build the dashboard figures from the data currently held by data_manager.

    Returns:
        ``(emotion_fig, timeline_fig, source_fig, stats_markdown)``. When the
        dashboard cannot be built the three figures are ``None`` and the last
        element carries the error message.
    """
    if data_manager.current_data is None:
        return None, None, None, "❌ No data available. Please load data first."

    # Broad catch on purpose: this is the UI boundary (see the analysis tab).
    try:
        df = data_manager.current_data

        if "Error" in df.columns or len(df) == 0:
            return None, None, None, "❌ No valid data available for dashboard"

        # Media analysis feeding the emotion chart; the stats frame is unused here.
        analysis_df, _ = media_analyzer.analyze_media(df)

        if len(analysis_df) == 0:
            return None, None, None, "❌ No data available after analysis"

        emotion_fig = _build_emotion_figure(analysis_df)   # 1. emotion distribution
        timeline_fig = _build_timeline_figure(df)          # 2. articles over time per source
        source_fig = _build_source_figure(df)              # 3. articles per source

        # Dashboard statistics. Built line by line so that no source-code
        # indentation leaks into the Markdown (indented lines would render as
        # a code block).
        stats_text = "\n".join([
            "",
            "### 📊 Statistik Dashboard (Cached Data)",
            f"- **Total Artikel**: {len(df)}",
            f"- **Artikel Dianalisis**: {len(analysis_df)}",
            f"- **Sumber Berita**: {df['source'].nunique()}",
            f"- **Rentang Tanggal**: {df['date'].min()} hingga {df['date'].max()}",
            "- **Data Source**: Cached Data",
            "",
        ])

        return emotion_fig, timeline_fig, source_fig, stats_text

    except Exception as e:
        return None, None, None, f"❌ Error creating dashboard: {str(e)}"


# ----------------------------
# 🔹 Gradio interface with cache system
# ----------------------------
with gr.Blocks(title="Media Analysis Dashboard with Cache") as demo:
    gr.Markdown("# 📰 Media Analysis Dashboard with Cache")
    gr.Markdown("**Fitur Cache**: Data di-load sekali dari database, disimpan lokal, dan bisa digunakan berulang tanpa koneksi database!")

    # Pre-fill the date inputs from the latest cache entry, or from defaults.
    min_date, max_date = get_date_range_from_cache()

    # Global date filter
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 🔍 Filter Tanggal & Data Management")
            with gr.Row():
                start_date = gr.Textbox(
                    label="Tanggal Mulai (YYYY-MM-DD)",
                    placeholder=f"Contoh: {min_date}",
                    value=min_date,
                    info="Format: YYYY-MM-DD"
                )
                end_date = gr.Textbox(
                    label="Tanggal Akhir (YYYY-MM-DD)",
                    placeholder=f"Contoh: {max_date}",
                    value=max_date,
                    info="Format: YYYY-MM-DD"
                )

            with gr.Row():
                load_data_btn = gr.Button("📥 Load & Cache Data", variant="primary")
                clear_cache_btn = gr.Button("🗑️ Clear All Cache")
                cache_info_btn = gr.Button("📂 Show Cache Info")

            with gr.Row():
                load_status = gr.Textbox(
                    label="Status Load Data",
                    interactive=False,
                    value="Klik 'Load & Cache Data' untuk mengambil data dari database"
                )

    with gr.Tabs():
        # Tab 1: table and metadata
        with gr.TabItem("📊 Tabel dan Metadata"):
            with gr.Row():
                show_data_btn = gr.Button("📄 Tampilkan Data", variant="primary")

            with gr.Row():
                output_table = gr.Dataframe(
                    label="Data Artikel (Cached)",
                    interactive=False,
                    wrap=True
                )

        # Tab 2: media analysis
        with gr.TabItem("🔍 Analisis Media"):
            gr.Markdown("### 🎯 Analisis Media dari Data Cached")
            gr.Markdown("Pastikan data sudah di-load terlebih dahulu dari tab sebelumnya")

            with gr.Row():
                analyze_btn = gr.Button("🎯 Jalankan Analisis Media", variant="primary", size="lg")

            with gr.Row():
                analysis_status = gr.Textbox(
                    label="Status Analisis",
                    interactive=False,
                    value="Load data terlebih dahulu, lalu klik 'Jalankan Analisis Media'"
                )

            with gr.Row():
                analysis_results = gr.Dataframe(
                    label="Hasil Analisis Media",
                    interactive=False,
                    wrap=True
                )

            with gr.Row():
                analysis_stats = gr.Dataframe(
                    label="Statistik Analisis",
                    interactive=False
                )

        # Tab 3: dashboard
        with gr.TabItem("📈 Dashboard"):
            gr.Markdown("### 📊 Dashboard dari Data Cached")
            gr.Markdown("Visualisasi data yang sudah di-cache tanpa perlu koneksi database")

            with gr.Row():
                dashboard_btn = gr.Button("📊 Generate Dashboard", variant="primary", size="lg")

            with gr.Row():
                dashboard_status = gr.Markdown("Load data terlebih dahulu, lalu klik 'Generate Dashboard'")

            with gr.Row():
                with gr.Column(scale=1):
                    emotion_chart = gr.Plot(label="Distribusi Emosi")
                with gr.Column(scale=2):
                    timeline_chart = gr.Plot(label="Timeline Artikel")

            with gr.Row():
                source_chart = gr.Plot(label="Distribusi Sumber Berita")

            with gr.Row():
                dashboard_stats = gr.Markdown()

        # Tab 4: cache management
        with gr.TabItem("⚙️ Cache Management"):
            gr.Markdown("### 📦 Manajemen Cache Data")
            gr.Markdown("Kelola data yang disimpan secara lokal")

            with gr.Row():
                refresh_cache_btn = gr.Button("🔄 Refresh Cache Info")
                clear_cache_tab_btn = gr.Button("🗑️ Clear All Cache")

            with gr.Row():
                cache_info_display = gr.Markdown()

    # Event handlers
    # load_and_cache_data is a generator, so Gradio streams each yielded
    # (table, status) pair to the outputs as it is produced.
    load_data_btn.click(
        fn=load_and_cache_data,
        inputs=[start_date, end_date],
        outputs=[output_table, load_status]
    )

    show_data_btn.click(
        fn=load_data_only,
        inputs=[start_date, end_date],
        outputs=[output_table, load_status]
    )

    analyze_btn.click(
        fn=perform_media_analysis_cached,
        outputs=[analysis_results, analysis_stats, analysis_status]
    )

    dashboard_btn.click(
        fn=create_dashboard_cached,
        outputs=[emotion_chart, timeline_chart, source_chart, dashboard_stats]
    )

    cache_info_btn.click(
        fn=show_cached_data_info,
        outputs=cache_info_display
    )

    refresh_cache_btn.click(
        fn=show_cached_data_info,
        outputs=cache_info_display
    )

    clear_cache_btn.click(
        fn=clear_all_cache,
        outputs=load_status
    )

    clear_cache_tab_btn.click(
        fn=clear_all_cache,
        outputs=cache_info_display
    ).then(
        fn=show_cached_data_info,
        outputs=cache_info_display
    )

    # Show the cache info when the page is first loaded.
    demo.load(
        fn=show_cached_data_info,
        outputs=cache_info_display
    )

if __name__ == "__main__":
    # Load the NLP models before serving requests.
    media_analyzer.load_models()

    # Launch the application.
    demo.launch(server_name="0.0.0.0", server_port=None)