= committed on
Commit
d40b9df
·
0 Parent(s):

Initial commit

Browse files
Files changed (8) hide show
  1. .gitignore +50 -0
  2. README.md +55 -0
  3. agent_functions.py +208 -0
  4. app.py +167 -0
  5. dataset_handler.py +249 -0
  6. llm_agent.py +221 -0
  7. llm_client.py +73 -0
  8. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ venv/
9
+ ENV/
10
+ build/
11
+ develop-eggs/
12
+ dist/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ lib/
17
+ lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+
26
+ # Data files
27
+ *.json
28
+ students.json
29
+ exams.json
30
+ topics.json
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # OS
40
+ .DS_Store
41
+ Thumbs.db
42
+
43
+ # Environment variables
44
+ .env
45
+ .env.local
46
+
47
+ # API keys (if stored)
48
+ *.key
49
+ api_key.txt
50
+
README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## Що було зроблено
3
+
4
+ Цей проєкт реалізує LLM-агента з набором інструментів для роботи з датасетом сільськогосподарських досліджень. Ось що було реалізовано:
5
+
6
+ ### Основні компоненти:
7
+
8
+ 1. **LLM-агент з функціональним викликом (Function Calling)**
9
+ - Реалізовано клас `AgriculturalAgent` в `llm_agent.py`, який використовує LiteLLM для взаємодії з Groq API
10
+ - Агент підтримує багатоітераційну обробку запитів з автоматичним викликом функцій
11
+ - Реалізовано систему управління історією розмови для контекстної обробки запитів
12
+
13
+ 2. **Набір інструментів (Tools/Functions)**
14
+ - `search_agricultural_documents` - пошук документів за ключовими словами
15
+ - `get_document_details` - отримання детальної інформації про конкретний документ
16
+ - `browse_topics` - перегляд документів за темами або випадковий вибір
17
+ - `get_dataset_info` - отримання інформації про датасет
18
+ - Всі функції реалізовані в `agent_functions.py` з описом параметрів для LLM
19
+
20
+ 3. **Обробка датасету**
21
+ - Реалізовано `DatasetHandler` в `dataset_handler.py` для роботи з датасетом CGIAR
22
+ - Підтримка streaming-режиму для ефективної роботи з великим датасетом (45,232 документів)
23
+ - Реалізовано пошук за ключовими словами в заголовках, анотаціях та ключових словах
24
+ - Обробка помилок та обмеження часу виконання пошуку
25
+
26
+ 4. **Веб-інтерфейс**
27
+ - Створено інтерфейс на базі Gradio (`app.py`)
28
+ - Поле для введення API ключа (без зберігання в коді)
29
+ - Чат-інтерфейс для взаємодії з агентом
30
+ - Інформаційні блоки з описом можливостей та прикладами використання
31
+
32
+ 5. **Інтеграція з LLM**
33
+ - Використання LiteLLM для уніфікованої роботи з різними LLM провайдерами
34
+ - Налаштування системних промптів для спеціалізації агента на сільськогосподарській тематиці
35
+ - Автоматичне визначення необхідності виклику функцій на основі запиту користувача
36
+
37
+ ### Технічні особливості:
38
+
39
+ - **Streaming режим**: Датасет завантажується в режимі streaming для швидшого старту та економії пам'яті
40
+ - **Обробка помилок**: Реалізовано обробку помилок на всіх рівнях (завантаження датасету, пошук, виклики LLM)
41
+ - **Обмеження пошуку**: Для streaming-режиму встановлено обмеження на кількість перевірених документів (300) для швидкої відповіді
42
+ - **Багатоітераційна обробка**: Агент може виконувати кілька викликів функцій підряд для повної відповіді на запит
43
+ - **Захист від циклів**: Реалізовано захист від нескінченних циклів при однакових викликах функцій
44
+
45
+ ### Відмінності від запропонованого варіанту:
46
+
47
+ Замість агента для прийняття іспиту з курсу NLP, було обрано реалізацію агента для роботи з датасетом сільськогосподарських досліджень, що дозволяє продемонструвати:
48
+ - Роботу з реальним великим датасетом
49
+ - Пошук та фільтрацію інформації
50
+ - Структуровану роботу з документами
51
+ - Практичне застосування LLM-агентів для інформаційного пошуку
52
+
53
+ ## Acknowledgments
54
+
55
+ - Dataset: [CGIAR/gardian-ai-ready-docs](https://huggingface.co/datasets/CGIAR/gardian-ai-ready-docs)
agent_functions.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataset_handler import DatasetHandler
2
+ from typing import List, Dict, Optional
3
+
4
+ dataset_handler = DatasetHandler(use_streaming=True)
5
+
6
+
7
def search_agricultural_documents(keyword: str, limit: int = 5) -> str:
    """
    Search for agricultural research documents by keyword.

    Matches the keyword against each document's title, abstract, and keyword
    list in the CGIAR dataset. Intended for questions about specific crops,
    techniques, or agricultural concepts.

    Args:
        keyword: The search keyword (e.g., "rice", "pest control", "climate adaptation")
        limit: Maximum number of documents to return (default: 5)

    Returns:
        A formatted string containing information about matching documents
    """
    try:
        print(f"[FUNCTION] Searching for '{keyword}' (limit: {limit})...")
        matches = dataset_handler.search_by_keyword(keyword, limit)

        if not matches:
            return f"No documents found matching '{keyword}' after searching the dataset. The search may have been limited due to network timeouts. Try a different search term or a more specific keyword."

        summaries = [
            f"{idx}. {dataset_handler.format_document_summary(doc)}\n\n"
            for idx, doc in enumerate(matches, 1)
        ]
        return f"Found {len(matches)} document(s) matching '{keyword}':\n\n" + "".join(summaries)
    except Exception as e:
        error_msg = str(e)
        # Distinguish network stalls from genuine failures so the LLM can advise retrying.
        if "timeout" in error_msg.lower() or "timed out" in error_msg.lower():
            return "Search timed out while accessing the dataset. This can happen when the dataset is under heavy load. Please try again in a moment or use a more specific search term."
        return f"Error searching documents: {error_msg}"
39
+
40
+
41
def get_document_details(title: str) -> str:
    """
    Get detailed information about a specific document by its title.

    Use this when the user wants a deep dive on a paper surfaced by a previous
    search result — it adds chapter and figure information on top of the
    standard document summary.

    Args:
        title: The exact title of the document

    Returns:
        Detailed information about the document including chapters and figures
    """
    try:
        doc = dataset_handler.get_document_by_title(title)
        if not doc:
            return f"Document with title '{title}' not found. Please check the title and try again."

        parts = ["**Document Details:**\n\n", dataset_handler.format_document_summary(doc)]

        chapters = doc.get('chapters')
        if chapters:
            parts.append(f"\n**Chapters:** {len(chapters)} chapters found\n")
            # Only the first five chapter headings are listed to keep replies short.
            for idx, chapter in enumerate(chapters[:5], 1):
                parts.append(f"  {idx}. {chapter.get('head', 'Untitled')}\n")

        figures = doc.get('figures')
        if figures:
            parts.append(f"\n**Figures/Tables:** {len(figures)} found\n")

        return "".join(parts)
    except Exception as e:
        return f"Error retrieving document: {str(e)}"
76
+
77
+
78
def browse_topics(topic: Optional[str] = None) -> str:
    """
    Browse agricultural research documents by topic.

    Common topics include: crop management, pest control, climate adaptation,
    farming systems, soil management, water management, sustainable agriculture,
    small-scale farming, agricultural extension, food security.

    Args:
        topic: Optional specific topic to browse. If None, returns random documents.

    Returns:
        Information about documents related to the topic
    """
    # Fixes: `topic: str = None` was an implicit-Optional annotation, and the
    # result-formatting loop was duplicated in both branches.
    try:
        if topic:
            results = dataset_handler.search_by_topic(topic, limit=5)
            if not results:
                return f"No documents found for topic '{topic}'. Try a different topic."
            response = f"Documents related to '{topic}':\n\n"
        else:
            results = dataset_handler.get_random_documents(limit=3)
            response = "Sample agricultural research documents:\n\n"

        # Single formatting pass shared by both branches.
        for i, doc in enumerate(results, 1):
            response += f"{i}. {dataset_handler.format_document_summary(doc)}\n\n"

        return response
    except Exception as e:
        return f"Error browsing topics: {str(e)}"
110
+
111
+
112
def get_dataset_info() -> str:
    """
    Get information about the dataset.

    Returns:
        Information about the CGIAR dataset
    """
    try:
        # Lazy-load on first use so importing this module stays cheap.
        if not dataset_handler.loaded:
            dataset_handler.load_dataset()

        # A streaming dataset has no len(); fall back to the known approximate count.
        if dataset_handler.use_streaming:
            total_docs = "45,232+ (streaming mode)"
        else:
            total_docs = f"{len(dataset_handler.dataset):,}"

        # NOTE(review): the closing "streaming mode" note below is emitted even
        # when use_streaming is False — confirm this is intended.
        return f"""**CGIAR Agricultural Research Dataset**

This dataset contains {total_docs} agricultural research publications from CGIAR,
specifically processed for AI applications in agricultural advisory services.

**Dataset Features:**
- Comprehensive collection of agricultural research papers
- Topics include: crop management, pest control, climate adaptation, farming systems,
soil management, water management, sustainable agriculture, and more
- Documents are structured with metadata, abstracts, keywords, chapters, and figures
- Focus on small-scale producer contexts in low and middle-income countries

**Source:** GARDIAN (CGIAR's agri-food data hub)
**License:** CC-BY-4.0

**Note:** Dataset is loaded in streaming mode for faster access.
"""
    except Exception as e:
        return f"Error getting dataset info: {str(e)}"
147
+
148
+
149
# List of available functions for the LLM agent.
# Registry consumed by AgriculturalAgent.get_tools_schema(): maps each tool name
# exposed to the LLM onto the local callable plus its JSON-schema parameter
# description (OpenAI function-calling format).
AVAILABLE_FUNCTIONS = {
    "search_agricultural_documents": {
        "function": search_agricultural_documents,
        "description": "Search for agricultural research documents by keyword. Use when user asks about specific topics, crops, or agricultural concepts.",
        "parameters": {
            "type": "object",
            "properties": {
                "keyword": {
                    "type": "string",
                    "description": "The search keyword (e.g., 'rice', 'pest control', 'climate adaptation')"
                },
                "limit": {
                    "type": "integer",
                    "description": "Maximum number of documents to return (default: 5)",
                    "default": 5
                }
            },
            "required": ["keyword"]
        }
    },
    "get_document_details": {
        "function": get_document_details,
        "description": "Get detailed information about a specific document by its exact title. Use when user asks for more details about a specific paper.",
        "parameters": {
            "type": "object",
            "properties": {
                "title": {
                    "type": "string",
                    "description": "The exact title of the document"
                }
            },
            "required": ["title"]
        }
    },
    "browse_topics": {
        "function": browse_topics,
        "description": "Browse documents by agricultural topic or get random sample documents. Common topics: crop management, pest control, climate adaptation, farming systems, etc.",
        "parameters": {
            "type": "object",
            "properties": {
                "topic": {
                    "type": "string",
                    "description": "Optional specific topic to browse. If not provided, returns random documents."
                }
            },
            # "topic" is deliberately optional: the LLM may call this tool with no arguments.
            "required": []
        }
    },
    "get_dataset_info": {
        "function": get_dataset_info,
        "description": "Get information about the CGIAR dataset itself. Use when user asks about the dataset, its size, or what it contains.",
        "parameters": {
            "type": "object",
            "properties": {},
            "required": []
        }
    }
}
208
+
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llm_agent import AgriculturalAgent
3
+ import os
4
+
5
+ agent = None
6
+
7
+
8
def initialize_agent(api_key: str) -> str:
    """
    Create the global AgriculturalAgent from a user-supplied Groq API key.

    Args:
        api_key: Groq API key

    Returns:
        Status message
    """
    global agent

    key = (api_key or "").strip()
    if not key:
        return "Please enter a valid API key."

    try:
        agent = AgriculturalAgent(api_key=key)
    except Exception as exc:
        return f"Error initializing agent: {str(exc)}"
    return "Agent initialized successfully! Dataset loaded and ready. You can now start asking questions."
27
+
28
+
29
def chat_with_agent(message: str, history: list, api_key: str) -> tuple:
    """
    Route one user message through the (lazily created) global agent.

    Args:
        message: User's message
        history: Chat history (Gradio format: list of tuples [(user_msg, assistant_msg), ...]);
            mutated in place
        api_key: Groq API key, used to auto-initialize the agent when needed

    Returns:
        Tuple of (empty string, updated history in Gradio format) — the empty
        string clears the input textbox
    """
    global agent

    print(f"\n[APP] Received message: {message[:100]}...")

    def _reply(text: str) -> tuple:
        # Record the exchange and clear the input box.
        history.append((message, text))
        return "", history

    if agent is None:
        if not api_key or not api_key.strip():
            return _reply("Please initialize the agent with an API key first.")
        print("[APP] Initializing agent...")
        init_msg = initialize_agent(api_key)
        if "Error" in init_msg:
            return _reply(init_msg)

    if agent is None:
        return _reply("Please initialize the agent with an API key first.")

    print("[APP] Sending message to agent...")

    response = agent.chat(message)
    print(f"[APP] Received response from agent: {response[:100]}...")

    return _reply(response)
67
+
68
+
69
def reset_chat():
    """Clear the Gradio chat log and the agent's stored conversation history.

    Returns:
        An empty list, which Gradio uses to blank the Chatbot component.
    """
    global agent
    if agent is not None:
        agent.reset_conversation()
    return []
75
+
76
+
77
# --- Gradio UI definition (module-level; the layout is built at import time) ---
with gr.Blocks(title="Agricultural Research AI Agent") as demo:
    # Static header: description, feature list, and usage instructions.
    gr.Markdown("""
# 🌾 Agricultural Research AI Agent

This AI agent helps you explore and find information from a comprehensive collection of
**45,232 agricultural research publications** from CGIAR (Consultative Group on International Agricultural Research).

## Features
- 🔍 Search for research documents on specific agricultural topics
- 📚 Browse documents by topic (crop management, pest control, climate adaptation, etc.)
- 📄 Get detailed information about specific research papers
- 💡 Get insights and answers based on the latest agricultural research

## How to Use
1. Enter your Groq API key in the field below (get a free key at https://console.groq.com)
2. Click "Initialize Agent" to start
3. Ask questions about agricultural topics, search for documents, or browse research papers

**Example questions:**
- "What research is available on rice cultivation?"
- "Find documents about pest control in agriculture"
- "Tell me about climate adaptation strategies for small-scale farmers"
- "What does the dataset contain?"

---
""")

    with gr.Row():
        with gr.Column(scale=3):
            # API key entry — masked; the key lives only in this session.
            api_key_input = gr.Textbox(
                label="Groq API Key",
                placeholder="Enter your Groq API key here...",
                type="password",
                info="Your API key is not stored and is only used for this session. Get a free API key at https://console.groq.com"
            )
            init_btn = gr.Button("Initialize Agent", variant="primary")
            init_status = gr.Textbox(label="Status", interactive=False)

        with gr.Column(scale=1):
            reset_btn = gr.Button("Reset Chat", variant="secondary")

    # Chat transcript display (legacy tuple-format history).
    chatbot = gr.Chatbot(
        label="Chat with Agricultural Research Agent",
        height=500
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Your Question",
            placeholder="Ask about agricultural research, search for documents, or browse topics...",
            scale=4,
            lines=2
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    # Event wiring: explicit Initialize button.
    init_btn.click(
        fn=initialize_agent,
        inputs=[api_key_input],
        outputs=[init_status]
    )

    # Both the Send button and pressing Enter in the textbox submit a message;
    # chat_with_agent returns ("", history) so the textbox is cleared.
    submit_btn.click(
        fn=chat_with_agent,
        inputs=[msg, chatbot, api_key_input],
        outputs=[msg, chatbot]
    )

    msg.submit(
        fn=chat_with_agent,
        inputs=[msg, chatbot, api_key_input],
        outputs=[msg, chatbot]
    )

    reset_btn.click(
        fn=reset_chat,
        outputs=[chatbot]
    )

    # Footer: dataset provenance and API note.
    gr.Markdown("""
---
**Dataset Information:**
- Source: [CGIAR/gardian-ai-ready-docs](https://huggingface.co/datasets/CGIAR/gardian-ai-ready-docs) on HuggingFace
- Contains 45,232 structured agricultural research publications
- Topics: Crop management, pest control, climate adaptation, farming systems, and more

**Note:** This application uses Groq's free API for the LLM agent. Get your free API key at [console.groq.com](https://console.groq.com)
""")

if __name__ == "__main__":
    # NOTE(review): `theme=` is an argument of the gr.Blocks(...) constructor,
    # not of launch() — confirm this call succeeds on the pinned Gradio version.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860, theme=gr.themes.Soft())
167
+
dataset_handler.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from typing import List, Dict, Optional
3
+ import json
4
+
5
+
6
class DatasetHandler:
    """Handles loading and searching the CGIAR agricultural dataset."""

    # Cap on documents scanned per query in streaming mode, so a search returns
    # quickly instead of iterating the whole remote dataset.
    _STREAMING_SCAN_LIMIT = 300
    # Abort a scan after this many records in a row fail to process.
    _MAX_CONSECUTIVE_ERRORS = 3

    def __init__(self, use_streaming: bool = True, max_samples: Optional[int] = None):
        """
        Initialize dataset handler.

        Args:
            use_streaming: If True, use streaming mode (faster, doesn't download all files)
            max_samples: Maximum number of samples to load (None = all, for testing use smaller number)
        """
        self.dataset = None
        self.loaded = False
        self.use_streaming = use_streaming
        self.max_samples = max_samples

    def load_dataset(self):
        """Load the CGIAR dataset from HuggingFace.

        Idempotent: repeated calls reuse the already-loaded dataset.

        Returns:
            The loaded dataset (streaming iterable or indexable split).

        Raises:
            Exception: Propagates any failure from the underlying load.
        """
        if not self.loaded:
            try:
                print("Loading CGIAR dataset from HuggingFace (this may take a moment)...")

                if self.use_streaming:
                    self.dataset = load_dataset(
                        "CGIAR/gardian-ai-ready-docs",
                        split="train",
                        streaming=True
                    )
                    print("Dataset loaded in streaming mode (lazy loading - files downloaded on-demand only)")
                elif self.max_samples:
                    # Download only a slice of the split for testing.
                    self.dataset = load_dataset(
                        "CGIAR/gardian-ai-ready-docs",
                        split=f"train[:{self.max_samples}]"
                    )
                    print(f"Dataset loaded successfully! Loaded {len(self.dataset)} documents (sample)")
                else:
                    self.dataset = load_dataset("CGIAR/gardian-ai-ready-docs", split="train")
                    print(f"Dataset loaded successfully! Total documents: {len(self.dataset)}")

                self.loaded = True
            except Exception as e:
                print(f"Error loading dataset: {e}")
                raise
        return self.dataset

    @staticmethod
    def _summary_fields(doc: Dict) -> Dict:
        """Project a raw dataset record onto the summary fields used everywhere.

        Previously this dict literal was duplicated in three methods; records may
        carry explicit None values, so nested lookups are None-safe.
        """
        metadata = doc.get('metadata') or {}
        return {
            'title': doc.get('title', ''),
            'abstract': doc.get('abstract', ''),
            'keywords': doc.get('keywords', []),
            'url': metadata.get('url', ''),
            'source': metadata.get('source', ''),
            'pageCount': doc.get('pageCount', 0)
        }

    def search_by_keyword(self, keyword: str, limit: int = 5) -> List[Dict]:
        """
        Search documents by keyword in title, abstract, or keywords.

        Args:
            keyword: Search keyword
            limit: Maximum number of results to return

        Returns:
            List of matching documents (possibly partial if errors/limits hit)
        """
        if not self.loaded:
            self.load_dataset()

        keyword_lower = keyword.lower()
        results = []
        checked = 0
        max_to_check = self._STREAMING_SCAN_LIMIT if self.use_streaming else None
        consecutive_errors = 0

        try:
            for doc in self.dataset:
                try:
                    checked += 1
                    # Show progress every 100 documents.
                    if checked % 100 == 0:
                        print(f"[DATASET] Checked {checked} documents, found {len(results)} matches so far...")

                    if max_to_check and checked > max_to_check:
                        print(f"[DATASET] Reached search limit of {max_to_check} documents")
                        break

                    # None-safe extraction: records may hold explicit nulls, and a
                    # None value would otherwise crash .lower()/join and waste the
                    # consecutive-error budget on well-formed records.
                    title = (doc.get('title') or '').lower()
                    abstract = (doc.get('abstract') or '').lower()
                    keywords = ' '.join(doc.get('keywords') or []).lower()

                    if keyword_lower in title or keyword_lower in abstract or keyword_lower in keywords:
                        results.append(self._summary_fields(doc))

                    consecutive_errors = 0  # Reset on a successfully processed record

                    if len(results) >= limit:
                        break

                except Exception:
                    consecutive_errors += 1
                    if consecutive_errors >= self._MAX_CONSECUTIVE_ERRORS:
                        print(f"[DATASET] Too many consecutive errors ({consecutive_errors}), stopping search")
                        break
                    # Skip the bad record and continue scanning.
                    continue

        except Exception as e:
            print(f"[DATASET] Error during search: {e}")
            # Fall through and return whatever partial results were collected.

        if results:
            print(f"[DATASET] Found {len(results)} results after checking {checked} documents")
        else:
            print(f"[DATASET] No results found after checking {checked} documents")

        return results

    def search_by_topic(self, topic: str, limit: int = 5) -> List[Dict]:
        """
        Search documents by agricultural topic.

        Thin alias over keyword search — topics are matched as plain keywords.

        Args:
            topic: Agricultural topic (e.g., "crop management", "pest control")
            limit: Maximum number of results to return

        Returns:
            List of matching documents
        """
        return self.search_by_keyword(topic, limit)

    def get_document_by_title(self, title: str) -> Optional[Dict]:
        """
        Retrieve a specific document by its title (case-insensitive exact match).

        Args:
            title: Document title

        Returns:
            Document data (summary fields plus chapters/figures) or None if not found
        """
        if not self.loaded:
            self.load_dataset()

        title_lower = title.lower()
        checked = 0
        max_to_check = self._STREAMING_SCAN_LIMIT if self.use_streaming else None  # Very aggressive limit
        consecutive_errors = 0

        try:
            for doc in self.dataset:
                try:
                    checked += 1
                    if max_to_check and checked > max_to_check:
                        break

                    if (doc.get('title') or '').lower() == title_lower:
                        details = self._summary_fields(doc)
                        # Detail view additionally carries structural content.
                        details['chapters'] = doc.get('chapters', [])
                        details['figures'] = doc.get('figures', [])
                        return details
                except Exception:
                    consecutive_errors += 1
                    if consecutive_errors >= self._MAX_CONSECUTIVE_ERRORS:
                        break
                    continue
        except Exception as e:
            print(f"[DATASET] Error searching for document: {e}")

        return None

    def get_random_documents(self, limit: int = 3) -> List[Dict]:
        """
        Get documents from the dataset.

        In streaming mode this returns the FIRST `limit` documents of the
        stream (a remote stream cannot be sampled cheaply), not a true random
        sample; in non-streaming mode it is genuinely random.

        Args:
            limit: Number of documents to return

        Returns:
            List of documents
        """
        if not self.loaded:
            self.load_dataset()

        if self.use_streaming:
            results = []
            for doc in self.dataset:
                # Check before appending so limit<=0 correctly yields [].
                if len(results) >= limit:
                    break
                results.append(self._summary_fields(doc))
            return results

        import random  # Imported lazily, matching the original module style
        indices = random.sample(range(len(self.dataset)), min(limit, len(self.dataset)))
        return [self._summary_fields(self.dataset[idx]) for idx in indices]

    def format_document_summary(self, doc: Dict) -> str:
        """
        Format a document for display in the chat.

        Args:
            doc: Document dictionary

        Returns:
            Formatted string representation (Markdown; abstract truncated to 500 chars)
        """
        summary = f"**Title:** {doc.get('title', 'N/A')}\n"
        summary += f"**Abstract:** {doc.get('abstract', 'N/A')[:500]}...\n"
        if doc.get('keywords'):
            summary += f"**Keywords:** {', '.join(doc.get('keywords', []))}\n"
        summary += f"**Source:** {doc.get('source', 'N/A')}\n"
        if doc.get('url'):
            summary += f"**URL:** {doc.get('url')}\n"
        return summary
249
+
llm_agent.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import List, Dict, Optional
3
+ from litellm import completion
4
+ from agent_functions import AVAILABLE_FUNCTIONS, dataset_handler
5
+ from llm_client import LLMClient
6
+
7
+
8
class AgriculturalAgent:
    """LLM Agent for answering questions about agricultural research using the CGIAR dataset.

    Drives a multi-iteration tool-calling loop via LiteLLM: the model may
    request functions from AVAILABLE_FUNCTIONS, their results are fed back as
    tool messages, and the loop repeats until the model produces plain text
    (or a safety limit is hit).
    """

    def __init__(self, api_key: str, model: str = "groq/llama-3.3-70b-versatile"):
        """
        Initialize the agent.

        Args:
            api_key: Groq API key
            model: Model to use (default: groq/llama-3.3-70b-versatile)
        """
        self.llm_client = LLMClient(model=model, api_key=api_key)
        self.model = model
        # Full running transcript (user / assistant / tool messages) kept
        # across chat() calls for conversational context.
        self.conversation_history: List[Dict] = []

        # Eagerly load the shared dataset so the first query doesn't pay the cost.
        print("Initializing dataset...")
        dataset_handler.load_dataset()
        print("Dataset ready!")

    def get_tools_schema(self) -> List[Dict]:
        """Get the tools schema for function calling.

        Returns:
            AVAILABLE_FUNCTIONS converted into the OpenAI-style `tools` list
            accepted by litellm.completion.
        """
        tools = []
        for func_name, func_info in AVAILABLE_FUNCTIONS.items():
            tools.append({
                "type": "function",
                "function": {
                    "name": func_name,
                    "description": func_info["description"],
                    "parameters": func_info["parameters"]
                }
            })
        return tools

    def call_function(self, function_name: str, arguments: Dict) -> str:
        """
        Call a function by name with the provided arguments.

        Args:
            function_name: Name of the function to call
            arguments: Arguments to pass to the function

        Returns:
            Function result as string (errors are returned as strings, never raised,
            so the tool result can always be handed back to the model)
        """
        if function_name not in AVAILABLE_FUNCTIONS:
            return f"Error: Function '{function_name}' not found."

        func = AVAILABLE_FUNCTIONS[function_name]["function"]
        try:
            result = func(**arguments)
            return result
        except Exception as e:
            return f"Error calling function {function_name}: {str(e)}"

    def chat(self, user_message: str, system_message: Optional[str] = None) -> str:
        """
        Process a user message and return a response.

        Args:
            user_message: The user's message
            system_message: Optional system message to override default

        Returns:
            Agent's response (plain text; errors are returned as strings)
        """
        print(f"[AGENT] Processing user message: {user_message[:100]}...")

        if system_message is None:
            system_message = """You are an AI assistant specialized in agricultural research.
You have access to a comprehensive dataset of agricultural research publications from CGIAR.

Your role is to:
1. Help users find relevant agricultural research documents
2. Answer questions about agricultural topics using information from the dataset
3. Provide insights based on the research papers available

When a user asks a question:
- If they ask about a specific topic, crop, or agricultural concept, use the search_agricultural_documents function
- If they want to browse topics, use the browse_topics function
- If they ask about a specific document, use get_document_details
- If they ask about the dataset itself, use get_dataset_info

Always be helpful, accurate, and cite the sources when providing information from the dataset.
If you don't have enough information, suggest searching for more specific documents."""

        self.conversation_history.append({
            "role": "user",
            "content": user_message
        })

        # Rebuild the prompt: system message first, then the whole transcript.
        messages = [{"role": "system", "content": system_message}]
        messages.extend(self.conversation_history)

        tools = self.get_tools_schema()
        print(f"[AGENT] Calling LLM API with model: {self.model}")

        try:
            # Safety limit on tool-call round-trips per user message.
            max_iterations = 5
            iteration = 0
            last_tool_calls = []
            tool_messages = []

            while iteration < max_iterations:
                iteration += 1
                print(f"[AGENT] LLM API call iteration {iteration}...")

                response = completion(
                    model=self.model,
                    messages=messages,
                    tools=tools,
                    tool_choice="auto",
                    temperature=0.1,
                    max_tokens=2048
                )

                message = response.choices[0].message

                if hasattr(message, 'tool_calls') and message.tool_calls:
                    print(f"[AGENT] LLM requested {len(message.tool_calls)} tool call(s)")

                    # Normalize (name, raw-args) pairs; tool calls may be objects
                    # or plain dicts depending on the provider response shape.
                    current_tool_calls = [(tc.function.name if hasattr(tc, 'function') else tc.get('function', {}).get('name'),
                                           tc.function.arguments if hasattr(tc, 'function') else tc.get('function', {}).get('arguments', '{}'))
                                          for tc in message.tool_calls]

                    # Loop guard: identical consecutive tool calls mean the model
                    # is stuck repeating itself — bail out.
                    if iteration > 1 and current_tool_calls == last_tool_calls:
                        print(f"[AGENT] Warning: Same tool calls detected, breaking loop")
                        if tool_messages:
                            assistant_response = tool_messages[-1].get('content', '')[:500] + "..."
                        else:
                            assistant_response = "I encountered an issue processing your request. Please try rephrasing your question."
                        # NOTE(review): this assistant_response is overwritten after the
                        # loop by the `if tool_messages:` fallback (which returns the
                        # untruncated last tool result) — confirm that is intended.
                        break

                    last_tool_calls = current_tool_calls

                    # Record the assistant's tool-call turn in the transcript,
                    # normalized to plain dicts.
                    tool_calls_data = []
                    for tc in message.tool_calls:
                        tool_calls_data.append({
                            "id": tc.id if hasattr(tc, 'id') else str(hash(str(tc))),
                            "type": tc.type if hasattr(tc, 'type') else "function",
                            "function": {
                                "name": tc.function.name if hasattr(tc, 'function') else tc.get('function', {}).get('name'),
                                "arguments": tc.function.arguments if hasattr(tc, 'function') else tc.get('function', {}).get('arguments', '{}')
                            }
                        })

                    self.conversation_history.append({
                        "role": "assistant",
                        "content": message.content or "",
                        "tool_calls": tool_calls_data
                    })

                    # Execute every requested tool and collect the results.
                    tool_messages = []
                    for tc in message.tool_calls:
                        function_name = tc.function.name if hasattr(tc, 'function') else tc.get('function', {}).get('name')
                        try:
                            arguments_str = tc.function.arguments if hasattr(tc, 'function') else tc.get('function', {}).get('arguments', '{}')
                            # Treat absent/empty/'null' argument payloads as "no arguments".
                            if arguments_str is None or arguments_str == '' or arguments_str == 'null':
                                arguments = {}
                            else:
                                arguments = json.loads(arguments_str)
                        except (json.JSONDecodeError, AttributeError, TypeError) as e:
                            print(f"[AGENT] Warning: Failed to parse arguments: {e}, using empty dict")
                            arguments = {}

                        print(f"[AGENT] Calling function: {function_name} with args: {arguments}")
                        function_result = self.call_function(function_name, arguments)
                        print(f"[AGENT] Function {function_name} returned result (length: {len(function_result)} chars)")
                        tool_call_id = tc.id if hasattr(tc, 'id') else str(hash(str(tc)))

                        tool_messages.append({
                            "role": "tool",
                            "tool_call_id": tool_call_id,
                            "name": function_name,
                            "content": function_result
                        })

                    self.conversation_history.extend(tool_messages)

                    # Rebuild the prompt including the new tool results and iterate.
                    messages = [{"role": "system", "content": system_message}]
                    messages.extend(self.conversation_history)

                else:
                    # Plain-text answer: record it and return immediately.
                    assistant_response = message.content or "I apologize, but I couldn't generate a response."
                    print(f"[AGENT] Generated response (length: {len(assistant_response)} chars)")
                    self.conversation_history.append({
                        "role": "assistant",
                        "content": assistant_response
                    })
                    return assistant_response

            # Loop exhausted (or broken out of) without a plain-text answer:
            # fall back to the most recent tool output, if any.
            if tool_messages:
                print(f"[AGENT] Max iterations reached, returning last tool result")
                assistant_response = tool_messages[-1].get('content', '')
                self.conversation_history.append({
                    "role": "assistant",
                    "content": assistant_response
                })
                return assistant_response

            return "I apologize, but I encountered an issue processing your request. Please try again."

        except Exception as e:
            # Surface the error to the user and keep it in the transcript so the
            # conversation stays consistent.
            error_msg = f"Error in chat: {str(e)}"
            print(f"[AGENT] ERROR: {error_msg}")
            self.conversation_history.append({
                "role": "assistant",
                "content": error_msg
            })
            return error_msg

    def reset_conversation(self):
        """Reset the conversation history."""
        self.conversation_history = []
221
+
llm_client.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Optional
3
+ from litellm import completion
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
+
9
class LLMClient:
    """Thin wrapper around LiteLLM's ``completion`` call for Groq-hosted models."""

    def __init__(
        self,
        model: str = "groq/llama-3.3-70b-versatile",
        api_key: Optional[str] = None,
        temperature: float = 0.1
    ):
        """
        Initialize LLM client.

        Args:
            model: Model identifier (e.g., "groq/llama-3.3-70b-versatile")
            api_key: API key (if None, uses GROQ_API_KEY env var)
            temperature: Sampling temperature

        Raises:
            ValueError: If no api_key is given and GROQ_API_KEY is not set
                in the environment.
        """
        self.model = model
        self.temperature = temperature

        if api_key:
            # LiteLLM picks the key up from the environment, so export it there.
            os.environ["GROQ_API_KEY"] = api_key
        elif "GROQ_API_KEY" not in os.environ:
            # No point re-reading os.getenv here: it consults the same
            # os.environ mapping we just checked, so the key cannot appear.
            # Fail fast with an actionable message instead.
            raise ValueError(
                "GROQ_API_KEY not found. Please set it as environment variable "
                "or pass as api_key parameter. Get free key from https://console.groq.com/"
            )

    def generate(
        self,
        prompt: str,
        max_tokens: int = 512,
        system_prompt: Optional[str] = None
    ) -> str:
        """
        Generate text using the configured LLM.

        Args:
            prompt: User prompt
            max_tokens: Maximum tokens to generate
            system_prompt: Optional system prompt

        Returns:
            The first choice's message content from the completion response.

        Raises:
            RuntimeError: If the underlying LiteLLM call fails; the original
                exception is chained as ``__cause__`` for debugging.
        """
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        # Keep the try body minimal: only the network call can raise here.
        try:
            response = completion(
                model=self.model,
                messages=messages,
                temperature=self.temperature,
                max_tokens=max_tokens
            )
        except Exception as e:
            # Chain the root cause instead of discarding it (original code
            # raised a bare Exception with no ``from e``). RuntimeError is
            # still caught by callers handling Exception.
            raise RuntimeError(f"Error calling LLM: {str(e)}") from e

        # NOTE(review): content can be None for tool-call-only responses —
        # presumably callers here always expect plain text; verify upstream.
        return response.choices[0].message.content
+
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ litellm>=1.0.0
3
+ python-dotenv>=1.0.0
4
+ datasets>=2.14.0
5
+ huggingface-hub>=0.16.0
6
+