frreiss committed on
Commit e76b4fb · 1 Parent(s): 8f2dc00
Files changed (32)
  1. .gitattributes +3 -0
  2. .gitignore +3 -0
  3. answerability/README.md +186 -0
  4. answerability/gpt-oss-20b/lora/adapter_config.json +45 -0
  5. answerability/gpt-oss-20b/lora/adapter_model.safetensors +3 -0
  6. answerability/gpt-oss-20b/lora/io.yaml +27 -0
  7. citations/README.md +216 -0
  8. citations/gpt-oss-20b/lora/adapter_config.json +36 -0
  9. citations/gpt-oss-20b/lora/adapter_model.safetensors +3 -0
  10. citations/gpt-oss-20b/lora/chat_template.jinja +331 -0
  11. citations/gpt-oss-20b/lora/io.yaml +97 -0
  12. citations/gpt-oss-20b/lora/special_tokens_map.json +23 -0
  13. citations/gpt-oss-20b/lora/tokenizer.json +3 -0
  14. citations/gpt-oss-20b/lora/tokenizer_config.json +185 -0
  15. hallucination_detection/README.md +117 -0
  16. hallucination_detection/gpt-oss-20b/lora/README.md +202 -0
  17. hallucination_detection/gpt-oss-20b/lora/adapter_config.json +36 -0
  18. hallucination_detection/gpt-oss-20b/lora/adapter_model.safetensors +3 -0
  19. hallucination_detection/gpt-oss-20b/lora/chat_template.jinja +331 -0
  20. hallucination_detection/gpt-oss-20b/lora/io.yaml +81 -0
  21. hallucination_detection/gpt-oss-20b/lora/special_tokens_map.json +23 -0
  22. hallucination_detection/gpt-oss-20b/lora/tokenizer.json +3 -0
  23. hallucination_detection/gpt-oss-20b/lora/tokenizer_config.json +185 -0
  24. query_rewrite/README.md +0 -0
  25. query_rewrite/gpt-oss-20b/lora/adapter_config.json +45 -0
  26. query_rewrite/gpt-oss-20b/lora/adapter_model.safetensors +3 -0
  27. query_rewrite/gpt-oss-20b/lora/chat_template.jinja +397 -0
  28. query_rewrite/gpt-oss-20b/lora/io.yaml +22 -0
  29. query_rewrite/gpt-oss-20b/lora/special_tokens_map.json +17 -0
  30. query_rewrite/gpt-oss-20b/lora/tokenizer.json +3 -0
  31. query_rewrite/gpt-oss-20b/lora/tokenizer_config.json +184 -0
  32. run_vllm.sh +45 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ citations/gpt-oss-20b/lora/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ hallucination_detection/gpt-oss-20b/lora/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ query_rewrite/gpt-oss-20b/lora/tokenizer.json filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
+ **/.DS_Store
+ **/*.swp
+
answerability/README.md ADDED
@@ -0,0 +1,186 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ pipeline_tag: text-generation
+ library_name: peft
+ ---
+
+ # Intrinsics for Answerability Classification
+
+ ## Model Summary
+ This is a RAG-specific family of intrinsics fine-tuned for the binary
+ answerability classification task. The model takes as input a multi-turn
+ conversation and a set of documents, and classifies whether the user's final
+ query is answerable or unanswerable based on the information available in the
+ documents.
+
+ We provide two intrinsic variants, implemented as LoRA and aLoRA adapters,
+ trained over Granite-3.3-2b-instruct, Granite-3.3-8b-instruct, and GPT-OSS-20b.
+
+ - **Developer:** IBM Research
+ - **Model type:** LoRA and aLoRA adapters for
+ [ibm-granite/granite-3.3-2b-instruct](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct),
+ [ibm-granite/granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct),
+ and [openai/gpt-oss-20b](https://huggingface.co/openai/gpt-oss-20b)
+ - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
+
+ ## Intended use
+ This is a family of intrinsics that enables answerability classification for
+ the final user query in a multi-turn conversation, with respect to a set of
+ provided documents. The model is trained to determine whether the last user
+ query is answerable or unanswerable based solely on the information present in
+ the documents. This makes it suitable for applications involving RAG and
+ document-grounded chatbots, where knowing whether sufficient information exists
+ to answer a query is crucial. The classification output from the answerability
+ model can be used in several downstream applications, including but not limited
+ to:
+ - Filtering out unanswerable questions before sending them to generation in a
+ RAG setting. By classifying a query as unanswerable upfront, the system can
+ prevent hallucinated or misleading responses.
+ - Re-querying the retriever to get more relevant documents. If a query is
+ initially deemed unanswerable, the retriever can be re-invoked with alternate
+ formulations to fetch more relevant documents.
+
+ **Model input**: The input to the answerability intrinsic is an
+ OpenAI-compatible chat completion request, containing a list of conversation
+ turns that alternate between the `user` and `assistant` roles and end with
+ a `user` turn, as well as a list of documents.
+
+ **Model output**: The output of the answerability intrinsic is the result of the
+ original chat completion request, formatted as a JSON object containing the
+ answerability likelihood score.
+
+ Please see the code snippets in the Quickstart Example section below for
+ examples that illustrate the intrinsic's input/output.
+
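As a rough illustration of the input/output shapes described above, the sketch below builds a hypothetical request and parses a hypothetical response body. Field names such as `documents` and `answerability_likelihood` follow the conventions in this repository's `io.yaml`, but the exact wire format depends on how the intrinsic is served.

```python
import json

# Illustrative OpenAI-compatible chat completion request for the
# answerability intrinsic (hypothetical document packaging; the exact
# format depends on the serving stack).
request = {
    "model": "answerability",
    "messages": [
        {"role": "assistant", "content": "Hello there, how can I help you?"},
        {"role": "user", "content": "What is the square root of 4?"},
    ],
    "documents": [{"doc_id": "1", "text": "The square root of 4 is 2."}],
}

# Illustrative response body: a JSON object holding the likelihood score.
response_body = '{"answerability_likelihood": 0.97}'
score = json.loads(response_body)["answerability_likelihood"]
print(score)  # -> 0.97
```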
+ ## Quickstart Example
+
+ The recommended way to call this intrinsic is through the [Mellea](https://mellea.ai) framework.
+ Here is some example code for calling this intrinsic from Mellea:
+ ```python
+ from mellea.backends.huggingface import LocalHFBackend
+ from mellea.stdlib.base import ChatContext, Document
+ from mellea.stdlib.chat import Message
+ from mellea.stdlib.intrinsics import rag
+
+
+ backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
+ context = ChatContext().add(Message("assistant", "Hello there, how can I help you?"))
+ next_user_turn = "What is the square root of 4?"
+ documents_answerable = [Document("The square root of 4 is 2.")]
+ documents_unanswerable = [Document("The square root of 8 is not 2.")]
+
+ result = rag.check_answerability(next_user_turn, documents_answerable, context, backend)
+ print(f"Result of answerability check when answer is in documents: {result}")
+
+ result = rag.check_answerability(
+     next_user_turn, documents_unanswerable, context, backend
+ )
+ print(f"Result of answerability check when answer is not in documents: {result}")
+ ```
+
+ ## Training Details
+
+ ### Training Data
+
+ The training data uses the publicly available Government corpus from
+ [MT-RAG](https://arxiv.org/pdf/2501.03468) as the source of documents. Based on
+ this corpus, we constructed a dataset consisting of a mix of human-created and
+ synthetically generated multi-turn conversations. It includes two types of
+ examples: (1) Answerable queries, where the final user question can be answered
+ based on the provided documents. These examples teach the adapter to recognize
+ when sufficient information is present to support an answer. (2) Unanswerable
+ queries, where the documents lack the necessary information to answer the final
+ user query. We used Mixtral as an automatic judge to validate the answerability
+ labels and filter out noisy samples.
+
+ #### Training Hyperparameters
+
+ The LoRA adapter was fine-tuned using PEFT under the following regime: rank =
+ 32, learning rate = 5e-6, number of epochs = 25, with early stopping based on a
+ validation set and a 90/10 split between training and validation.
+
+ ## Evaluation
+
+ ### Answerability Classification
+
+ We evaluated the model on binary answerability classification using the MT-RAG
+ Benchmark. In this setting, the model is given the full multi-turn conversation
+ history along with the supporting documents. This benchmark evaluates the
+ model's ability to assess answerability when the final user query can also
+ depend on prior turns for context. The following table presents results
+ comparing baselines and frontier models with task-specific answerability
+ intrinsics on the answerability classification task on MT-RAG data. The LoRAs
+ consistently outperform frontier models, converging near ~90% accuracy
+ regardless of base model size. Even small models like Granite 3.3-2B, once
+ fine-tuned, match or surpass much larger models, including GPT-4o. The
+ difference between LoRA and aLoRA is minimal, indicating both are effective
+ fine-tuning strategies.
+
+ | | Models | Unanswerable F1 | Answerable F1 | Classification Accuracy | Weighted F1 |
+ |:---:|:---:|:---:|:---:|:---:|:---:|
+ | Baselines | BigBird (pre-trained embeddings) w/ MLP | 73.4 | 65.2 | 69.8 | 69.6 |
+ | | llama2-7b as classifier (Full SFT) | 88.2 | 85.9 | 87.1 | 87.1 |
+ | Frontier Models out-of-the-box | Granite 3.3-2b-instruct | 48.7 | 70.4 | 62.4 | 58.7 |
+ | | Granite 3.3-8b-instruct | 62.8 | 65.2 | 64.5 | 63.9 |
+ | | GPT-OSS-20b | 77.3 | 58.3 | 70.7 | 68.5 |
+ | | GPT-OSS-120b | 70.2 | 68.9 | 69.8 | 69.6 |
+ | | GPT4o-mini | 82.7 | 78.1 | 80.8 | 80.6 |
+ | | GPT4o | 85.7 | 77.5 | 82.5 | 81.9 |
+ | Trained LoRAs/aLoRAs | Granite 3.3-2b LoRA | 91.2 | 89.6 | 90.4 | 90.5 |
+ | | Granite 3.3-8b LoRA | 91.1 | 90.3 | 90.6 | 90.7 |
+ | | GPT-OSS-20b LoRA | 91.6 | 89.8 | 90.8 | 90.8 |
+ | | Granite 3.3-2b aLoRA | 89.8 | 88.6 | 89.1 | 89.2 |
+ | | Granite 3.3-8b aLoRA | 90.1 | 89.6 | 89.5 | 89.9 |
+ | | GPT-OSS-20b aLoRA | 90.4 | 88.6 | 89.6 | 89.6 |
+
+
+ ### Comparing the Answerability Intrinsics vs. Vanilla Granite Models for Answer Quality
+
+ We compare the performance of Granite 3.3-2b Instruct and Granite 3.3-8b
+ Instruct against the answerability intrinsics implemented as LoRA adapters on a
+ subset of the MT-RAG Benchmark. In this setup, each query is paired with only 5
+ retrieved passages as context.
+
+ - Answerability Classification Performance: The answerability intrinsics
+ outperform the vanilla models in overall F1 on both answerables and
+ unanswerables. The answerability intrinsics achieve higher recall on
+ unanswerable queries, making them better at identifying questions that should
+ not be answered. However, this comes at the cost of lower recall on answerable
+ queries.
+
+ - Joint Answerability-Faithfulness Score, computed as:
+   - 1, if model prediction = IDK/unanswerable ∩ ground truth = unanswerable
+   - RAGAS Faithfulness, if model prediction = non-IDK/answerable ∩ ground truth = answerable
+   - 0, otherwise
+
+   This score rewards the model for correctly abstaining on unanswerable queries
+   (full credit) and for providing faithful answers on answerable queries
+   (partial credit based on RAGAS Faithfulness). No credit is given for incorrect
+   or unfaithful predictions.
+
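The scoring rule above can be written out directly; this is a minimal sketch, assuming the RAGAS faithfulness value in [0, 1] is computed externally for the generated answer:

```python
def joint_score(prediction_is_idk: bool, truth_answerable: bool,
                ragas_faithfulness: float) -> float:
    """Joint answerability-faithfulness score for a single query."""
    if prediction_is_idk and not truth_answerable:
        return 1.0  # full credit: correctly abstained on an unanswerable query
    if not prediction_is_idk and truth_answerable:
        return ragas_faithfulness  # partial credit: answer scored by faithfulness
    return 0.0  # no credit: wrong abstention decision


print(joint_score(True, False, 0.8))   # correct abstention -> 1.0
print(joint_score(False, True, 0.8))   # answered an answerable query -> 0.8
print(joint_score(True, True, 0.8))    # abstained on an answerable query -> 0.0
```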
+ The answerability intrinsics for granite-2b and granite-8b achieve lifts of 8%
+ and 13% on this metric, respectively. This rewards the model for correctly
+ abstaining on unanswerable queries and for being faithful when it chooses to
+ answer.
+
+
+ | | F1 Score Unanswerable | F1 Score Answerable | Recall Unanswerable | Recall Answerable | Joint Answerability-Faithfulness Score |
+ |:---:|:---:|:---:|:---:|:---:|:---:|
+ | Granite 3.3-2b Instruct | 13 | 77 | 7 | 99 | 48 |
+ | Granite 3.3-2b LoRA | 48 | 78 | 37 | 89 | 56 |
+ | Granite 3.3-8b Instruct | 17 | 77 | 10 | 99 | 49 |
+ | Granite 3.3-8b LoRA | 65 | 81 | 60 | 86 | 62 |
+
+ ## Model Card Authors
+
+ [Vraj Shah](mailto:vraj@ibm.com)
+
+ ### Framework versions
+
+ - PEFT 0.14.0
answerability/gpt-oss-20b/lora/adapter_config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "openai/gpt-oss-20b",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_bias": false,
+   "lora_dropout": 0,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "qalora_group_size": 16,
+   "r": 32,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj",
+     "k_proj"
+   ],
+   "target_parameters": [
+     "7.mlp.experts.gate_up_proj",
+     "7.mlp.experts.down_proj",
+     "15.mlp.experts.gate_up_proj",
+     "15.mlp.experts.down_proj",
+     "23.mlp.experts.gate_up_proj",
+     "23.mlp.experts.down_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_qalora": false,
+   "use_rslora": false
+ }
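A few properties of this config can be sanity-checked without loading the base model. The sketch below works on an inline copy of the key fields: the effective LoRA scaling is `lora_alpha / r`, and `target_parameters` pins adapter weights on the MoE expert projections of layers 7, 15, and 23.

```python
# Inline copy of the relevant fields from adapter_config.json above.
config = {
    "r": 32,
    "lora_alpha": 32,
    "target_modules": ["v_proj", "q_proj", "k_proj"],
    "target_parameters": [
        "7.mlp.experts.gate_up_proj",
        "7.mlp.experts.down_proj",
        "15.mlp.experts.gate_up_proj",
        "15.mlp.experts.down_proj",
        "23.mlp.experts.gate_up_proj",
        "23.mlp.experts.down_proj",
    ],
}

# Effective LoRA scaling factor alpha / r.
scaling = config["lora_alpha"] / config["r"]

# Transformer layers whose MoE expert weights carry adapters,
# parsed from the leading layer index in each target_parameters entry.
expert_layers = sorted({int(p.split(".")[0]) for p in config["target_parameters"]})
print(scaling, expert_layers)  # -> 1.0 [7, 15, 23]
```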
answerability/gpt-oss-20b/lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ee17447d06e0327bed4cd6811da1fa2607520285bb35c712bccb8d6d7f9e772
+ size 219238968
answerability/gpt-oss-20b/lora/io.yaml ADDED
@@ -0,0 +1,27 @@
+ # Model name string, or null to use whatever is provided in the chat completion request.
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+   {
+     "type": "string",
+     "enum": ["answerable", "unanswerable"]
+   }
+ transformations:
+   # Convert categorical answer to continuous value by decoding logprobs
+   - type: likelihood
+     categories_to_values:
+       "answerable": 1.0
+       "unanswerable": 0.0
+     input_path: []
+   # Convert scalar value to a record for consistency with other intrinsics
+   - type: nest
+     input_path: []
+     field_name: "answerability_likelihood"
+ instruction: ~
+ parameters:
+   # "unanswerable" can be 6 tokens at high temperatures
+   max_completion_tokens: 6
+ # No sentence boundary detection
+ sentence_boundaries: ~
+ # RAG documents go in first message
+ docs_as_message: string
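The `likelihood` transformation converts the model's categorical answer plus its log-probabilities into a continuous score. A minimal sketch of the idea, assuming per-category logprobs are available and renormalizing over the two categories (the actual decoding logic may differ):

```python
import math

def likelihood(logprobs: dict, categories_to_values: dict) -> float:
    """Expected value of the categories under renormalized probabilities.

    `logprobs` maps each category string to the log-probability the model
    assigned to it; `categories_to_values` maps categories to scores.
    """
    probs = {c: math.exp(lp) for c, lp in logprobs.items()}
    total = sum(probs.values())
    return sum(categories_to_values[c] * p / total for c, p in probs.items())

score = likelihood(
    {"answerable": math.log(0.9), "unanswerable": math.log(0.1)},
    {"answerable": 1.0, "unanswerable": 0.0},
)
print(round(score, 2))  # -> 0.9
```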
citations/README.md ADDED
@@ -0,0 +1,216 @@
+ ---
+ license: apache-2.0
+ language:
+ - en
+ pipeline_tag: text-generation
+ library_name: peft
+ ---
+
+ # Intrinsics for Citation Generation
+
+ ## Model Summary
+
+ This is a RAG-specific family of intrinsics fine-tuned for the citation generation task. Given a multi-turn conversation between a user and an AI assistant ending with an assistant response, and a set of documents/passages on which the last assistant response is supposed to be based, each intrinsic in the family generates citations for the last assistant response from the provided documents/passages. The intrinsics have the following features:
+ 1. **Fine-grained citations:** The intrinsic generates citations for each sentence in the assistant response (when available). Moreover, each citation consists of a set of sentences from the documents/passages that support the corresponding sentence in the assistant response.
+ 2. **Post-hoc citation generation:** Since the intrinsic takes the assistant response as input, it can generate citations for responses generated by any LLM. Pick your favorite LLM and use the intrinsic to generate post-hoc citations!
+
+ We provide two intrinsics implemented as LoRA adapters trained over Granite-3.3-2b-instruct and Granite-3.3-8b-instruct, respectively.
+
+ <br/>
+
+ - **Developer:** IBM Research
+ - **Model type:** LoRA adapter for [ibm-granite/granite-3.3-2b-instruct](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct) and [ibm-granite/granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct)
+ - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
+
+ ## Intended use
+ This is a family of citation generation intrinsics that provide the ability to generate citations for the last assistant response in a multi-turn RAG conversation, based on a set of provided documents/passages. They can be used to generate post-hoc citations for assistant responses generated by any LLM in a RAG setting.
+
+ > [!TIP]
+ > Note: While you can invoke a citation generation intrinsic directly, it is strongly recommended to call it through [granite-common](https://github.com/ibm-granite/granite-common), which wraps the model with a tailored I/O processor, enabling a friendlier development interface. The I/O processor takes care of several data transformation/validation tasks that would otherwise be required (incl. splitting the input documents and assistant response into sentences before calling the intrinsic, as well as validating the intrinsic's output and transforming the returned sentence IDs into spans over the documents and the response). We next describe the input/output of the citation generation intrinsics when invoked through granite-common.
+
+ **Intrinsic input**: The input to the citation generation intrinsic is an OpenAI-compatible chat completion request, containing a list of conversation turns ending with the assistant response for which the citations should be generated, as well as the list of documents from which the citations should be drawn. Please see the code snippets in the Quickstart Example section below for examples of how to specify the chat completion request as a JSON object.
+
+ **Intrinsic output**: The output of the citation generation intrinsic is formatted as the result of the original chat completion request, containing the citations for the last assistant response. The citations are provided in the form of a JSON array, whose items include the text and begin/end offsets of a response span, together with the text, document ID, and begin/end offsets of a document span that serves as a citation for the response span. When multiple document spans serve as citations for a single response span, they are represented as separate objects in the JSON array.
+
+ **Going from input to output**: When calling the intrinsic through granite-common, follow the steps below to transform the intrinsic input into the corresponding output (these steps are also exemplified in the code snippets included in the Quickstart Example section below):
+ 1. Pass the input chat completion request to the corresponding input processor (also referred to as IntrinsicsRewriter) provided by granite-common. The input processor converts the request to the format expected by the underlying citation generation model. This includes, among others, splitting the last assistant response and the documents into sentences, prepending them with sentence IDs, and introducing an appropriate task-specific instruction.
+ 2. Pass the input processor's result to the underlying citation generation model for inference. The model generates citations using a compact representation consisting of sentence IDs in the last assistant response and documents.
+ 3. Pass the model output to the appropriate output processor (also referred to as IntrinsicsResultProcessor) provided by granite-common. The output processor converts the low-level raw model output to the final output by, among others, mapping the sentence IDs back to response and document spans. The result is an application-friendly format ready for consumption by downstream applications.
+
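The sentence-ID bookkeeping in the output-processing step can be illustrated with a small self-contained sketch (a hypothetical helper, not the granite-common API): given a document split into sentences in order, each sentence ID resolves to a character span that a citation can point at.

```python
def sentence_spans(text: str, sentences: list[str]) -> dict[int, tuple[int, int]]:
    """Map sentence IDs to (begin, end) character offsets in `text`.

    Assumes `sentences` occur in `text` in order, as produced by a
    sentence splitter.
    """
    spans, cursor = {}, 0
    for i, sent in enumerate(sentences):
        begin = text.index(sent, cursor)
        spans[i] = (begin, begin + len(sent))
        cursor = begin + len(sent)
    return spans

doc = "The square root of 4 is 2. The square root of 9 is 3."
spans = sentence_spans(doc, ["The square root of 4 is 2.", "The square root of 9 is 3."])
# A citation referencing document sentence ID 1 resolves to this span:
begin, end = spans[1]
print(doc[begin:end])  # -> The square root of 9 is 3.
```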
+ ## Quickstart Example
+
+ The recommended way to call this intrinsic is through the [Mellea](https://mellea.ai) framework.
+ Here is some example code for calling this intrinsic from Mellea:
+ ```python
+ import json
+
+ from mellea.backends.huggingface import LocalHFBackend
+ from mellea.stdlib.base import ChatContext, Document
+ from mellea.stdlib.chat import Message
+ from mellea.stdlib.intrinsics import rag
+
+
+ backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
+ context = ChatContext().add(
+     Message(
+         "user",
+         "How does Murdoch's expansion in Australia compare to his expansion "
+         "in New Zealand?",
+     )
+ )
+ assistant_response = (
+     "Murdoch expanded in Australia and New Zealand by acquiring and expanding local "
+     "newspapers. I do not have information about his expansion in New Zealand after "
+     "purchasing The Dominion."
+ )
+ documents = [
+     Document(
+         doc_id="1",
+         text="Keith Rupert Murdoch was born on 11 March 1931 in Melbourne, Australia, "
+         "the son of Sir Keith Murdoch (1885-1952) and Dame Elisabeth Murdoch (nee "
+         "Greene; 1909-2012). He is of English, Irish, and Scottish ancestry. Murdoch's "
+         "parents were also born in Melbourne. Keith Murdoch was a war correspondent "
+         "and later a regional newspaper magnate owning two newspapers in Adelaide, "
+         "South Australia, and a radio station in a faraway mining town. Following his "
+         "father's death, when he was 21, Murdoch returned from Oxford to take charge "
+         "of the family business News Limited, which had been established in 1923. "
+         "Rupert Murdoch turned its Adelaide newspaper, The News, its main asset, into "
+         "a major success. He began to direct his attention to acquisition and "
+         "expansion, buying the troubled Sunday Times in Perth, Western Australia "
+         "(1956) and over the next few years acquiring suburban and provincial "
+         "newspapers in New South Wales, Queensland, Victoria and the Northern "
+         "Territory, including the Sydney afternoon tabloid, The Daily Mirror (1960). "
+         'The Economist describes Murdoch as "inventing the modern tabloid", as he '
+         "developed a pattern for his newspapers, increasing sports and scandal "
+         "coverage and adopting eye-catching headlines. Murdoch's first foray outside "
+         "Australia involved the purchase of a controlling interest in the New Zealand "
+         "daily The Dominion. In January 1964, while touring New Zealand with friends "
+         "in a rented Morris Minor after sailing across the Tasman, Murdoch read of a "
+         "takeover bid for the Wellington paper by the British-based Canadian newspaper "
+         "magnate, Lord Thomson of Fleet. On the spur of the moment, he launched a "
+         "counter-bid. A four-way battle for control ensued in which the 32-year-old "
+         "Murdoch was ultimately successful. Later in 1964, Murdoch launched The "
+         "Australian, Australia's first national daily newspaper, which was based "
+         "first in Canberra and later in Sydney. In 1972, Murdoch acquired the Sydney "
+         "morning tabloid The Daily Telegraph from Australian media mogul Sir Frank "
+         "Packer, who later regretted selling it to him. In 1984, Murdoch was appointed "
+         "Companion of the Order of Australia (AC) for services to publishing. In 1999, "
+         "Murdoch significantly expanded his music holdings in Australia by acquiring "
+         "the controlling share in a leading Australian independent label, Michael "
+         "Gudinski's Mushroom Records; he merged that with Festival Records, and the "
+         "result was Festival Mushroom Records (FMR). Both Festival and FMR were "
+         "managed by Murdoch's son James Murdoch for several years.",
+     ),
+     Document(
+         doc_id="2",
+         text="This document has nothing to do with Rupert Murdoch. This document is "
+         "two sentences long.",
+     ),
+ ]
+
+ result = rag.find_citations(assistant_response, documents, context, backend)
+ print(f"Result of citations intrinsic:\n{json.dumps(result, indent=2)}")
+ ```
+
+ ## Training Details
+
+ The citation generation intrinsics were trained on synthetically generated citation datasets. The process of generating the training data consisted of two main steps:
+ - **Multi-turn RAG conversation generation:** Starting from publicly available document corpora, we generated a set of multi-turn RAG data, consisting of multi-turn conversations grounded on passages retrieved from the corpora. For details on the RAG conversation generation process, please refer to the [Granite Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf) and [Lee, Young-Suk, et al.](https://arxiv.org/pdf/2409.11500).
+ - **Citation generation:** For each turn of the multi-turn RAG conversations from the previous step, we used a multi-step synthetic citation generation pipeline to generate citations for the assistant response.
+
+ The resulting data instances were used to train the citation generation intrinsics.
+
+ ### Training Data
+
+ The following public datasets were used as seed datasets for the multi-turn RAG conversation generation process:
+ - [CoQA](https://stanfordnlp.github.io/coqa/) - Wikipedia passages
+ - [MultiDoc2Dial](https://huggingface.co/datasets/IBM/multidoc2dial)
+ - [QuAC](https://huggingface.co/datasets/allenai/quac)
+
+
+ ## Evaluation
+
+ We evaluate the citation generation intrinsics on two citation benchmarks:
+ - [ALCE](https://aclanthology.org/2023.emnlp-main.398/): Evaluates the ability of models to produce document/passage-level citations (i.e., identify the documents/passages that support a statement in the response).
+ - [LongBench-Cite](https://arxiv.org/abs/2409.02897): Evaluates the ability of models to produce fine-grained span-level citations (i.e., identify the spans within the input documents/passages that support a statement in the response) with a focus on long contexts.
+
+ Since the intrinsics correspond to a post-hoc citation generation approach, their performance on the two benchmarks depends on the assistant responses for which they are asked to generate citations. To facilitate an apples-to-apples comparison, for each experiment we keep the assistant responses the same and change the model that is used to generate the citations. In particular, we prompt an LLM to create an assistant response together with citations and evaluate the generated citations on the corresponding benchmark. Then, we compute and evaluate the citations generated for the same LLM response by each of the citation generation intrinsics. We provide results for the two intrinsics, implemented as LoRA adapters over Granite-3.3-2b-instruct and Granite-3.3-8b-instruct, respectively.
+
+ ### Evaluation on ALCE
+
+ For the ALCE evaluation, we prompt Llama-3.1-70B-Instruct and Mixtral-8x22B-Instruct to generate both the assistant response and corresponding passage-level citations. We first calculate the performance of the citations generated by these models on ALCE. Subsequently, we feed the responses of these models (leaving out the citations) to the citation generation intrinsics and evaluate their generated citations. The results are shown in the table below:
+
+ | Model used to generate response | Model used to generate citations | Recall | Precision | F1 |
+ |:---:|:---:|:---:|:---:|:---:|
+ | Llama-3.1-70B-Instruct | Llama-3.1-70B-Instruct | 61.4 | 58.1 | 59.7 |
+ | Llama-3.1-70B-Instruct | Granite-3.3-2B LoRA citations | 51.5 | 64.2 | 57.2 |
+ | Llama-3.1-70B-Instruct | Granite-3.3-8B LoRA citations | 55.4 | 64.2 | 59.5 |
+ | Mixtral-8x22B-Instruct | Mixtral-8x22B-Instruct | 62.2 | 62.5 | 62.3 |
+ | Mixtral-8x22B-Instruct | Granite-3.3-2B LoRA citations | 51.4 | 67.3 | 58.3 |
+ | Mixtral-8x22B-Instruct | Granite-3.3-8B LoRA citations | 55.8 | 68.5 | 61.5 |
+
+ We observe that the LoRA adapter over Granite-3.3-8b-instruct performs on par with much bigger models when those are prompted to create passage-level citations (with the LoRA adapter over Granite-3.3-2b-instruct being slightly worse). It is interesting to note that while the adapter's F1 performance is similar to the baselines, it exhibits a different precision-recall trade-off, trading lower recall for higher precision.
+
+ Notes:
+ - All results are reported on the ELI5 dataset using the ORACLE (5-psg) setting.
+ - To prompt Llama and Mixtral, we employ a setting similar to the one proposed in the ALCE paper; in particular, we use a two-shot prompt comprising two of the ICL examples from ALCE, as well as a slightly modified version of the instruction from the paper.
+ - Sentence splitting of context/response is performed using NLTK.
+ - Finally, since ALCE expects passage-level citations, we elevate the finer-grained citations produced by the LoRA adapter to the passage level before running the ALCE evaluation.
+
+
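The passage-level elevation mentioned in the last note above amounts to collapsing sentence-level citations onto their source passages. A minimal sketch with hypothetical record shapes (not the actual evaluation code):

```python
def elevate_to_passage_level(citations: list[dict]) -> dict[int, set[str]]:
    """Collapse sentence-level citations to passage-level citations.

    Each citation is assumed to look like
    {"response_sentence": 0, "doc_id": "1", "doc_sentence": 3}; the result
    maps each response sentence to the set of cited passage IDs.
    """
    passages: dict[int, set[str]] = {}
    for c in citations:
        passages.setdefault(c["response_sentence"], set()).add(c["doc_id"])
    return passages

fine_grained = [
    {"response_sentence": 0, "doc_id": "1", "doc_sentence": 3},
    {"response_sentence": 0, "doc_id": "1", "doc_sentence": 5},
    {"response_sentence": 1, "doc_id": "2", "doc_sentence": 0},
]
print(elevate_to_passage_level(fine_grained))  # -> {0: {'1'}, 1: {'2'}}
```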
159
+ ### Evaluation on LongBench-Cite
160
+
161
+ For the LonBench-Cite evaluation, we prompt Llama-3.1-70B-Instruct to generate both the assistant response and corresponding citations. Then we evaluate the citations generated by Llama as well as the post-hoc citations generated by the citation generation intrinsics when invoked on the Llama responses. The results are shown in the table below:
162
+
163
+ <table>
164
+ <tr>
165
+ <th>Model used to generate response</th>
166
+ <th>Model used to generate citations</th>
167
+ <th colspan="3">Longbench-Chat (en)</th>
168
+ <th colspan="3">MultifieldQA (en)</th>
169
+ <th colspan="3">HotpotQA</th>
170
+ <th colspan="3">GovReport</th>
171
+ </tr>
172
+ <tr>
173
+ <th></th>
174
+ <th></th>
175
+ <th>R</th><th>P</th><th>F1</th>
176
+ <th>R</th><th>P</th><th>F1</th>
177
+ <th>R</th><th>P</th><th>F1</th>
178
+ <th>R</th><th>P</th><th>F1</th>
179
+ </tr>
180
+ <tr>
181
+ <td>Llama-3.1-70B-Instruct</td>
182
+ <td>Llama-3.1-70B-Instruct</td>
183
+ <td>27.0</td><td>34.4</td><td>26.1</td>
184
+ <td>46.1</td><td>63.3</td><td>49.7</td>
185
+ <td>34.0</td><td>39.4</td><td>30.2</td>
186
+ <td>55.0</td><td>77.5</td><td>62.0</td>
187
+ </tr>
188
+ <tr>
189
+ <td>Llama-3.1-70B-Instruct</td>
190
+ <td>Granite-3.3-2B LoRA citations</td>
191
+ <td>38.7</td><td>47.4</td><td>39.3</td>
192
+ <td>66.4</td><td>81.8</td><td>70.4</td>
193
+ <td>60.7</td><td>68.5</td><td>59.7</td>
194
+ <td>60.1</td><td>72.4</td><td>64.7</td>
195
+ </tr>
196
+ <tr>
197
+ <td>Llama-3.1-70B-Instruct</td>
198
+ <td>Granite-3.3-8B LoRA citations</td>
199
+ <td>54.5</td><td>59.9</td><td>55.6</td>
200
+ <td>73.0</td><td>82.9</td><td>75.7</td>
201
+ <td>68.5</td><td>73.8</td><td>66.4</td>
202
+ <td>73.5</td><td>84.6</td><td>78.2</td>
203
+ </tr>
204
+ </table>
205
+
206
+ We observe that both variants of the LoRA adapter (even the one trained over Granite-3.3-2b-instruct) perform significantly better across the board than Llama-3.1-70B-Instruct when the latter is prompted to create span-level citations. This demonstrates the value of the adapter for creating post-hoc citations even for assistant responses generated by much larger LLMs.
207
+
208
+ Notes:
209
+ - The evaluation results are reported on the English subset of LongBench-Cite (i.e., restricted to instances whose `language` field equals `en`).
210
+ - To prompt Llama to generate a response with citations, we use the one-shot prompt described in the paper.
211
+ - For the LoRA adapter, sentence splitting of the context is performed using NLTK. For the response, we reuse the sentence splitting in Llama's output (since the LongBench-Cite prompt instructs the model to output a response split into sentences/statements).
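Sentence splitting with NLTK, as used for the context, can be done along these lines. This is a sketch, not the actual evaluation code; `PunktSentenceTokenizer` is used directly because its `span_tokenize` method yields the character offsets that span-level citations need:

```python
from nltk.tokenize import PunktSentenceTokenizer

text = "The model cites spans. Each span maps to a sentence."

# span_tokenize yields (begin, end) character offsets into the original text,
# which is what span-level citations require; sent_tokenize would return only
# the sentence strings.
tokenizer = PunktSentenceTokenizer()
spans = list(tokenizer.span_tokenize(text))
sentences = [text[b:e] for b, e in spans]
```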
212
+
213
+ ## Model Card Authors
214
+
215
+ [Yannis Katsis](mailto:yannis.katsis@ibm.com)<br/>
216
+ [Chulaka Gunasekara](mailto:chulaka.gunasekara@ibm.com)
citations/gpt-oss-20b/lora/adapter_config.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "openai/gpt-oss-20b",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 16,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "o_proj",
28
+ "v_proj",
29
+ "q_proj",
30
+ "k_proj"
31
+ ],
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_rslora": false
36
+ }
citations/gpt-oss-20b/lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d27a9b6f66f37e21e36c0248eefd1d4284f92c6e7cccf5dd544b14e32fbd71f0
3
+ size 31876192
citations/gpt-oss-20b/lora/chat_template.jinja ADDED
@@ -0,0 +1,331 @@
1
+ {#-
2
+ In addition to the normal inputs of `messages` and `tools`, this template also accepts the
3
+ following kwargs:
4
+ - "builtin_tools": A list, can contain "browser" and/or "python".
5
+ - "model_identity": A string that optionally describes the model identity.
6
+ - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium".
7
+ #}
8
+
9
+ {#- Tool Definition Rendering ============================================== #}
10
+ {%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
11
+ {%- if param_spec.type == "array" -%}
12
+ {%- if param_spec['items'] -%}
13
+ {%- if param_spec['items']['type'] == "string" -%}
14
+ {{- "string[]" }}
15
+ {%- elif param_spec['items']['type'] == "number" -%}
16
+ {{- "number[]" }}
17
+ {%- elif param_spec['items']['type'] == "integer" -%}
18
+ {{- "number[]" }}
19
+ {%- elif param_spec['items']['type'] == "boolean" -%}
20
+ {{- "boolean[]" }}
21
+ {%- else -%}
22
+ {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
23
+ {%- if inner_type == "object | object" or inner_type|length > 50 -%}
24
+ {{- "any[]" }}
25
+ {%- else -%}
26
+ {{- inner_type + "[]" }}
27
+ {%- endif -%}
28
+ {%- endif -%}
29
+ {%- if param_spec.nullable -%}
30
+ {{- " | null" }}
31
+ {%- endif -%}
32
+ {%- else -%}
33
+ {{- "any[]" }}
34
+ {%- if param_spec.nullable -%}
35
+ {{- " | null" }}
36
+ {%- endif -%}
37
+ {%- endif -%}
38
+ {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
39
+ {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
40
+ {%- if param_spec.type | length > 1 -%}
41
+ {{- param_spec.type | join(" | ") }}
42
+ {%- else -%}
43
+ {{- param_spec.type[0] }}
44
+ {%- endif -%}
45
+ {%- elif param_spec.oneOf -%}
46
+ {#- Handle oneOf schemas - check for complex unions and fallback to any #}
47
+ {%- set has_object_variants = false -%}
48
+ {%- for variant in param_spec.oneOf -%}
49
+ {%- if variant.type == "object" -%}
50
+ {%- set has_object_variants = true -%}
51
+ {%- endif -%}
52
+ {%- endfor -%}
53
+ {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
54
+ {{- "any" }}
55
+ {%- else -%}
56
+ {%- for variant in param_spec.oneOf -%}
57
+ {{- render_typescript_type(variant, required_params) -}}
58
+ {%- if variant.description %}
59
+ {{- "// " + variant.description }}
60
+ {%- endif -%}
61
+ {%- if variant.default is defined %}
62
+ {{ "// default: " + variant.default|tojson }}
63
+ {%- endif -%}
64
+ {%- if not loop.last %}
65
+ {{- " | " }}
66
+ {% endif -%}
67
+ {%- endfor -%}
68
+ {%- endif -%}
69
+ {%- elif param_spec.type == "string" -%}
70
+ {%- if param_spec.enum -%}
71
+ {{- '"' + param_spec.enum|join('" | "') + '"' -}}
72
+ {%- else -%}
73
+ {{- "string" }}
74
+ {%- if param_spec.nullable %}
75
+ {{- " | null" }}
76
+ {%- endif -%}
77
+ {%- endif -%}
78
+ {%- elif param_spec.type == "number" -%}
79
+ {{- "number" }}
80
+ {%- elif param_spec.type == "integer" -%}
81
+ {{- "number" }}
82
+ {%- elif param_spec.type == "boolean" -%}
83
+ {{- "boolean" }}
84
+
85
+ {%- elif param_spec.type == "object" -%}
86
+ {%- if param_spec.properties -%}
87
+ {{- "{\n" }}
88
+ {%- for prop_name, prop_spec in param_spec.properties.items() -%}
89
+ {{- prop_name -}}
90
+ {%- if prop_name not in (param_spec.required or []) -%}
91
+ {{- "?" }}
92
+ {%- endif -%}
93
+ {{- ": " }}
94
+ {{ render_typescript_type(prop_spec, param_spec.required or []) }}
95
+ {%- if not loop.last -%}
96
+ {{-", " }}
97
+ {%- endif -%}
98
+ {%- endfor -%}
99
+ {{- "}" }}
100
+ {%- else -%}
101
+ {{- "object" }}
102
+ {%- endif -%}
103
+ {%- else -%}
104
+ {{- "any" }}
105
+ {%- endif -%}
106
+ {%- endmacro -%}
107
+
108
+ {%- macro render_tool_namespace(namespace_name, tools) -%}
109
+ {{- "## " + namespace_name + "\n\n" }}
110
+ {{- "namespace " + namespace_name + " {\n\n" }}
111
+ {%- for tool in tools %}
112
+ {%- set tool = tool.function %}
113
+ {{- "// " + tool.description + "\n" }}
114
+ {{- "type "+ tool.name + " = " }}
115
+ {%- if tool.parameters and tool.parameters.properties %}
116
+ {{- "(_: {\n" }}
117
+ {%- for param_name, param_spec in tool.parameters.properties.items() %}
118
+ {%- if param_spec.description %}
119
+ {{- "// " + param_spec.description + "\n" }}
120
+ {%- endif %}
121
+ {{- param_name }}
122
+ {%- if param_name not in (tool.parameters.required or []) -%}
123
+ {{- "?" }}
124
+ {%- endif -%}
125
+ {{- ": " }}
126
+ {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
127
+ {%- if param_spec.default is defined -%}
128
+ {%- if param_spec.enum %}
129
+ {{- ", // default: " + param_spec.default }}
130
+ {%- elif param_spec.oneOf %}
131
+ {{- "// default: " + param_spec.default }}
132
+ {%- else %}
133
+ {{- ", // default: " + param_spec.default|tojson }}
134
+ {%- endif -%}
135
+ {%- endif -%}
136
+ {%- if not loop.last %}
137
+ {{- ",\n" }}
138
+ {%- else %}
139
+ {{- ",\n" }}
140
+ {%- endif -%}
141
+ {%- endfor %}
142
+ {{- "}) => any;\n\n" }}
143
+ {%- else -%}
144
+ {{- "() => any;\n\n" }}
145
+ {%- endif -%}
146
+ {%- endfor %}
147
+ {{- "} // namespace " + namespace_name }}
148
+ {%- endmacro -%}
149
+
150
+ {%- macro render_builtin_tools(browser_tool, python_tool) -%}
151
+ {%- if browser_tool %}
152
+ {{- "## browser\n\n" }}
153
+ {{- "// Tool for browsing.\n" }}
154
+ {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }}
155
+ {{- "// Cite information from the tool using the following format:\n" }}
156
+ {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.\n" }}
157
+ {{- "// Do not quote more than 10 words directly from the tool output.\n" }}
158
+ {{- "// sources=web (default: web)\n" }}
159
+ {{- "namespace browser {\n\n" }}
160
+ {{- "// Searches for information related to `query` and displays `topn` results.\n" }}
161
+ {{- "type search = (_: {\n" }}
162
+ {{- "query: string,\n" }}
163
+ {{- "topn?: number, // default: 10\n" }}
164
+ {{- "source?: string,\n" }}
165
+ {{- "}) => any;\n\n" }}
166
+ {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }}
167
+ {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }}
168
+ {{- "// If `cursor` is not provided, the most recent page is implied.\n" }}
169
+ {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }}
170
+ {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\n" }}
171
+ {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }}
172
+ {{- "type open = (_: {\n" }}
173
+ {{- "id?: number | string, // default: -1\n" }}
174
+ {{- "cursor?: number, // default: -1\n" }}
175
+ {{- "loc?: number, // default: -1\n" }}
176
+ {{- "num_lines?: number, // default: -1\n" }}
177
+ {{- "view_source?: boolean, // default: false\n" }}
178
+ {{- "source?: string,\n" }}
179
+ {{- "}) => any;\n\n" }}
180
+ {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }}
181
+ {{- "type find = (_: {\n" }}
182
+ {{- "pattern: string,\n" }}
183
+ {{- "cursor?: number, // default: -1\n" }}
184
+ {{- "}) => any;\n\n" }}
185
+ {{- "} // namespace browser\n\n" }}
186
+ {%- endif -%}
187
+
188
+ {%- if python_tool %}
189
+ {{- "## python\n\n" }}
190
+ {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\n\n" }}
191
+ {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }}
192
+ {%- endif -%}
193
+ {%- endmacro -%}
194
+
195
+ {#- System Message Construction ============================================ #}
196
+ {%- macro build_system_message() -%}
197
+ {%- if model_identity is not defined %}
198
+ {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %}
199
+ {%- endif %}
200
+ {{- model_identity + "\n" }}
201
+ {{- "Knowledge cutoff: 2024-06\n" }}
202
+ {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }}
203
+ {%- if reasoning_effort is not defined %}
204
+ {%- set reasoning_effort = "medium" %}
205
+ {%- endif %}
206
+ {{- "Reasoning: " + reasoning_effort + "\n\n" }}
207
+ {%- if builtin_tools %}
208
+ {{- "# Tools\n\n" }}
209
+ {%- set available_builtin_tools = namespace(browser=false, python=false) %}
210
+ {%- for tool in builtin_tools %}
211
+ {%- if tool == "browser" %}
212
+ {%- set available_builtin_tools.browser = true %}
213
+ {%- elif tool == "python" %}
214
+ {%- set available_builtin_tools.python = true %}
215
+ {%- endif %}
216
+ {%- endfor %}
217
+ {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
218
+ {%- endif -%}
219
+ {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }}
220
+ {%- if tools -%}
221
+ {{- "\nCalls to these tools must go to the commentary channel: 'functions'." }}
222
+ {%- endif -%}
223
+ {%- endmacro -%}
224
+
225
+ {#- Main Template Logic ================================================= #}
226
+ {#- Set defaults #}
227
+
228
+ {#- Render system message #}
229
+ {{- "<|start|>system<|message|>" }}
230
+ {{- build_system_message() }}
231
+ {{- "<|end|>" }}
232
+
233
+ {#- Extract developer message #}
234
+ {%- if messages[0].role == "developer" or messages[0].role == "system" %}
235
+ {%- set developer_message = messages[0].content %}
236
+ {%- set loop_messages = messages[1:] %}
237
+ {%- else %}
238
+ {%- set developer_message = "" %}
239
+ {%- set loop_messages = messages %}
240
+ {%- endif %}
241
+
242
+ {#- Render developer message #}
243
+ {%- if developer_message or tools %}
244
+ {{- "<|start|>developer<|message|>" }}
245
+ {%- if developer_message %}
246
+ {{- "# Instructions\n\n" }}
247
+ {{- developer_message }}
248
+ {{- "\n\n" }}
249
+ {%- endif %}
250
+ {%- if tools -%}
251
+ {{- "# Tools\n\n" }}
252
+ {{- render_tool_namespace("functions", tools) }}
253
+ {%- endif -%}
254
+ {{- "<|end|>" }}
255
+ {%- endif %}
256
+
257
+ {#- Render messages #}
258
+ {%- set last_tool_call = namespace(name=none) %}
259
+ {%- for message in loop_messages -%}
260
+ {#- At this point only assistant/user/tool messages should remain #}
261
+ {%- if message.role == 'assistant' -%}
262
+ {#- Checks to ensure the messages are being passed in the format we expect #}
263
+ {%- if "content" in message %}
264
+ {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %}
265
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
266
+ {%- endif %}
267
+ {%- endif %}
268
+ {%- if "thinking" in message %}
269
+ {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %}
270
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
271
+ {%- endif %}
272
+ {%- endif %}
273
+ {%- if "tool_calls" in message %}
274
+ {#- We need very careful handling here - we want to drop the tool call analysis message if the model #}
275
+ {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #}
276
+ {#- when we render CoT/analysis messages in inference. #}
277
+ {%- set future_final_message = namespace(found=false) %}
278
+ {%- for future_message in loop_messages[loop.index:] %}
279
+ {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %}
280
+ {%- set future_final_message.found = true %}
281
+ {%- endif %}
282
+ {%- endfor %}
283
+ {#- We assume max 1 tool call per message, and so we infer the tool call name #}
284
+ {#- in "tool" messages from the most recent assistant tool call name #}
285
+ {%- set tool_call = message.tool_calls[0] %}
286
+ {%- if tool_call.function %}
287
+ {%- set tool_call = tool_call.function %}
288
+ {%- endif %}
289
+ {%- if message.content and message.thinking %}
290
+ {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
291
+ {%- elif message.content and not future_final_message.found %}
292
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
293
+ {%- elif message.thinking and not future_final_message.found %}
294
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
295
+ {%- endif %}
296
+ {{- "<|start|>assistant to=" }}
297
+ {{- "functions." + tool_call.name + "<|channel|>commentary " }}
298
+ {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
299
+ {{- tool_call.arguments|tojson }}
300
+ {{- "<|call|>" }}
301
+ {%- set last_tool_call.name = tool_call.name %}
302
+ {%- elif loop.last and not add_generation_prompt %}
303
+ {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
304
+ {#- This is a situation that should only occur in training, never in inference. #}
305
+ {%- if "thinking" in message %}
306
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
307
+ {%- endif %}
308
+ {#- <|return|> indicates the end of generation, but <|end|> does not #}
309
+ {#- <|return|> should never be an input to the model, but we include it as the final token #}
310
+ {#- when training, so the model learns to emit it. #}
311
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
312
+ {%- else %}
313
+ {#- CoT is dropped during all previous turns, so we never render it for inference #}
314
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
315
+ {%- set last_tool_call.name = none %}
316
+ {%- endif %}
317
+ {%- elif message.role == 'tool' -%}
318
+ {%- if last_tool_call.name is none %}
319
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
320
+ {%- endif %}
321
+ {{- "<|start|>functions." + last_tool_call.name }}
322
+ {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }}
323
+ {%- elif message.role == 'user' -%}
324
+ {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
325
+ {%- endif -%}
326
+ {%- endfor -%}
327
+
328
+ {#- Generation prompt #}
329
+ {%- if add_generation_prompt -%}
330
+ <|start|>assistant
331
+ {%- endif -%}
citations/gpt-oss-20b/lora/io.yaml ADDED
@@ -0,0 +1,97 @@
1
+ # Model name string, or null to use whatever is provided in the chat completion request
2
+ model: ~
3
+ # JSON schema of the model's output
4
+ response_format: |
5
+ {
6
+ "$defs": {
7
+ "_MODEL_OUTPUT_ENTRY": {
8
+ "properties": {
9
+ "r": {
10
+ "minimum": 0,
11
+ "title": "R",
12
+ "type": "integer"
13
+ },
14
+ "c": {
15
+ "items": {
16
+ "minimum": 0,
17
+ "type": "integer"
18
+ },
19
+ "title": "C",
20
+ "type": "array"
21
+ }
22
+ },
23
+ "required": [
24
+ "r",
25
+ "c"
26
+ ],
27
+ "title": "_MODEL_OUTPUT_ENTRY",
28
+ "type": "object"
29
+ }
30
+ },
31
+ "items": {
32
+ "$ref": "#/$defs/_MODEL_OUTPUT_ENTRY"
33
+ },
34
+ "title": "_MODEL_OUTPUT",
35
+ "type": "array"
36
+ }
37
+ transformations:
38
+ # Explode the list of document sentences in each citation
39
+ - type: explode
40
+ input_path: [] # Zero-length path means match root element
41
+ target_field: "c"
42
+ # Model may repeat itself; drop the resulting duplicates.
43
+ - type: drop_duplicates
44
+ input_path: [] # Zero-length path means match root element
45
+ target_fields: ["r", "c"]
46
+ # Replace sentence number with sentence location and contents.
47
+ # Do this first for sentences from the last turn, then for sentences from documents.
48
+ - type: decode_sentences
49
+ source: "last_message"
50
+ input_path: [~, "r"] # Null in path means wildcard
51
+ # New fields to add for each sentence
52
+ output_names:
53
+ begin: "response_begin"
54
+ end: "response_end"
55
+ text: "response_text"
56
+ - type: decode_sentences
57
+ source: "documents"
58
+ input_path: [~, "c"] # Null in path means wildcard
59
+ # New fields to add for each sentence
60
+ output_names:
61
+ document_id: "citation_doc_id"
62
+ begin: "citation_begin"
63
+ end: "citation_end"
64
+ text: "citation_text"
65
+ # Remove fields that we no longer need
66
+ - type: project
67
+ input_path: []
68
+ retained_fields:
69
+ - "response_begin"
70
+ - "response_end"
71
+ - "response_text"
72
+ - "citation_doc_id"
73
+ - "citation_begin"
74
+ - "citation_end"
75
+ - "citation_text"
76
+ # Merge adjacent document spans
77
+ - type: merge_spans
78
+ input_path: []
79
+ group_fields: ["response_begin", "response_end", "response_text", "citation_doc_id"]
80
+ begin_field: "citation_begin"
81
+ end_field: "citation_end"
82
+ text_field: "citation_text"
83
+
84
+ instruction: >
85
+ Split the last assistant response into individual sentences.
86
+ For each sentence in the response, identify the statement IDs from the below
87
+ documents that it references. Ensure that your output includes all response
88
+ sentence IDs, and for each response sentence ID, provide the list of corresponding
89
+ referring document sentence IDs. The output must be a json structure.
90
+ parameters:
91
+ max_completion_tokens: 4096
92
+ sentence_boundaries:
93
+ # Mapping from string location to sentence delimiter prefix
94
+ last_message: "r" # <r0>, <r1>, etc.
95
+ documents: "c"
96
+ # gpt-oss base models have no "documents" argument
97
+ docs_as_message: json
citations/gpt-oss-20b/lora/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|return|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
citations/gpt-oss-20b/lora/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0614fe83cadab421296e664e1f48f4261fa8fef6e03e63bb75c20f38e37d07d3
3
+ size 27868174
citations/gpt-oss-20b/lora/tokenizer_config.json ADDED
@@ -0,0 +1,185 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "199998": {
4
+ "content": "<|startoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "199999": {
12
+ "content": "<|endoftext|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "200000": {
20
+ "content": "<|reserved_200000|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "200001": {
28
+ "content": "<|reserved_200001|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "200002": {
36
+ "content": "<|return|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "200003": {
44
+ "content": "<|constrain|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "200004": {
52
+ "content": "<|reserved_200004|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "200005": {
60
+ "content": "<|channel|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "200006": {
68
+ "content": "<|start|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "200007": {
76
+ "content": "<|end|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "200008": {
84
+ "content": "<|message|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "200009": {
92
+ "content": "<|reserved_200009|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "200010": {
100
+ "content": "<|reserved_200010|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "200011": {
108
+ "content": "<|reserved_200011|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "200012": {
116
+ "content": "<|call|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "200013": {
124
+ "content": "<|reserved_200013|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "200014": {
132
+ "content": "<|reserved_200014|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "200015": {
140
+ "content": "<|reserved_200015|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "200016": {
148
+ "content": "<|reserved_200016|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "200017": {
156
+ "content": "<|reserved_200017|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "200018": {
164
+ "content": "<|endofprompt|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ }
171
+ },
172
+ "bos_token": "<|startoftext|>",
173
+ "clean_up_tokenization_spaces": false,
174
+ "eos_token": "<|return|>",
175
+ "extra_special_tokens": {},
176
+ "model_input_names": [
177
+ "input_ids",
178
+ "attention_mask"
179
+ ],
180
+ "model_max_length": 1000000000000000019884624838656,
181
+ "pad_token": "<|endoftext|>",
182
+ "padding_side": "right",
183
+ "split_special_tokens": false,
184
+ "tokenizer_class": "PreTrainedTokenizerFast"
185
+ }
hallucination_detection/README.md ADDED
@@ -0,0 +1,117 @@
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ pipeline_tag: text-generation
6
+ library_name: peft
7
8
+ ---
9
+
10
+ # Intrinsics for Hallucination Detection
11
+
12
+ ## Model Summary
13
+
14
+ This is a RAG-specific family of intrinsics fine-tuned for the hallucination detection task. Given a multi-turn conversation between a user and an AI assistant that ends with an assistant response, together with a set of documents/passages on which that response is supposed to be based, the adapter outputs a hallucination label (faithful/partial/unfaithful/NA) for each sentence of the response.
15
+
16
+ We provide two intrinsics implemented as LoRA adapters trained over Granite-3.3-2b-instruct and Granite-3.3-8b-instruct, respectively.
17
+
18
+ <br/>
19
+
20
+ - **Developer:** IBM Research
21
+ - **Model type:** LoRA adapter for [ibm-granite/granite-3.3-2b-instruct](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct) and [ibm-granite/granite-3.3-8b-instruct](https://huggingface.co/ibm-granite/granite-3.3-8b-instruct)
22
+ - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
23
+
24
+ ## Intended use
25
+ This is a family of hallucination detection intrinsics that identifies hallucination risks for the sentences of the last assistant response in a multi-turn RAG conversation, based on a set of provided documents/passages.
26
+
27
+ > [!TIP]
28
+ > Note: While you can invoke the hallucination detection intrinsic directly, it is strongly recommended to call it through [granite-common](https://github.com/ibm-granite/granite-common), which wraps the model with a tailored I/O processor, enabling a friendlier development interface. The I/O processor takes care of several data transformation/validation tasks that would otherwise be required, including splitting the input documents and assistant response into sentences before calling the intrinsic, as well as validating the intrinsic's output. We next describe the input/output of the hallucination detection intrinsics when invoked through granite-common.
29
+
30
+ **Intrinsic input**: The hallucination detection intrinsic takes as input an OpenAI-compatible chat completion request. This request includes a list of conversation turns ending with the assistant's response (the response to be checked for hallucinations) and a list of reference documents on which the final assistant response should be grounded. See the code snippets in the Quickstart Example section below for examples of how to format the chat completion request as a JSON object.
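As a rough illustration, such a request is an ordinary OpenAI-style JSON object whose message list ends with the assistant response to be checked, with the reference documents attached alongside. The exact field layout (in particular, where the `documents` list is placed) is an assumption here, not the definitive granite-common wire format:

```python
import json

# Hypothetical OpenAI-compatible chat completion request for the hallucination
# detection intrinsic: the conversation ends with the assistant response to be
# checked, and the reference documents are attached as extra request data.
request = {
    "model": "hallucination_detection",
    "messages": [
        {"role": "user", "content": "Tell me about some yellow fish."},
        {"role": "assistant", "content": "Purple bumble fish are yellow. Green bumble fish are also yellow."},
    ],
    # Assumed placement of the reference documents; consult granite-common
    # for the authoritative field name and location.
    "extra_body": {
        "documents": [
            {"doc_id": "1", "text": "The only type of fish that is yellow is the purple bumble fish."}
        ]
    },
}
payload = json.dumps(request)  # ready to send to a chat completion endpoint
```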
31
+
32
+ **Intrinsic output**: The output of the hallucination detection intrinsic is formatted as the result of the original chat completion request, containing the hallucinations detected for the last assistant response. The hallucinations are provided in the form of a JSON array, whose items include the text and begin/end offsets of each response span (sentence), together with the faithfulness_likelihood of that sentence and the explanation for the faithfulness_likelihood.
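A downstream application can consume that array directly. The snippet below parses an illustrative output item; the values (and the exact offsets) are made up for illustration, though the field names follow the description above:

```python
import json

# Illustrative shape of the intrinsic's output: a JSON array with one entry
# per response sentence, carrying span offsets, a faithfulness likelihood,
# and an explanation. Values here are invented for the example.
raw = """[
  {"response_text": "Green bumble fish are also yellow.",
   "response_begin": 31, "response_end": 65,
   "faithfulness_likelihood": "unfaithful",
   "explanation": "The documents mention only purple bumble fish."}
]"""

for item in json.loads(raw):
    # Flag any sentence whose likelihood is not "faithful" for review.
    print(f"[{item['faithfulness_likelihood']}] {item['response_text']}")
```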
33
+
34
+ **Going from input to output**: When calling the intrinsic through granite-common, one should follow the steps below to transform the intrinsic input into the corresponding output. These steps are also exemplified in the code snippets included in the Quickstart Example section below. Given an input chat completion request, the request should be passed to the corresponding input processor (also referred to as IntrinsicsRewriter) provided by granite-common. The input processor converts the request to the format expected by the underlying hallucination detection model. This includes, among other things, splitting the last assistant response and the documents into sentences and prepending them with sentence IDs, as well as introducing an appropriate task-specific instruction. The input processor's result should then be passed to the underlying hallucination detection model for inference. The model identifies hallucinations using a compact representation consisting of sentence IDs in the last assistant response and documents. This output should finally be passed to the appropriate output processor (also referred to as IntrinsicsResultProcessor) provided by granite-common. The output processor converts the low-level raw model output to the final output by, among other things, mapping the sentence IDs back to response and document spans. The result is an application-friendly format ready for consumption by downstream applications.
35
+
36
+ ## Quickstart Example
37
+
38
+ The recommended way to call this intrinsic is through the [Mellea](https://mellea.ai) framework.
39
+ Here is some example code for calling this intrinsic from Mellea:
40
+ ```python
41
+ from mellea.backends.huggingface import LocalHFBackend
42
+ from mellea.stdlib.base import ChatContext, Document
43
+ from mellea.stdlib.chat import Message
44
+ from mellea.stdlib.intrinsics import rag
45
+ import json
46
+
47
+
48
+ backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-2b-instruct")
49
+ context = (
50
+ ChatContext()
51
+ .add(Message("assistant", "Hello there, how can I help you?"))
52
+ .add(Message("user", "Tell me about some yellow fish."))
53
+ )
54
+
55
+ assistant_response = "Purple bumble fish are yellow. Green bumble fish are also yellow."
56
+
57
+ documents = [
58
+ Document(
59
+ doc_id="1",
60
+ text="The only type of fish that is yellow is the purple bumble fish.",
61
+ )
62
+ ]
63
+
64
+ result = rag.flag_hallucinated_content(assistant_response, documents, context, backend)
65
+ print(f"Result of hallucination check: {json.dumps(result, indent=2)}")
66
+ ```
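Downstream code can then consume the result. Here is a short sketch assuming the output shape described in the Intrinsic Output section above (the sample entries and the 0.5 threshold are fabricated for illustration):

```python
# Sample result entries in the documented output shape; values are made up
# for illustration only.
result = [
    {"response_begin": 0, "response_end": 30,
     "response_text": "Purple bumble fish are yellow.",
     "faithfulness_likelihood": 1.0,
     "explanation": "Supported by document 1."},
    {"response_begin": 31, "response_end": 65,
     "response_text": "Green bumble fish are also yellow.",
     "faithfulness_likelihood": 0.0,
     "explanation": "Not supported by document 1."},
]

# Flag any sentence whose likelihood of being faithful falls below a
# threshold (0.5 here is an arbitrary choice for the sketch).
THRESHOLD = 0.5
flagged = [entry["response_text"] for entry in result
           if entry["faithfulness_likelihood"] < THRESHOLD]
print(flagged)  # ['Green bumble fish are also yellow.']
```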
67
+
68
+
69
+ ## Training Details
70
+
71
+ The process of generating the training data for the hallucination detection intrinsic consisted of two main steps:
72
+
73
+ - **Multi-turn RAG conversation generation:** Starting from publicly available document corpora, we generated a set of multi-turn RAG data, consisting of multi-turn conversations grounded on passages retrieved from the corpus. For details on the RAG conversation generation process, please refer to the [Granite Technical Report](https://github.com/ibm-granite/granite-3.0-language-models/blob/main/paper.pdf) and [Lee, Young-Suk, et al.](https://arxiv.org/pdf/2409.11500).
74
+
75
+ - **Faithfulness label generation:** For creating the faithfulness labels for the responses, we used a multi-step synthetic hallucination label and reasoning generation pipeline.
76
+ This process resulted in ~50K data instances, which were used to train the LoRA adapter.
77
+
78
+
79
+
80
+ ### Training Data
81
+
82
+
83
+
84
+ The following public datasets were used as seed datasets for the multi-turn RAG conversation generation process:
85
+
86
+ - [CoQA](https://stanfordnlp.github.io/coqa/) - Wikipedia passages
87
+
88
+ - [MultiDoc2Dial](https://huggingface.co/datasets/IBM/multidoc2dial)
89
+
90
+ - [QuAC](https://huggingface.co/datasets/allenai/quac)
91
+
92
+
93
+
94
+
95
+ ## Evaluation
96
+
97
+ We evaluated the LoRA adapter on the QA portion of the [RAGTruth](https://aclanthology.org/2024.acl-long.585/) benchmark, comparing response-level hallucination detection performance between the LoRA adapter and the methods reported in the RAGTruth paper. Responses that obtain a faithfulness label of `partial` or `unfaithful` for at least one sentence are considered hallucinated.
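The response-level criterion above can be sketched as:

```python
def response_is_hallucinated(sentence_labels):
    # A response counts as hallucinated if at least one of its sentences
    # receives a faithfulness label of "partial" or "unfaithful".
    return any(label in ("partial", "unfaithful") for label in sentence_labels)

print(response_is_hallucinated(["faithful", "faithful"]))  # False
print(response_is_hallucinated(["faithful", "partial"]))   # True
```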
98
+
99
+
100
+
101
+ The results are shown in the table below; the numbers for the baselines are taken from the [RAGTruth](https://aclanthology.org/2024.acl-long.585/) paper.
102
+
103
+
104
+ | Model | Precision | Recall | F1 |
105
+ |---|---|---|---|
106
+ | GPT 4o mini (prompted) | 46.8 | 59.6 | 52.4 |
107
+ | GPT 4o (prompted) | 49.5 | 60.1 | 54.3 |
108
+ | gpt-4-turbo (prompted) | 33.2 | 90.6 | 45.6 |
109
+ | [SelfCheckGPT](https://aclanthology.org/2023.emnlp-main.557.pdf) | 35.0 | 58.0 | 43.7 |
110
+ | [LMvLM](https://aclanthology.org/2023.emnlp-main.778.pdf) | 18.7 | 76.9 | 30.1 |
111
+ | Granite 3.3-2b_hallucination-detection_LoRA | 55.8 | 74.9 | 63.9 |
112
+ | Granite 3.3-8b_hallucination-detection_LoRA | 58.1 | 77.6 | 66.5 |
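As a quick arithmetic sanity check (not part of the evaluation code), F1 is the harmonic mean of precision and recall, and recomputing it from the rounded precision/recall values in the table reproduces the reported F1 scores to within rounding:

```python
def f1(precision, recall):
    # Harmonic mean of precision and recall.
    return 2 * precision * recall / (precision + recall)

# Granite 3.3-8b LoRA row from the table above: P=58.1, R=77.6, reported F1=66.5.
# Recomputing from the rounded P/R gives ~66.4; the small gap comes from the
# table reporting F1 computed on unrounded precision/recall.
print(round(f1(58.1, 77.6), 1))  # 66.4
```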
113
+
114
+
115
+ ## Model Card Author
116
+
117
+ [Chulaka Gunasekara](mailto:chulaka.gunasekara@ibm.com)
hallucination_detection/gpt-oss-20b/lora/README.md ADDED
@@ -0,0 +1,202 @@
1
+ ---
2
+ base_model: openai/gpt-oss-20b
3
+ library_name: peft
4
+ ---
5
+
6
+ # Model Card for Model ID
7
+
8
+ <!-- Provide a quick summary of what the model is/does. -->
9
+
10
+
11
+
12
+ ## Model Details
13
+
14
+ ### Model Description
15
+
16
+ <!-- Provide a longer summary of what this model is. -->
17
+
18
+
19
+
20
+ - **Developed by:** [More Information Needed]
21
+ - **Funded by [optional]:** [More Information Needed]
22
+ - **Shared by [optional]:** [More Information Needed]
23
+ - **Model type:** [More Information Needed]
24
+ - **Language(s) (NLP):** [More Information Needed]
25
+ - **License:** [More Information Needed]
26
+ - **Finetuned from model [optional]:** [More Information Needed]
27
+
28
+ ### Model Sources [optional]
29
+
30
+ <!-- Provide the basic links for the model. -->
31
+
32
+ - **Repository:** [More Information Needed]
33
+ - **Paper [optional]:** [More Information Needed]
34
+ - **Demo [optional]:** [More Information Needed]
35
+
36
+ ## Uses
37
+
38
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
39
+
40
+ ### Direct Use
41
+
42
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
43
+
44
+ [More Information Needed]
45
+
46
+ ### Downstream Use [optional]
47
+
48
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
49
+
50
+ [More Information Needed]
51
+
52
+ ### Out-of-Scope Use
53
+
54
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
55
+
56
+ [More Information Needed]
57
+
58
+ ## Bias, Risks, and Limitations
59
+
60
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
61
+
62
+ [More Information Needed]
63
+
64
+ ### Recommendations
65
+
66
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
67
+
68
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
69
+
70
+ ## How to Get Started with the Model
71
+
72
+ Use the code below to get started with the model.
73
+
74
+ [More Information Needed]
75
+
76
+ ## Training Details
77
+
78
+ ### Training Data
79
+
80
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
81
+
82
+ [More Information Needed]
83
+
84
+ ### Training Procedure
85
+
86
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
87
+
88
+ #### Preprocessing [optional]
89
+
90
+ [More Information Needed]
91
+
92
+
93
+ #### Training Hyperparameters
94
+
95
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
96
+
97
+ #### Speeds, Sizes, Times [optional]
98
+
99
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
100
+
101
+ [More Information Needed]
102
+
103
+ ## Evaluation
104
+
105
+ <!-- This section describes the evaluation protocols and provides the results. -->
106
+
107
+ ### Testing Data, Factors & Metrics
108
+
109
+ #### Testing Data
110
+
111
+ <!-- This should link to a Dataset Card if possible. -->
112
+
113
+ [More Information Needed]
114
+
115
+ #### Factors
116
+
117
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
118
+
119
+ [More Information Needed]
120
+
121
+ #### Metrics
122
+
123
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
124
+
125
+ [More Information Needed]
126
+
127
+ ### Results
128
+
129
+ [More Information Needed]
130
+
131
+ #### Summary
132
+
133
+
134
+
135
+ ## Model Examination [optional]
136
+
137
+ <!-- Relevant interpretability work for the model goes here -->
138
+
139
+ [More Information Needed]
140
+
141
+ ## Environmental Impact
142
+
143
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
144
+
145
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
146
+
147
+ - **Hardware Type:** [More Information Needed]
148
+ - **Hours used:** [More Information Needed]
149
+ - **Cloud Provider:** [More Information Needed]
150
+ - **Compute Region:** [More Information Needed]
151
+ - **Carbon Emitted:** [More Information Needed]
152
+
153
+ ## Technical Specifications [optional]
154
+
155
+ ### Model Architecture and Objective
156
+
157
+ [More Information Needed]
158
+
159
+ ### Compute Infrastructure
160
+
161
+ [More Information Needed]
162
+
163
+ #### Hardware
164
+
165
+ [More Information Needed]
166
+
167
+ #### Software
168
+
169
+ [More Information Needed]
170
+
171
+ ## Citation [optional]
172
+
173
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
174
+
175
+ **BibTeX:**
176
+
177
+ [More Information Needed]
178
+
179
+ **APA:**
180
+
181
+ [More Information Needed]
182
+
183
+ ## Glossary [optional]
184
+
185
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
186
+
187
+ [More Information Needed]
188
+
189
+ ## More Information [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Authors [optional]
194
+
195
+ [More Information Needed]
196
+
197
+ ## Model Card Contact
198
+
199
+ [More Information Needed]
200
+ ### Framework versions
201
+
202
+ - PEFT 0.15.2
hallucination_detection/gpt-oss-20b/lora/adapter_config.json ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "openai/gpt-oss-20b",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 16,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "k_proj",
28
+ "q_proj",
29
+ "o_proj",
30
+ "v_proj"
31
+ ],
32
+ "task_type": "CAUSAL_LM",
33
+ "trainable_token_indices": null,
34
+ "use_dora": false,
35
+ "use_rslora": false
36
+ }
hallucination_detection/gpt-oss-20b/lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d324b0a7fad875c444762e7681d4ae1ebce10aa138c2823be6617f7a214a6bf
3
+ size 31876192
hallucination_detection/gpt-oss-20b/lora/chat_template.jinja ADDED
@@ -0,0 +1,331 @@
1
+ {#-
2
+ In addition to the normal inputs of `messages` and `tools`, this template also accepts the
3
+ following kwargs:
4
+ - "builtin_tools": A list, can contain "browser" and/or "python".
5
+ - "model_identity": A string that optionally describes the model identity.
6
+ - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium".
7
+ #}
8
+
9
+ {#- Tool Definition Rendering ============================================== #}
10
+ {%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
11
+ {%- if param_spec.type == "array" -%}
12
+ {%- if param_spec['items'] -%}
13
+ {%- if param_spec['items']['type'] == "string" -%}
14
+ {{- "string[]" }}
15
+ {%- elif param_spec['items']['type'] == "number" -%}
16
+ {{- "number[]" }}
17
+ {%- elif param_spec['items']['type'] == "integer" -%}
18
+ {{- "number[]" }}
19
+ {%- elif param_spec['items']['type'] == "boolean" -%}
20
+ {{- "boolean[]" }}
21
+ {%- else -%}
22
+ {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
23
+ {%- if inner_type == "object | object" or inner_type|length > 50 -%}
24
+ {{- "any[]" }}
25
+ {%- else -%}
26
+ {{- inner_type + "[]" }}
27
+ {%- endif -%}
28
+ {%- endif -%}
29
+ {%- if param_spec.nullable -%}
30
+ {{- " | null" }}
31
+ {%- endif -%}
32
+ {%- else -%}
33
+ {{- "any[]" }}
34
+ {%- if param_spec.nullable -%}
35
+ {{- " | null" }}
36
+ {%- endif -%}
37
+ {%- endif -%}
38
+ {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
39
+ {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
40
+ {%- if param_spec.type | length > 1 -%}
41
+ {{- param_spec.type | join(" | ") }}
42
+ {%- else -%}
43
+ {{- param_spec.type[0] }}
44
+ {%- endif -%}
45
+ {%- elif param_spec.oneOf -%}
46
+ {#- Handle oneOf schemas - check for complex unions and fallback to any #}
47
+ {%- set has_object_variants = false -%}
48
+ {%- for variant in param_spec.oneOf -%}
49
+ {%- if variant.type == "object" -%}
50
+ {%- set has_object_variants = true -%}
51
+ {%- endif -%}
52
+ {%- endfor -%}
53
+ {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
54
+ {{- "any" }}
55
+ {%- else -%}
56
+ {%- for variant in param_spec.oneOf -%}
57
+ {{- render_typescript_type(variant, required_params) -}}
58
+ {%- if variant.description %}
59
+ {{- "// " + variant.description }}
60
+ {%- endif -%}
61
+ {%- if variant.default is defined %}
62
+ {{ "// default: " + variant.default|tojson }}
63
+ {%- endif -%}
64
+ {%- if not loop.last %}
65
+ {{- " | " }}
66
+ {% endif -%}
67
+ {%- endfor -%}
68
+ {%- endif -%}
69
+ {%- elif param_spec.type == "string" -%}
70
+ {%- if param_spec.enum -%}
71
+ {{- '"' + param_spec.enum|join('" | "') + '"' -}}
72
+ {%- else -%}
73
+ {{- "string" }}
74
+ {%- if param_spec.nullable %}
75
+ {{- " | null" }}
76
+ {%- endif -%}
77
+ {%- endif -%}
78
+ {%- elif param_spec.type == "number" -%}
79
+ {{- "number" }}
80
+ {%- elif param_spec.type == "integer" -%}
81
+ {{- "number" }}
82
+ {%- elif param_spec.type == "boolean" -%}
83
+ {{- "boolean" }}
84
+
85
+ {%- elif param_spec.type == "object" -%}
86
+ {%- if param_spec.properties -%}
87
+ {{- "{\n" }}
88
+ {%- for prop_name, prop_spec in param_spec.properties.items() -%}
89
+ {{- prop_name -}}
90
+ {%- if prop_name not in (param_spec.required or []) -%}
91
+ {{- "?" }}
92
+ {%- endif -%}
93
+ {{- ": " }}
94
+ {{ render_typescript_type(prop_spec, param_spec.required or []) }}
95
+ {%- if not loop.last -%}
96
+ {{-", " }}
97
+ {%- endif -%}
98
+ {%- endfor -%}
99
+ {{- "}" }}
100
+ {%- else -%}
101
+ {{- "object" }}
102
+ {%- endif -%}
103
+ {%- else -%}
104
+ {{- "any" }}
105
+ {%- endif -%}
106
+ {%- endmacro -%}
107
+
108
+ {%- macro render_tool_namespace(namespace_name, tools) -%}
109
+ {{- "## " + namespace_name + "\n\n" }}
110
+ {{- "namespace " + namespace_name + " {\n\n" }}
111
+ {%- for tool in tools %}
112
+ {%- set tool = tool.function %}
113
+ {{- "// " + tool.description + "\n" }}
114
+ {{- "type "+ tool.name + " = " }}
115
+ {%- if tool.parameters and tool.parameters.properties %}
116
+ {{- "(_: {\n" }}
117
+ {%- for param_name, param_spec in tool.parameters.properties.items() %}
118
+ {%- if param_spec.description %}
119
+ {{- "// " + param_spec.description + "\n" }}
120
+ {%- endif %}
121
+ {{- param_name }}
122
+ {%- if param_name not in (tool.parameters.required or []) -%}
123
+ {{- "?" }}
124
+ {%- endif -%}
125
+ {{- ": " }}
126
+ {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
127
+ {%- if param_spec.default is defined -%}
128
+ {%- if param_spec.enum %}
129
+ {{- ", // default: " + param_spec.default }}
130
+ {%- elif param_spec.oneOf %}
131
+ {{- "// default: " + param_spec.default }}
132
+ {%- else %}
133
+ {{- ", // default: " + param_spec.default|tojson }}
134
+ {%- endif -%}
135
+ {%- endif -%}
136
+ {%- if not loop.last %}
137
+ {{- ",\n" }}
138
+ {%- else %}
139
+ {{- ",\n" }}
140
+ {%- endif -%}
141
+ {%- endfor %}
142
+ {{- "}) => any;\n\n" }}
143
+ {%- else -%}
144
+ {{- "() => any;\n\n" }}
145
+ {%- endif -%}
146
+ {%- endfor %}
147
+ {{- "} // namespace " + namespace_name }}
148
+ {%- endmacro -%}
149
+
150
+ {%- macro render_builtin_tools(browser_tool, python_tool) -%}
151
+ {%- if browser_tool %}
152
+ {{- "## browser\n\n" }}
153
+ {{- "// Tool for browsing.\n" }}
154
+ {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.\n" }}
155
+ {{- "// Cite information from the tool using the following format:\n" }}
156
+ {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.\n" }}
157
+ {{- "// Do not quote more than 10 words directly from the tool output.\n" }}
158
+ {{- "// sources=web (default: web)\n" }}
159
+ {{- "namespace browser {\n\n" }}
160
+ {{- "// Searches for information related to `query` and displays `topn` results.\n" }}
161
+ {{- "type search = (_: {\n" }}
162
+ {{- "query: string,\n" }}
163
+ {{- "topn?: number, // default: 10\n" }}
164
+ {{- "source?: string,\n" }}
165
+ {{- "}) => any;\n\n" }}
166
+ {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.\n" }}
167
+ {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.\n" }}
168
+ {{- "// If `cursor` is not provided, the most recent page is implied.\n" }}
169
+ {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.\n" }}
170
+ {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.\n" }}
171
+ {{- "// Use this function without `id` to scroll to a new location of an opened page.\n" }}
172
+ {{- "type open = (_: {\n" }}
173
+ {{- "id?: number | string, // default: -1\n" }}
174
+ {{- "cursor?: number, // default: -1\n" }}
175
+ {{- "loc?: number, // default: -1\n" }}
176
+ {{- "num_lines?: number, // default: -1\n" }}
177
+ {{- "view_source?: boolean, // default: false\n" }}
178
+ {{- "source?: string,\n" }}
179
+ {{- "}) => any;\n\n" }}
180
+ {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.\n" }}
181
+ {{- "type find = (_: {\n" }}
182
+ {{- "pattern: string,\n" }}
183
+ {{- "cursor?: number, // default: -1\n" }}
184
+ {{- "}) => any;\n\n" }}
185
+ {{- "} // namespace browser\n\n" }}
186
+ {%- endif -%}
187
+
188
+ {%- if python_tool %}
189
+ {{- "## python\n\n" }}
190
+ {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).\n\n" }}
191
+ {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.\n\n" }}
192
+ {%- endif -%}
193
+ {%- endmacro -%}
194
+
195
+ {#- System Message Construction ============================================ #}
196
+ {%- macro build_system_message() -%}
197
+ {%- if model_identity is not defined %}
198
+ {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %}
199
+ {%- endif %}
200
+ {{- model_identity + "\n" }}
201
+ {{- "Knowledge cutoff: 2024-06\n" }}
202
+ {{- "Current date: " + strftime_now("%Y-%m-%d") + "\n\n" }}
203
+ {%- if reasoning_effort is not defined %}
204
+ {%- set reasoning_effort = "medium" %}
205
+ {%- endif %}
206
+ {{- "Reasoning: " + reasoning_effort + "\n\n" }}
207
+ {%- if builtin_tools %}
208
+ {{- "# Tools\n\n" }}
209
+ {%- set available_builtin_tools = namespace(browser=false, python=false) %}
210
+ {%- for tool in builtin_tools %}
211
+ {%- if tool == "browser" %}
212
+ {%- set available_builtin_tools.browser = true %}
213
+ {%- elif tool == "python" %}
214
+ {%- set available_builtin_tools.python = true %}
215
+ {%- endif %}
216
+ {%- endfor %}
217
+ {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
218
+ {%- endif -%}
219
+ {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }}
220
+ {%- if tools -%}
221
+ {{- "\nCalls to these tools must go to the commentary channel: 'functions'." }}
222
+ {%- endif -%}
223
+ {%- endmacro -%}
224
+
225
+ {#- Main Template Logic ================================================= #}
226
+ {#- Set defaults #}
227
+
228
+ {#- Render system message #}
229
+ {{- "<|start|>system<|message|>" }}
230
+ {{- build_system_message() }}
231
+ {{- "<|end|>" }}
232
+
233
+ {#- Extract developer message #}
234
+ {%- if messages[0].role == "developer" or messages[0].role == "system" %}
235
+ {%- set developer_message = messages[0].content %}
236
+ {%- set loop_messages = messages[1:] %}
237
+ {%- else %}
238
+ {%- set developer_message = "" %}
239
+ {%- set loop_messages = messages %}
240
+ {%- endif %}
241
+
242
+ {#- Render developer message #}
243
+ {%- if developer_message or tools %}
244
+ {{- "<|start|>developer<|message|>" }}
245
+ {%- if developer_message %}
246
+ {{- "# Instructions\n\n" }}
247
+ {{- developer_message }}
248
+ {{- "\n\n" }}
249
+ {%- endif %}
250
+ {%- if tools -%}
251
+ {{- "# Tools\n\n" }}
252
+ {{- render_tool_namespace("functions", tools) }}
253
+ {%- endif -%}
254
+ {{- "<|end|>" }}
255
+ {%- endif %}
256
+
257
+ {#- Render messages #}
258
+ {%- set last_tool_call = namespace(name=none) %}
259
+ {%- for message in loop_messages -%}
260
+ {#- At this point only assistant/user/tool messages should remain #}
261
+ {%- if message.role == 'assistant' -%}
262
+ {#- Checks to ensure the messages are being passed in the format we expect #}
263
+ {%- if "content" in message %}
264
+ {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %}
265
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
266
+ {%- endif %}
267
+ {%- endif %}
268
+ {%- if "thinking" in message %}
269
+ {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %}
270
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
271
+ {%- endif %}
272
+ {%- endif %}
273
+ {%- if "tool_calls" in message %}
274
+ {#- We need very careful handling here - we want to drop the tool call analysis message if the model #}
275
+ {#- has output a later <|final|> message, but otherwise we want to retain it. This is the only case #}
276
+ {#- when we render CoT/analysis messages in inference. #}
277
+ {%- set future_final_message = namespace(found=false) %}
278
+ {%- for future_message in loop_messages[loop.index:] %}
279
+ {%- if future_message.role == 'assistant' and "tool_calls" not in future_message %}
280
+ {%- set future_final_message.found = true %}
281
+ {%- endif %}
282
+ {%- endfor %}
283
+ {#- We assume max 1 tool call per message, and so we infer the tool call name #}
284
+ {#- in "tool" messages from the most recent assistant tool call name #}
285
+ {%- set tool_call = message.tool_calls[0] %}
286
+ {%- if tool_call.function %}
287
+ {%- set tool_call = tool_call.function %}
288
+ {%- endif %}
289
+ {%- if message.content and message.thinking %}
290
+ {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
291
+ {%- elif message.content and not future_final_message.found %}
292
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
293
+ {%- elif message.thinking and not future_final_message.found %}
294
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
295
+ {%- endif %}
296
+ {{- "<|start|>assistant to=" }}
297
+ {{- "functions." + tool_call.name + "<|channel|>commentary " }}
298
+ {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
299
+ {{- tool_call.arguments|tojson }}
300
+ {{- "<|call|>" }}
301
+ {%- set last_tool_call.name = tool_call.name %}
302
+ {%- elif loop.last and not add_generation_prompt %}
303
+ {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
304
+ {#- This is a situation that should only occur in training, never in inference. #}
305
+ {%- if "thinking" in message %}
306
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
307
+ {%- endif %}
308
+ {#- <|return|> indicates the end of generation, but <|end|> does not #}
309
+ {#- <|return|> should never be an input to the model, but we include it as the final token #}
310
+ {#- when training, so the model learns to emit it. #}
311
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
312
+ {%- else %}
313
+ {#- CoT is dropped during all previous turns, so we never render it for inference #}
314
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
315
+ {%- set last_tool_call.name = none %}
316
+ {%- endif %}
317
+ {%- elif message.role == 'tool' -%}
318
+ {%- if last_tool_call.name is none %}
319
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
320
+ {%- endif %}
321
+ {{- "<|start|>functions." + last_tool_call.name }}
322
+ {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }}
323
+ {%- elif message.role == 'user' -%}
324
+ {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
325
+ {%- endif -%}
326
+ {%- endfor -%}
327
+
328
+ {#- Generation prompt #}
329
+ {%- if add_generation_prompt -%}
330
+ <|start|>assistant
331
+ {%- endif -%}
hallucination_detection/gpt-oss-20b/lora/io.yaml ADDED
@@ -0,0 +1,81 @@
1
+ # Model name string, or null to use whatever is provided in the chat completion request
2
+ model: ~
3
+ # JSON schema of the model's output
4
+ response_format: |
5
+ {
6
+ "$defs": {
7
+ "HallucinationOutputEntry": {
8
+ "properties": {
9
+ "r": {
10
+ "minimum": 0,
11
+ "title": "Sentence Num",
12
+ "type": "integer"
13
+ },
14
+ "f": {
15
+ "title": "Is Faithful",
16
+ "type": "string",
17
+ "enum": ["faithful", "partial", "unfaithful"]
18
+ },
19
+ "e": {
20
+ "title": "Reasoning",
21
+ "type": "string"
22
+ }
23
+ },
24
+ "required": [
25
+ "r",
26
+ "e",
27
+ "f"
28
+ ],
29
+ "title": "HallucinationOutputEntry",
30
+ "type": "object"
31
+ }
32
+ },
33
+ "items": {
34
+ "$ref": "#/$defs/HallucinationOutputEntry"
35
+ },
36
+ "title": "HallucinationOutput",
37
+ "type": "array"
38
+ }
39
+ transformations:
40
+ # Use logprobs to replace "f" flag with a probability
41
+ - type: likelihood
42
+ categories_to_values:
43
+ "faithful": 1.0
44
+ "partial": 0.5
45
+ "unfaithful": 0.0
46
+ input_path: [~, "f"] # Null in path means wildcard
47
+ # Replace sentence number with sentence location and contents
48
+ - type: decode_sentences
49
+ source: "last_message"
50
+ input_path: [~, "r"] # Null in path means wildcard
51
+ # New fields to add for each sentence
52
+ output_names:
53
+ begin: "response_begin"
54
+ end: "response_end"
55
+ text: "response_text"
56
+ # Remove fields that we no longer need and rename some of the fields.
57
+ - type: project
58
+ input_path: []
59
+ retained_fields:
60
+ "response_begin": "response_begin"
61
+ "response_end": "response_end"
62
+ "response_text": "response_text"
63
+ "f": "faithfulness_likelihood"
64
+ "e": "explanation"
65
+ instruction: >
66
+ Split the last assistant response into individual sentences.
67
+ For each sentence in the last assistant response, identify the faithfulness
68
+ by comparing with the provided documents and generate the faithfulness reasoning
69
+ and faithfulness decision.
70
+ Ensure that your output includes all response sentence IDs,
71
+ and for each response sentence ID, provide the corresponding faithfulness
72
+ reasoning and faithfulness decision.
73
+ The output must be a json structure.
74
+ parameters:
75
+ # Current LoRA can be quite verbose in its explanations.
76
+ max_completion_tokens: 4096
77
+ sentence_boundaries:
78
+ last_message: "i"
79
+
80
+ # gpt-oss base model has no "documents" argument
81
+ docs_as_message: json
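The `likelihood` transformation declared in the io.yaml above replaces the categorical `f` flag with a scalar using the `categories_to_values` mapping and the model's logprobs. A minimal sketch of that mapping, assuming access to per-category log-probabilities (the logprob values below are made up for illustration):

```python
import math

# Mirrors categories_to_values from the io.yaml above.
categories_to_values = {"faithful": 1.0, "partial": 0.5, "unfaithful": 0.0}

def faithfulness_likelihood(logprobs):
    # Normalize the probability mass over the three category labels, then
    # take the expectation of the mapped values.
    probs = {c: math.exp(lp) for c, lp in logprobs.items()}
    total = sum(probs.values())
    return sum(categories_to_values[c] * p / total for c, p in probs.items())

# Mostly-confident "faithful" prediction yields a likelihood close to 1.0.
print(faithfulness_likelihood(
    {"faithful": -0.1, "partial": -3.0, "unfaithful": -5.0}))
```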
hallucination_detection/gpt-oss-20b/lora/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|return|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
+ }
hallucination_detection/gpt-oss-20b/lora/tokenizer.json ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0614fe83cadab421296e664e1f48f4261fa8fef6e03e63bb75c20f38e37d07d3
3
+ size 27868174
hallucination_detection/gpt-oss-20b/lora/tokenizer_config.json ADDED
@@ -0,0 +1,185 @@
1
+ {
2
+ "added_tokens_decoder": {
3
+ "199998": {
4
+ "content": "<|startoftext|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "199999": {
12
+ "content": "<|endoftext|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "200000": {
20
+ "content": "<|reserved_200000|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "200001": {
28
+ "content": "<|reserved_200001|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "200002": {
36
+ "content": "<|return|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "200003": {
44
+ "content": "<|constrain|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "200004": {
52
+ "content": "<|reserved_200004|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "200005": {
60
+ "content": "<|channel|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "200006": {
68
+ "content": "<|start|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "200007": {
76
+ "content": "<|end|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "200008": {
84
+ "content": "<|message|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "200009": {
92
+ "content": "<|reserved_200009|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "200010": {
100
+ "content": "<|reserved_200010|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "200011": {
108
+ "content": "<|reserved_200011|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "200012": {
116
+ "content": "<|call|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "200013": {
124
+ "content": "<|reserved_200013|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "200014": {
132
+ "content": "<|reserved_200014|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "200015": {
140
+ "content": "<|reserved_200015|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "200016": {
148
+ "content": "<|reserved_200016|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "200017": {
156
+ "content": "<|reserved_200017|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "200018": {
164
+ "content": "<|endofprompt|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ }
171
+ },
172
+ "bos_token": "<|startoftext|>",
173
+ "clean_up_tokenization_spaces": false,
174
+ "eos_token": "<|return|>",
175
+ "extra_special_tokens": {},
176
+ "model_input_names": [
177
+ "input_ids",
178
+ "attention_mask"
179
+ ],
180
+ "model_max_length": 1000000000000000019884624838656,
181
+ "pad_token": "<|endoftext|>",
182
+ "padding_side": "right",
183
+ "split_special_tokens": false,
184
+ "tokenizer_class": "PreTrainedTokenizerFast"
185
+ }
query_rewrite/README.md ADDED
The diff for this file is too large to render. See raw diff
 
query_rewrite/gpt-oss-20b/lora/adapter_config.json ADDED
@@ -0,0 +1,45 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "openai/gpt-oss-20b",
+ "bias": "none",
+ "corda_config": null,
+ "eva_config": null,
+ "exclude_modules": null,
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 32,
+ "lora_bias": false,
+ "lora_dropout": 0,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "qalora_group_size": 16,
+ "r": 32,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "k_proj",
+ "v_proj"
+ ],
+ "target_parameters": [
+ "7.mlp.experts.gate_up_proj",
+ "7.mlp.experts.down_proj",
+ "15.mlp.experts.gate_up_proj",
+ "15.mlp.experts.down_proj",
+ "23.mlp.experts.gate_up_proj",
+ "23.mlp.experts.down_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "trainable_token_indices": null,
+ "use_dora": false,
+ "use_qalora": false,
+ "use_rslora": false
+ }
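The adapter config above targets the attention projections in every layer plus the MoE expert weights of three specific layers. As a minimal sketch (not part of the repository, and the inline JSON below is a hand-copied excerpt of the config, not the full file), the expert-layer indices and the effective LoRA scaling can be read straight out of the config:

```python
import json

# Hand-copied excerpt of adapter_config.json shown above.
config = json.loads("""
{
  "r": 32,
  "lora_alpha": 32,
  "target_modules": ["q_proj", "k_proj", "v_proj"],
  "target_parameters": [
    "7.mlp.experts.gate_up_proj",
    "7.mlp.experts.down_proj",
    "15.mlp.experts.gate_up_proj",
    "15.mlp.experts.down_proj",
    "23.mlp.experts.gate_up_proj",
    "23.mlp.experts.down_proj"
  ]
}
""")

# Each target_parameters entry is "<layer index>.<parameter path>", so the
# set of adapted expert layers is the set of integer prefixes.
expert_layers = sorted({int(p.split(".")[0]) for p in config["target_parameters"]})
print(expert_layers)  # -> [7, 15, 23]

# Standard (non-rsLoRA) scaling applied to the LoRA update is alpha / r.
print(config["lora_alpha"] / config["r"])  # -> 1.0
```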
query_rewrite/gpt-oss-20b/lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa15f38f74bc8f34b42bd252cc9b557455bcb33370bec17ea3cd38305d6acc0b
+ size 219238968
query_rewrite/gpt-oss-20b/lora/chat_template.jinja ADDED
@@ -0,0 +1,397 @@
+ {#-
+ In addition to the normal inputs of `messages` and `tools`, this template also accepts the
+ following kwargs:
+ - "builtin_tools": A list, can contain "browser" and/or "python".
+ - "model_identity": A string that optionally describes the model identity.
+ - "reasoning_effort": A string that describes the reasoning effort, defaults to "medium".
+ #}
+
+ {#- Tool Definition Rendering ============================================== #}
+ {%- macro render_typescript_type(param_spec, required_params, is_nullable=false) -%}
+ {%- if param_spec.type == "array" -%}
+ {%- if param_spec['items'] -%}
+ {%- if param_spec['items']['type'] == "string" -%}
+ {{- "string[]" }}
+ {%- elif param_spec['items']['type'] == "number" -%}
+ {{- "number[]" }}
+ {%- elif param_spec['items']['type'] == "integer" -%}
+ {{- "number[]" }}
+ {%- elif param_spec['items']['type'] == "boolean" -%}
+ {{- "boolean[]" }}
+ {%- else -%}
+ {%- set inner_type = render_typescript_type(param_spec['items'], required_params) -%}
+ {%- if inner_type == "object | object" or inner_type|length > 50 -%}
+ {{- "any[]" }}
+ {%- else -%}
+ {{- inner_type + "[]" }}
+ {%- endif -%}
+ {%- endif -%}
+ {%- if param_spec.nullable -%}
+ {{- " | null" }}
+ {%- endif -%}
+ {%- else -%}
+ {{- "any[]" }}
+ {%- if param_spec.nullable -%}
+ {{- " | null" }}
+ {%- endif -%}
+ {%- endif -%}
+ {%- elif param_spec.type is defined and param_spec.type is iterable and param_spec.type is not string and param_spec.type is not mapping and param_spec.type[0] is defined -%}
+ {#- Handle array of types like ["object", "object"] from Union[dict, list] #}
+ {%- if param_spec.type | length > 1 -%}
+ {{- param_spec.type | join(" | ") }}
+ {%- else -%}
+ {{- param_spec.type[0] }}
+ {%- endif -%}
+ {%- elif param_spec.oneOf -%}
+ {#- Handle oneOf schemas - check for complex unions and fallback to any #}
+ {%- set has_object_variants = false -%}
+ {%- for variant in param_spec.oneOf -%}
+ {%- if variant.type == "object" -%}
+ {%- set has_object_variants = true -%}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- if has_object_variants and param_spec.oneOf|length > 1 -%}
+ {{- "any" }}
+ {%- else -%}
+ {%- for variant in param_spec.oneOf -%}
+ {{- render_typescript_type(variant, required_params) -}}
+ {%- if variant.description %}
+ {{- "// " + variant.description }}
+ {%- endif -%}
+ {%- if variant.default is defined %}
+ {{ "// default: " + variant.default|tojson }}
+ {%- endif -%}
+ {%- if not loop.last %}
+ {{- " | " }}
+ {% endif -%}
+ {%- endfor -%}
+ {%- endif -%}
+ {%- elif param_spec.type == "string" -%}
+ {%- if param_spec.enum -%}
+ {{- '"' + param_spec.enum|join('" | "') + '"' -}}
+ {%- else -%}
+ {{- "string" }}
+ {%- if param_spec.nullable %}
+ {{- " | null" }}
+ {%- endif -%}
+ {%- endif -%}
+ {%- elif param_spec.type == "number" -%}
+ {{- "number" }}
+ {%- elif param_spec.type == "integer" -%}
+ {{- "number" }}
+ {%- elif param_spec.type == "boolean" -%}
+ {{- "boolean" }}
+
+ {%- elif param_spec.type == "object" -%}
+ {%- if param_spec.properties -%}
+ {{- "{
+ " }}
+ {%- for prop_name, prop_spec in param_spec.properties.items() -%}
+ {{- prop_name -}}
+ {%- if prop_name not in (param_spec.required or []) -%}
+ {{- "?" }}
+ {%- endif -%}
+ {{- ": " }}
+ {{ render_typescript_type(prop_spec, param_spec.required or []) }}
+ {%- if not loop.last -%}
+ {{-", " }}
+ {%- endif -%}
+ {%- endfor -%}
+ {{- "}" }}
+ {%- else -%}
+ {{- "object" }}
+ {%- endif -%}
+ {%- else -%}
+ {{- "any" }}
+ {%- endif -%}
+ {%- endmacro -%}
+
+ {%- macro render_tool_namespace(namespace_name, tools) -%}
+ {{- "## " + namespace_name + "
+
+ " }}
+ {{- "namespace " + namespace_name + " {
+
+ " }}
+ {%- for tool in tools %}
+ {%- set tool = tool.function %}
+ {{- "// " + tool.description + "
+ " }}
+ {{- "type "+ tool.name + " = " }}
+ {%- if tool.parameters and tool.parameters.properties %}
+ {{- "(_: {
+ " }}
+ {%- for param_name, param_spec in tool.parameters.properties.items() %}
+ {%- if param_spec.description %}
+ {{- "// " + param_spec.description + "
+ " }}
+ {%- endif %}
+ {{- param_name }}
+ {%- if param_name not in (tool.parameters.required or []) -%}
+ {{- "?" }}
+ {%- endif -%}
+ {{- ": " }}
+ {{- render_typescript_type(param_spec, tool.parameters.required or []) }}
+ {%- if param_spec.default is defined -%}
+ {%- if param_spec.enum %}
+ {{- ", // default: " + param_spec.default }}
+ {%- elif param_spec.oneOf %}
+ {{- "// default: " + param_spec.default }}
+ {%- else %}
+ {{- ", // default: " + param_spec.default|tojson }}
+ {%- endif -%}
+ {%- endif -%}
+ {%- if not loop.last %}
+ {{- ",
+ " }}
+ {%- else %}
+ {{- "
+ " }}
+ {%- endif -%}
+ {%- endfor %}
+ {{- "}) => any;
+
+ " }}
+ {%- else -%}
+ {{- "() => any;
+
+ " }}
+ {%- endif -%}
+ {%- endfor %}
+ {{- "} // namespace " + namespace_name }}
+ {%- endmacro -%}
+
+ {%- macro render_builtin_tools(browser_tool, python_tool) -%}
+ {%- if browser_tool %}
+ {{- "## browser
+
+ " }}
+ {{- "// Tool for browsing.
+ " }}
+ {{- "// The `cursor` appears in brackets before each browsing display: `[{cursor}]`.
+ " }}
+ {{- "// Cite information from the tool using the following format:
+ " }}
+ {{- "// `【{cursor}†L{line_start}(-L{line_end})?】`, for example: `【6†L9-L11】` or `【8†L3】`.
+ " }}
+ {{- "// Do not quote more than 10 words directly from the tool output.
+ " }}
+ {{- "// sources=web (default: web)
+ " }}
+ {{- "namespace browser {
+
+ " }}
+ {{- "// Searches for information related to `query` and displays `topn` results.
+ " }}
+ {{- "type search = (_: {
+ " }}
+ {{- "query: string,
+ " }}
+ {{- "topn?: number, // default: 10
+ " }}
+ {{- "source?: string,
+ " }}
+ {{- "}) => any;
+
+ " }}
+ {{- "// Opens the link `id` from the page indicated by `cursor` starting at line number `loc`, showing `num_lines` lines.
+ " }}
+ {{- "// Valid link ids are displayed with the formatting: `【{id}†.*】`.
+ " }}
+ {{- "// If `cursor` is not provided, the most recent page is implied.
+ " }}
+ {{- "// If `id` is a string, it is treated as a fully qualified URL associated with `source`.
+ " }}
+ {{- "// If `loc` is not provided, the viewport will be positioned at the beginning of the document or centered on the most relevant passage, if available.
+ " }}
+ {{- "// Use this function without `id` to scroll to a new location of an opened page.
+ " }}
+ {{- "type open = (_: {
+ " }}
+ {{- "id?: number | string, // default: -1
+ " }}
+ {{- "cursor?: number, // default: -1
+ " }}
+ {{- "loc?: number, // default: -1
+ " }}
+ {{- "num_lines?: number, // default: -1
+ " }}
+ {{- "view_source?: boolean, // default: false
+ " }}
+ {{- "source?: string,
+ " }}
+ {{- "}) => any;
+
+ " }}
+ {{- "// Finds exact matches of `pattern` in the current page, or the page given by `cursor`.
+ " }}
+ {{- "type find = (_: {
+ " }}
+ {{- "pattern: string,
+ " }}
+ {{- "cursor?: number, // default: -1
+ " }}
+ {{- "}) => any;
+
+ " }}
+ {{- "} // namespace browser
+
+ " }}
+ {%- endif -%}
+
+ {%- if python_tool %}
+ {{- "## python
+
+ " }}
+ {{- "Use this tool to execute Python code in your chain of thought. The code will not be shown to the user. This tool should be used for internal reasoning, but not for code that is intended to be visible to the user (e.g. when creating plots, tables, or files).
+
+ " }}
+ {{- "When you send a message containing Python code to python, it will be executed in a stateful Jupyter notebook environment. python will respond with the output of the execution or time out after 120.0 seconds. The drive at '/mnt/data' can be used to save and persist user files. Internet access for this session is UNKNOWN. Depends on the cluster.
+
+ " }}
+ {%- endif -%}
+ {%- endmacro -%}
+
+ {#- System Message Construction ============================================ #}
+ {%- macro build_system_message() -%}
+ {%- if model_identity is not defined %}
+ {%- set model_identity = "You are ChatGPT, a large language model trained by OpenAI." %}
+ {%- endif %}
+ {{- model_identity + "
+ " }}
+ {{- "Knowledge cutoff: 2024-06
+ " }}
+ {{- "Current date: " + strftime_now("%Y-%m-%d") + "
+
+ " }}
+ {%- if reasoning_effort is not defined %}
+ {%- set reasoning_effort = "medium" %}
+ {%- endif %}
+ {{- "Reasoning: " + reasoning_effort + "
+
+ " }}
+ {%- if builtin_tools %}
+ {{- "# Tools
+
+ " }}
+ {%- set available_builtin_tools = namespace(browser=false, python=false) %}
+ {%- for tool in builtin_tools %}
+ {%- if tool == "browser" %}
+ {%- set available_builtin_tools.browser = true %}
+ {%- elif tool == "python" %}
+ {%- set available_builtin_tools.python = true %}
+ {%- endif %}
+ {%- endfor %}
+ {{- render_builtin_tools(available_builtin_tools.browser, available_builtin_tools.python) }}
+ {%- endif -%}
+ {{- "# Valid channels: analysis, commentary, final. Channel must be included for every message." }}
+ {%- if tools -%}
+ {{- "
+ Calls to these tools must go to the commentary channel: 'functions'." }}
+ {%- endif -%}
+ {%- endmacro -%}
+
+ {#- Main Template Logic ================================================= #}
+ {#- Set defaults #}
+
+ {#- Render system message #}
+ {{- "<|start|>system<|message|>" }}
+ {{- build_system_message() }}
+ {{- "<|end|>" }}
+
+ {#- Extract developer message #}
+ {%- if messages[0].role == "developer" or messages[0].role == "system" %}
+ {%- set developer_message = messages[0].content %}
+ {%- set loop_messages = messages[1:] %}
+ {%- else %}
+ {%- set developer_message = "" %}
+ {%- set loop_messages = messages %}
+ {%- endif %}
+
+ {#- Render developer message #}
+ {%- if developer_message or tools %}
+ {{- "<|start|>developer<|message|>" }}
+ {%- if developer_message %}
+ {{- "# Instructions
+
+ " }}
+ {{- developer_message }}
+ {%- endif %}
+ {%- if tools -%}
+ {{- "
+
+ " }}
+ {{- "# Tools
+
+ " }}
+ {{- render_tool_namespace("functions", tools) }}
+ {%- endif -%}
+ {{- "<|end|>" }}
+ {%- endif %}
+
+ {#- Render messages #}
+ {%- set last_tool_call = namespace(name=none) %}
+ {%- for message in loop_messages -%}
+ {#- At this point only assistant/user/tool messages should remain #}
+ {%- if message.role == 'assistant' -%}
+ {#- Checks to ensure the messages are being passed in the format we expect #}
+ {%- if "content" in message %}
+ {%- if "<|channel|>analysis<|message|>" in message.content or "<|channel|>final<|message|>" in message.content %}
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the content field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
+ {%- endif %}
+ {%- endif %}
+ {%- if "thinking" in message %}
+ {%- if "<|channel|>analysis<|message|>" in message.thinking or "<|channel|>final<|message|>" in message.thinking %}
+ {{- raise_exception("You have passed a message containing <|channel|> tags in the thinking field. Instead of doing this, you should pass analysis messages (the string between '<|message|>' and '<|end|>') in the 'thinking' field, and final messages (the string between '<|message|>' and '<|end|>') in the 'content' field.") }}
+ {%- endif %}
+ {%- endif %}
+ {%- if "tool_calls" in message %}
+ {#- We assume max 1 tool call per message, and so we infer the tool call name #}
+ {#- in "tool" messages from the most recent assistant tool call name #}
+ {%- set tool_call = message.tool_calls[0] %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {%- if message.content and message.thinking %}
+ {{- raise_exception("Cannot pass both content and thinking in an assistant message with tool calls! Put the analysis message in one or the other, but not both.") }}
+ {%- elif message.content %}
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.content + "<|end|>" }}
+ {%- elif message.thinking %}
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
+ {%- endif %}
+ {{- "<|start|>assistant to=" }}
+ {{- "functions." + tool_call.name + "<|channel|>commentary " }}
+ {{- (tool_call.content_type if tool_call.content_type is defined else "json") + "<|message|>" }}
+ {{- tool_call.arguments|tojson }}
+ {{- "<|call|>" }}
+ {%- set last_tool_call.name = tool_call.name %}
+ {%- elif loop.last and not add_generation_prompt %}
+ {#- Only render the CoT if the final turn is an assistant turn and add_generation_prompt is false #}
+ {#- This is a situation that should only occur in training, never in inference. #}
+ {%- if "thinking" in message %}
+ {{- "<|start|>assistant<|channel|>analysis<|message|>" + message.thinking + "<|end|>" }}
+ {%- endif %}
+ {#- <|return|> indicates the end of generation, but <|end|> does not #}
+ {#- <|return|> should never be an input to the model, but we include it as the final token #}
+ {#- when training, so the model learns to emit it. #}
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|return|>" }}
+ {%- else %}
+ {#- CoT is dropped during all previous turns, so we never render it for inference #}
+ {{- "<|start|>assistant<|channel|>final<|message|>" + message.content + "<|end|>" }}
+ {%- set last_tool_call.name = none %}
+ {%- endif %}
+ {%- elif message.role == 'tool' -%}
+ {%- if last_tool_call.name is none %}
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
+ {%- endif %}
+ {{- "<|start|>functions." + last_tool_call.name }}
+ {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }}
+ {%- elif message.role == 'user' -%}
+ {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
+ {%- endif -%}
+ {%- endfor -%}
+
+ {#- Generation prompt #}
+ {%- if add_generation_prompt -%}
+ <|start|>assistant
+ {%- endif -%}
query_rewrite/gpt-oss-20b/lora/io.yaml ADDED
@@ -0,0 +1,22 @@
+ # Model name string, or null to use whatever is provided in the chat completion request
+ model: ~
+ # JSON schema of the model's output
+ response_format: |
+ {
+ "properties": {
+ "rewritten_question": {
+ "title": "Rewritten Question",
+ "type": "string"
+ }
+ },
+ "required": [
+ "rewritten_question"
+ ],
+ "title": "QueryRewriteOutput",
+ "type": "object"
+ }
+ transformations: ~
+ instruction: ~
+ parameters:
+ max_completion_tokens: 1024
+ sentence_boundaries: false
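The `response_format` schema above constrains the query_rewrite adapter to emit a JSON object with a single `rewritten_question` string. A minimal stdlib-only sketch of checking a completion against that schema (the `sample` completion string below is hypothetical, not real model output):

```python
import json

# The response_format schema from io.yaml above.
schema = json.loads("""
{
  "properties": {
    "rewritten_question": {"title": "Rewritten Question", "type": "string"}
  },
  "required": ["rewritten_question"],
  "title": "QueryRewriteOutput",
  "type": "object"
}
""")

def check_output(raw: str) -> str:
    """Parse a completion and verify the required string fields are present."""
    obj = json.loads(raw)
    for key in schema["required"]:
        if key not in obj:
            raise ValueError(f"missing required field: {key}")
        if schema["properties"][key]["type"] == "string" and not isinstance(obj[key], str):
            raise TypeError(f"{key} must be a string")
    return obj["rewritten_question"]

# Hypothetical completion from the query_rewrite adapter:
sample = '{"rewritten_question": "What is the capital of France?"}'
print(check_output(sample))  # -> What is the capital of France?
```

In practice a full validator such as `jsonschema` would cover the general case; this sketch only handles the flat, required-string shape used here.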
query_rewrite/gpt-oss-20b/lora/special_tokens_map.json ADDED
@@ -0,0 +1,17 @@
+ {
+ "bos_token": {
+ "content": "<|startoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|return|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<|return|>"
+ }
query_rewrite/gpt-oss-20b/lora/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0614fe83cadab421296e664e1f48f4261fa8fef6e03e63bb75c20f38e37d07d3
+ size 27868174
query_rewrite/gpt-oss-20b/lora/tokenizer_config.json ADDED
@@ -0,0 +1,184 @@
+ {
+ "added_tokens_decoder": {
+ "199998": {
+ "content": "<|startoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "199999": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200000": {
+ "content": "<|reserved_200000|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200001": {
+ "content": "<|reserved_200001|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200002": {
+ "content": "<|return|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200003": {
+ "content": "<|constrain|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200004": {
+ "content": "<|reserved_200004|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200005": {
+ "content": "<|channel|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200006": {
+ "content": "<|start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200007": {
+ "content": "<|end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200008": {
+ "content": "<|message|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200009": {
+ "content": "<|reserved_200009|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200010": {
+ "content": "<|reserved_200010|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200011": {
+ "content": "<|reserved_200011|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200012": {
+ "content": "<|call|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200013": {
+ "content": "<|reserved_200013|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200014": {
+ "content": "<|reserved_200014|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200015": {
+ "content": "<|reserved_200015|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200016": {
+ "content": "<|reserved_200016|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200017": {
+ "content": "<|reserved_200017|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "200018": {
+ "content": "<|endofprompt|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|startoftext|>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|return|>",
+ "extra_special_tokens": {},
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<|return|>",
+ "padding_side": "left",
+ "tokenizer_class": "PreTrainedTokenizerFast"
+ }
run_vllm.sh ADDED
@@ -0,0 +1,45 @@
+ #! /bin/bash
+
+ ################################################################################
+ # Shell script that starts a copy of vLLM with a base model plus all the
+ # available LoRA adapters in this repository.
+ #
+ # To run this script:
+ # 1. Install an appropriate build of vLLM for your machine (`pip install vllm`)
+ # 2. Install the Hugging Face CLI (`pip install -U "huggingface_hub[cli]"`)
+ # 3. Download the intrinsics library by running:
+ # hf download ibm-granite/rag-intrinsics-lib --local-dir ./rag-intrinsics-lib
+ # 4. Edit the constants BASE_MODEL_NAME, BASE_MODEL_ORG, and PORT as needed
+ # 5. Run this script from the root of your local copy of rag-intrinsics-lib.
+ ################################################################################
+
+ BASE_MODEL_NAME=gpt-oss-20b
+ BASE_MODEL_ORG=openai
+ PORT=55555
+
+ export VLLM_API_KEY=rag_intrinsics_1234
+
+ # Find all LoRA adapters for the target base model.
+ LORAS=""
+ for item in "."/*; do
+ # Remove the "./"
+ name=$(basename -- "${item}")
+ if [ -d "./${name}/${BASE_MODEL_NAME}/lora" ]; then
+ LORAS+="${name}=./${name}/${BASE_MODEL_NAME}/lora "
+ fi
+ done
+
+
+ CMD="vllm serve ${BASE_MODEL_ORG}/${BASE_MODEL_NAME} \
+ --port ${PORT} \
+ --gpu-memory-utilization 0.45 \
+ --max-model-len 8192 \
+ --enable-lora \
+ --max_lora_rank 64 \
+ --lora-modules $LORAS"
+
+ echo $CMD
+ $CMD
+
+
+