File size: 6,622 Bytes
43deb41
16e8ca7
dd4af65
 
 
 
 
 
 
 
2777673
dd4af65
 
 
 
b1ea9b8
3847776
 
a6d735c
096bfbe
d6a8c19
c113853
0d79fa8
60e94d4
 
 
 
a6d735c
c10bb8e
fea213a
63e2f0b
 
 
 
0d79fa8
a6d735c
9f2413e
 
 
 
 
cc74931
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
---
license: apache-2.0
datasets:
- UniverseTBD/arxiv-qa-astro-ph
language:
- en
metrics:
- bleu
base_model:
- unsloth/Qwen3-1.7B-unsloth-bnb-4bit
pipeline_tag: text-generation
tags:
- astrophysics
- unsloth
- PEFT
library_name: peft
---

## Information
- **Developer:** [luminolous](https://huggingface.co/luminolous)
- **Finetuned from model:** [Qwen3-1.7B-unsloth-bnb-4bit](https://huggingface.co/unsloth/Qwen3-1.7B-unsloth-bnb-4bit)
- **License:** Apache License 2.0

This model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth).

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)

## Evaluation Scores
| Metric       | Base  | LoRA   |
|--------------|:-----:|:------:|
| BLEU     | 16.10 | **38.95** |
| ROUGE-1  | 0.299 | **0.323** |
| ROUGE-2  | 0.090 | **0.091** |
| ROUGE-L  | 0.218 | **0.230** |

## How to use
1) Install the required libraries (the leading `!` is notebook syntax; drop it when running in a regular shell):
```bash
!pip install -U bitsandbytes transformers peft accelerate
```
2) Run the following code:
```python
import re, torch
from typing import List, Optional
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Matches one complete <think>...</think> span; case-insensitive, may cross lines.
_THINK_RE = re.compile(r"(?is)<think>.*?</think>")
# Matches everything from the start of the text through the first standalone
# word "assistant", an optional ":" or "-" after it, and any trailing whitespace.
_LEAD_ASSIST = re.compile(r"(?is)^.*?\bassistant\b[:\-]?\s*")


def clean_text(s: str) -> str:
    """Return decoded model text without reasoning spans or a leading role marker.

    Every closed ``<think>...</think>`` span is deleted. After trimming, the
    text up to and including the first standalone word ``assistant`` (with an
    optional ``:`` or ``-`` and following whitespace) is dropped, if that word
    occurs at all. The result is stripped of surrounding whitespace.

    Args:
        s: Raw decoded text.

    Returns:
        The cleaned text.
    """
    without_reasoning = _THINK_RE.sub("", s)
    trimmed = without_reasoning.strip()
    # count=1: only the first role marker (and whatever precedes it) is removed.
    answer = _LEAD_ASSIST.sub("", trimmed, count=1)
    return answer.strip()

def load_model_and_tokenizer(
    base_model: Optional[str],
    adapter_repo: str,
    max_seq_len: int = 4096,
    load_in_4bit: bool = True,
    compute_dtype: torch.dtype = torch.bfloat16,
):
    """Load the base causal LM, attach the PEFT adapter, and return both with a tokenizer.

    Args:
        base_model: Hub id or local path of the base model. When ``None``, the
            base recorded in the adapter's PEFT config is used instead.
        adapter_repo: Hub id or local path of the PEFT adapter.
        max_seq_len: Accepted for interface compatibility; this loader does
            not currently use it.
        load_in_4bit: When true, the base model is loaded with a 4-bit NF4
            ``BitsAndBytesConfig`` (double quantization enabled).
        compute_dtype: Compute dtype used by the 4-bit quantization config.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer pads and truncates on
        the left and has a pad token whenever an EOS token is available.

    Raises:
        ValueError: If ``base_model`` is ``None`` and the adapter config does
            not name a base model either.
    """
    peft_cfg = PeftConfig.from_pretrained(adapter_repo)
    suggested_base = getattr(peft_cfg, "base_model_name_or_path", None)

    if base_model is None:
        if not suggested_base:
            # Fail early with a clear message instead of handing None to
            # from_pretrained() below.
            raise ValueError(
                f"base_model was not given and the adapter config of "
                f"'{adapter_repo}' does not record a base model."
            )
        base_model = suggested_base
        print(f"[info] Using base from adapter config: {base_model}")
    elif suggested_base and (base_model != suggested_base):
        print(f"[warn] Adapter expects base '{suggested_base}', "
              f"but you set '{base_model}'. Make sure they match!")

    quant_cfg = None
    if load_in_4bit:
        quant_cfg = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_compute_dtype=compute_dtype,
            bnb_4bit_quant_type="nf4",
        )

    tok = AutoTokenizer.from_pretrained(base_model, use_fast=True, trust_remote_code=True)

    # Batched generation needs a pad token; fall back to EOS when none is set.
    if tok.pad_token_id is None and tok.eos_token_id is not None:
        tok.pad_token = tok.eos_token
        tok.pad_token_id = tok.eos_token_id

    # Left padding keeps the end of every prompt adjacent to the generated tokens.
    tok.padding_side = "left"
    tok.truncation_side = "left"

    model = AutoModelForCausalLM.from_pretrained(
        base_model,
        device_map="auto",
        # The dtype is left unset when quantizing; otherwise use the checkpoint's own.
        torch_dtype="auto" if not load_in_4bit else None,
        quantization_config=quant_cfg,
        trust_remote_code=True,
    )

    model = PeftModel.from_pretrained(model, adapter_repo)

    # Keep the generation config's pad id in sync with the tokenizer.
    if getattr(model, "generation_config", None) is not None and tok.pad_token_id is not None:
        model.generation_config.pad_token_id = tok.pad_token_id

    return model, tok

def build_chat(tok, user_text: str, system_text: Optional[str] = None) -> str:
    """Render a single-turn conversation through the tokenizer's chat template.

    Args:
        tok: Tokenizer exposing ``apply_chat_template``.
        user_text: Content of the user message.
        system_text: Optional system message; skipped when ``None`` or empty.

    Returns:
        The untokenized prompt, with the generation prompt appended.
    """
    user_turn = {"role": "user", "content": user_text}
    if system_text:
        conversation = [{"role": "system", "content": system_text}, user_turn]
    else:
        conversation = [user_turn]
    return tok.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )

@torch.inference_mode()
def generate_one(
    model,
    tok,
    user_text: str,
    system_text: str = "Answer concisely, straight to the point, no <think>.",
    max_new_tokens: int = 200,
    temperature: float = 0.7,
    top_p: float = 0.9,
):
    """Generate one cleaned answer for a single user message.

    Args:
        model: Causal LM exposing ``parameters()`` and ``generate()``.
        tok: Tokenizer used to build the prompt and decode the output.
        user_text: The user's question.
        system_text: System message placed before the user message.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature; sampling is disabled when it is
            ``None`` or not greater than zero.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded completion after ``clean_text`` post-processing.
    """
    device = next(model.parameters()).device
    encoded = tok(build_chat(tok, user_text, system_text), return_tensors="pt").to(device)
    prompt_len = encoded["input_ids"].shape[1]

    # Stop on the tokenizer's EOS token and on the chat end-of-turn marker.
    stop_ids = []
    for token_id in (tok.eos_token_id, tok.convert_tokens_to_ids("<|im_end|>")):
        if token_id is not None:
            stop_ids.append(token_id)
    if len(stop_ids) == 1:
        stop_ids = stop_ids[0]

    use_sampling = temperature is not None and temperature > 0

    sequence = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        do_sample=use_sampling,
        temperature=temperature,
        top_p=top_p,
        num_beams=1,
        eos_token_id=stop_ids,
        pad_token_id=tok.pad_token_id,
        use_cache=True,
        no_repeat_ngram_size=3,
        repetition_penalty=1.1,
    )[0]

    # Drop the prompt portion; only the tokens after it are decoded.
    completion = tok.decode(sequence[prompt_len:], skip_special_tokens=True)
    return clean_text(completion)

@torch.inference_mode()
def generate_batch(
    model,
    tok,
    user_texts: List[str],
    system_text: Optional[str] = None,
    max_new_tokens: int = 200,
    temperature: float = 0.7,
    top_p: float = 0.9,
    batch_size: int = 8,
):
    """Generate cleaned answers for many user messages, ``batch_size`` at a time.

    Args:
        model: Causal LM exposing ``parameters()`` and ``generate()``.
        tok: Tokenizer used to build, pad, and decode the prompts.
        user_texts: Questions to answer; an empty list yields an empty result.
        system_text: Optional system message prepended to every conversation.
        max_new_tokens: Upper bound on generated tokens per answer.
        temperature: Sampling temperature; sampling is disabled when it is
            ``None`` or not greater than zero.
        top_p: Nucleus-sampling probability mass.
        batch_size: Number of prompts sent to ``generate()`` per call.

    Returns:
        One cleaned answer per entry of ``user_texts``, in the same order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    device = next(model.parameters()).device

    # Stop on the tokenizer's EOS token and on the chat end-of-turn marker.
    im_end_id = tok.convert_tokens_to_ids("<|im_end|>")
    eos_ids = [i for i in [tok.eos_token_id, im_end_id] if i is not None]
    eos_ids = eos_ids[0] if len(eos_ids) == 1 else eos_ids

    answers = []
    for start in range(0, len(user_texts), batch_size):
        chunk = user_texts[start:start + batch_size]
        prompts = [build_chat(tok, u, system_text) for u in chunk]
        toks = tok(prompts, return_tensors="pt", padding=True).to(device)

        # generate() returns every row as [padded prompt | new tokens], so the
        # completion starts at the padded width for all rows. Slicing at each
        # row's unpadded length would leak the tail of left-padded prompts
        # into the decoded answer.
        prompt_width = toks["input_ids"].shape[1]

        outs = model.generate(
            **toks,
            max_new_tokens=max_new_tokens,
            do_sample=(temperature is not None and temperature > 0),
            temperature=temperature,
            top_p=top_p,
            num_beams=1,
            eos_token_id=eos_ids,
            pad_token_id=tok.pad_token_id,
            use_cache=True,
            no_repeat_ngram_size=3,
            repetition_penalty=1.1,
        )

        for row in outs:
            ans = tok.decode(row[prompt_width:], skip_special_tokens=True)
            answers.append(clean_text(ans))
    return answers

if __name__ == "__main__":
    # Hub ids of the base checkpoint and of the LoRA adapter loaded on top of it.
    base_id = "unsloth/Qwen3-1.7B-unsloth-bnb-4bit"
    adapter_id = "luminolous/astropher-lora"

    model, tok = load_model_and_tokenizer(
        base_id,
        adapter_id,
        max_seq_len=4096,
        load_in_4bit=True,
        compute_dtype=torch.bfloat16,
    )

    # <- You can change the question here
    question = "What is inside a black hole?"
    answer = generate_one(model, tok, question, max_new_tokens=180)
    print(f"\nModel output: {answer}")

```