06-大语言模型（LLM）与应用——指令微调（Instruction Tuning）

指令微调（Instruction Tuning）是通过对预训练模型进行微调，使其能够理解和执行人类指令的技术。本文介绍了指令微调的核心概念、数据构建方法和应用场景。主要内容包括：1）指令微调与预训练的区别，前者使模型能遵循指令而非仅补全文本；2）指令数据的常见格式（纯指令、指令+输入、对话格式）和构建方法（人工标注、Self-Instruct等）；3）任务多样性涵盖生成、理解、推理等多种类型。典型模型包括 Alpaca 和 Vicuna。

xiaotao131

178人浏览 · 2026-05-02 00:10:46

xiaotao131 · 2026-05-02 00:10:46 发布

在这里插入图片描述

指令微调（Instruction Tuning）（指令数据构建、SFT、Alpaca/Vicuna）

一、指令微调概述

1.1 什么是指令微调？

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle, FancyBboxPatch
import warnings
warnings.filterwarnings('ignore')

# Section banner: rule, title, rule — one line each.
print("=" * 60, "指令微调：让模型理解人类指令", "=" * 60, sep="\n")

# Two side-by-side panels: pre-trained model (left) vs. instruction-tuned
# model (right). Both panels are pure text, so their axes are hidden.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
ax1, ax2 = axes

# ---- Left panel: pre-trained model -------------------------------------
ax1.axis('off')
ax1.set_title('预训练模型', fontsize=11)
ax1.text(
    0.5, 0.8, '预训练: 完形填空/文本补全',
    ha='center', fontsize=9,
    bbox={'boxstyle': 'round', 'facecolor': 'lightgray'},
)

# (prompt, completion) pairs illustrating fill-in-the-blank behaviour.
pre_train_examples = [
    ("输入: 中国的首都是[MASK]", "输出: 北京"),
    ("输入: I love you because you are [MASK]", "输出: kind"),
]

# One text row per pair; the completion is drawn in green to the right.
y_pos = 0.6
for text, output in pre_train_examples:
    ax1.text(0.1, y_pos, text, fontsize=8)
    ax1.text(0.7, y_pos, output, fontsize=8, color='green')
    y_pos -= 0.1

ax1.text(
    0.5, 0.3, "模型学会了语言模式\n但不会遵循指令",
    ha='center', fontsize=8,
    bbox={'boxstyle': 'round', 'facecolor': 'lightcoral'},
)

# ---- Right panel: instruction-tuned model ------------------------------
ax2.axis('off')
ax2.set_title('指令微调模型', fontsize=11)
ax2.text(
    0.5, 0.8, '指令微调: 遵循人类指令',
    ha='center', fontsize=9,
    bbox={'boxstyle': 'round', 'facecolor': 'lightgray'},
)

# (instruction, answer) pairs illustrating instruction following.
instruction_examples = [
    ("指令: 请告诉我中国的首都是什么？", "回答: 中国的首都是北京。"),
    ("指令: 将'Hello'翻译成中文", "回答: 你好"),
]

# Same row layout as the left panel.
y_pos = 0.6
for text, output in instruction_examples:
    ax2.text(0.1, y_pos, text, fontsize=8)
    ax2.text(0.7, y_pos, output, fontsize=8, color='green')
    y_pos -= 0.1

ax2.text(
    0.5, 0.3, "模型能理解并执行人类指令",
    ha='center', fontsize=8,
    bbox={'boxstyle': 'round', 'facecolor': 'lightgreen'},
)

plt.suptitle('预训练模型 vs 指令微调模型', fontsize=14)
plt.tight_layout()
plt.show()

# Text summary printed after the figure: definition, then core value.
print(
    "\n💡 指令微调定义:",
    "   使用(指令, 输出)对微调预训练模型，使其能够理解并遵循人类指令",
    "\n📊 核心价值:",
    "   - 让模型学会\"听人话\"",
    "   - 提升零样本能力",
    "   - 对齐人类意图",
    sep="\n",
)

二、指令数据构建

2.1 数据格式

def instruction_data_format():
    """Display common instruction-data layouts next to a task-type overview.

    Prints a section banner, then shows a two-panel matplotlib figure:

    * left panel  -- three example JSON records (instruction only,
      instruction + input, conversation) rendered as monospace text;
    * right panel -- five task families, each with a few sample tasks.

    Takes no arguments and returns ``None``; the figure is displayed with
    ``plt.show()``.
    """
    print("\n" + "=" * 60, "指令数据格式", "=" * 60, sep="\n")

    figure, (format_ax, task_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: the sample records, drawn verbatim from the top-left corner.
    format_samples = """
    📝 格式1: 仅指令
    {
        "instruction": "解释什么是机器学习",
        "output": "机器学习是人工智能的一个分支..."
    }
    
    📝 格式2: 指令+输入
    {
        "instruction": "翻译以下句子",
        "input": "Hello, world!",
        "output": "你好，世界！"
    }
    
    📝 格式3: 对话格式
    {
        "conversations": [
            {"role": "user", "content": "什么是AI？"},
            {"role": "assistant", "content": "AI是人工智能..."}
        ]
    }
    """
    format_ax.axis('off')
    format_ax.set_title('指令数据格式', fontsize=11)
    format_ax.text(
        0.05, 0.95, format_samples,
        transform=format_ax.transAxes,
        fontsize=8,
        verticalalignment='top',
        fontfamily='monospace',
    )

    # Right panel: one row per task family (bold label + comma-joined samples).
    task_families = (
        ('生成', ['写作', '翻译', '摘要', '代码生成']),
        ('理解', ['分类', '情感分析', '实体识别']),
        ('推理', ['数学', '逻辑推理', '常识推理']),
        ('对话', ['问答', '闲聊', '角色扮演']),
        ('转换', ['格式转换', '风格转换', '语言转换']),
    )
    task_ax.axis('off')
    task_ax.set_title('任务类型多样性', fontsize=11)

    row_y = 0.75
    for family, samples in task_families:
        task_ax.text(0.05, row_y, f"• {family}:", fontsize=9, fontweight='bold')
        task_ax.text(0.25, row_y, ', '.join(samples), fontsize=8)
        row_y -= 0.1  # move down one row

    plt.suptitle('指令数据格式与任务多样性', fontsize=12)
    plt.tight_layout()
    plt.show()


instruction_data_format()

2.2 数据构建方法

def data_construction():
    """Display instruction-data construction methods and a Self-Instruct sketch.

    Prints a section banner, then shows a two-panel matplotlib figure:

    * left panel  -- four coloured boxes, one per construction method, each
      with a short trade-off description;
    * right panel -- five labelled circles for the Self-Instruct steps,
      connected by arrows.

    Takes no arguments and returns ``None``; the figure is displayed with
    ``plt.show()``.
    """
    print("\n" + "=" * 60, "指令数据构建方法", "=" * 60, sep="\n")

    figure, (method_ax, flow_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # ---- Left panel: (name, trade-off, box colour) per method -------------
    method_ax.axis('off')
    method_ax.set_title('数据构建方法', fontsize=11)

    method_rows = [
        ('人工标注', '高质量，成本高', 'lightgreen'),
        ('Self-Instruct', '自动化，需种子', 'lightblue'),
        ('Evol-Instruct', '迭代进化，多样性高', 'lightcoral'),
        ('公开数据集', '现成可用，质量不一', 'lightyellow'),
    ]

    row_y = 0.75
    for method_name, trade_off, fill in method_rows:
        # The box is vertically centred on row_y (0.08 tall, offset by 0.04).
        method_ax.add_patch(
            FancyBboxPatch(
                (0.1, row_y - 0.04), 0.8, 0.08,
                boxstyle="round,pad=0.02",
                facecolor=fill,
                ec='black',
            )
        )
        method_ax.text(0.2, row_y, method_name, fontsize=9, fontweight='bold')
        method_ax.text(0.5, row_y, trade_off, fontsize=8)
        row_y -= 0.12

    # ---- Right panel: Self-Instruct nodes as (label, x, y) ----------------
    flow_ax.axis('off')
    flow_ax.set_title('Self-Instruct流程', fontsize=11)

    nodes = [
        ("种子指令", 0.2, 0.7),
        ("LLM生成\n新指令", 0.5, 0.7),
        ("过滤", 0.5, 0.45),
        ("生成输出", 0.8, 0.7),
        ("加入种子集", 0.8, 0.45),
    ]
    for caption, cx, cy in nodes:
        flow_ax.add_patch(plt.Circle((cx, cy), 0.07, color='lightblue', ec='black'))
        flow_ax.text(cx, cy, caption, ha='center', va='center', fontsize=6)

    # Connectors as (head, tail): annotate draws the arrow from xytext to xy.
    # NOTE(review): several endpoints (e.g. the tail at (0.5, 0.57) and the
    # head at (0.73, 0.45)) do not sit on a node edge, so the intended flow
    # between the nodes is ambiguous -- confirm the diagram with the author.
    connectors = [
        ((0.43, 0.7), (0.27, 0.7)),
        ((0.57, 0.7), (0.5, 0.57)),
        ((0.73, 0.7), (0.57, 0.7)),
        ((0.73, 0.45), (0.73, 0.63)),
        ((0.57, 0.45), (0.63, 0.45)),
    ]
    for head, tail in connectors:
        flow_ax.annotate('', xy=head, xytext=tail,
                         arrowprops={'arrowstyle': '->', 'lw': 1})

    plt.suptitle('指令数据构建方法', fontsize=12)
    plt.tight_layout()
    plt.show()


data_construction()

三、监督微调（SFT）

3.1 SFT原理

def sft_principle():
    """Display the supervised fine-tuning (SFT) flow and print its loss.

    Prints a section banner, shows a single-panel matplotlib figure with four
    labelled nodes (pre-trained model, instruction dataset, SFT training,
    instruction model) connected by arrows plus a boxed loss formula, and then
    prints the loss function and key points as text.

    Takes no arguments and returns ``None``.
    """
    print("\n" + "=" * 60)
    print("监督微调（Supervised Fine-Tuning, SFT）")
    print("=" * 60)

    fig, ax = plt.subplots(figsize=(12, 8))
    ax.axis('off')

    # Pipeline nodes as (label, x, y) in data coordinates (axes span 0..1).
    steps = [
        ("预训练模型", 0.2, 0.7),
        ("指令数据集", 0.5, 0.7),
        ("SFT训练", 0.5, 0.4),
        ("指令模型", 0.8, 0.4),
    ]

    for label, x, y in steps:
        circle = plt.Circle((x, y), 0.1, color='lightblue', ec='black')
        ax.add_patch(circle)
        ax.text(x, y, label, ha='center', va='center', fontsize=8)

    # Arrows as (tail, head). annotate() draws from xytext (tail) to xy
    # (head), so the dataset -> training arrow must have its head at the
    # lower point; the original had the two endpoints swapped and pointed up.
    arrows = [
        ((0.3, 0.7), (0.4, 0.7)),   # pre-trained model -> instruction dataset
        ((0.5, 0.6), (0.5, 0.5)),   # instruction dataset -> SFT training
        ((0.6, 0.4), (0.7, 0.4)),   # SFT training -> instruction model
    ]
    for tail, head in arrows:
        ax.annotate('', xy=head, xytext=tail,
                    arrowprops=dict(arrowstyle='->', lw=2))

    # Loss formula shown in a highlighted box under the flow.
    ax.text(0.5, 0.2, '损失函数: L = -Σ log P(output_i | instruction_i, input_i)',
            ha='center', fontsize=9,
            bbox=dict(boxstyle='round', facecolor='lightyellow'))

    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_title('SFT训练流程', fontsize=14)

    plt.tight_layout()
    plt.show()

    print("\n📐 SFT损失函数:")
    print("   L = - Σ log P(y_i | x_i, θ)")
    print("   其中 x_i 是指令/输入，y_i 是期望输出")
    print("\n💡 SFT关键点:")
    print("   - 使用交叉熵损失")
    print("   - 只计算输出部分的损失")
    print("   - 可冻结部分层")


sft_principle()

四、Alpaca

4.1 Alpaca原理

def alpaca():
    """Display the Alpaca data-generation pipeline and its headline numbers.

    Prints a section banner, then shows a two-panel matplotlib figure:

    * left panel  -- five labelled circles (seed tasks, instruction
      generation, 52K dataset, LLaMA 7B SFT, Alpaca model) joined by arrows;
    * right panel -- a monospace text block with dataset/training statistics
      and one example record.

    Finally prints a short list of Alpaca's contributions. Takes no arguments
    and returns ``None``.
    """
    print("\n" + "=" * 60)
    print("Alpaca：斯坦福的指令微调模型")
    print("=" * 60)

    fig, axes = plt.subplots(1, 2, figsize=(14, 6))

    # ---- Left panel: pipeline nodes as (label, x, y) ----------------------
    ax1 = axes[0]
    ax1.axis('off')
    ax1.set_title('Alpaca数据生成流程', fontsize=11)

    steps = [
        ("种子任务\n(175个)", 0.2, 0.7),
        ("text-davinci-003\n生成指令", 0.5, 0.7),
        ("52K指令数据", 0.8, 0.7),
        ("LLaMA 7B\nSFT", 0.8, 0.4),
        ("Alpaca模型", 0.8, 0.2),
    ]

    for label, x, y in steps:
        circle = plt.Circle((x, y), 0.08, color='lightblue', ec='black')
        # Draw on ax1: the original referenced an undefined name `ax` here,
        # which raised NameError before anything was rendered.
        ax1.add_patch(circle)
        ax1.text(x, y, label, ha='center', va='center', fontsize=6)

    # Arrows as (tail, head). annotate() draws from xytext (tail) to xy
    # (head); the two vertical arrows originally had their endpoints swapped
    # and pointed upward, against the direction of the pipeline.
    arrows = [
        ((0.28, 0.7), (0.42, 0.7)),   # seed tasks -> instruction generation
        ((0.58, 0.7), (0.72, 0.7)),   # instruction generation -> 52K dataset
        ((0.8, 0.62), (0.8, 0.48)),   # 52K dataset -> LLaMA 7B SFT
        ((0.8, 0.32), (0.8, 0.28)),   # LLaMA 7B SFT -> Alpaca model
    ]
    for tail, head in arrows:
        ax1.annotate('', xy=head, xytext=tail,
                     arrowprops=dict(arrowstyle='->', lw=1))

    # ---- Right panel: statistics rendered as monospace text ---------------
    ax2 = axes[1]
    ax2.axis('off')
    ax2.set_title('Alpaca数据统计', fontsize=11)

    stats = """
    📊 Alpaca数据统计:
    
    ┌─────────────────────────────────────────┐
    │  总指令数: 52,000                       │
    │  种子任务: 175                          │
    │  训练时间: 3小时 (8×A100)               │
    │  模型大小: 7B                           │
    │  成本: < $600                          │
    └─────────────────────────────────────────┘
    
    📝 指令示例:
    {
        "instruction": "列出5个职业",
        "input": "",
        "output": "1. 医生\\n2. 工程师..."
    }
    """

    ax2.text(0.05, 0.95, stats, transform=ax2.transAxes, fontsize=8,
             verticalalignment='top', fontfamily='monospace')

    plt.suptitle('Alpaca：低成本指令微调', fontsize=12)
    plt.tight_layout()
    plt.show()

    print("\n💡 Alpaca贡献:")
    print("   - 证明小模型+高质量指令数据可达好效果")
    print("   - 低成本复现ChatGPT能力")
    print("   - 开源模型生态的里程碑")


alpaca()

五、Vicuna

5.1 Vicuna原理

def vicuna():
    """Display a Vicuna-vs-Alpaca comparison and Vicuna's training setup.

    Prints a section banner, then shows a two-panel matplotlib figure whose
    panels are monospace text blocks: a box-drawn comparison table on the
    left and the training configuration / key improvements on the right.
    Finally prints a short list of Vicuna's innovations.

    Takes no arguments and returns ``None``.
    """
    print("\n" + "=" * 60, "Vicuna：对话优化模型", "=" * 60, sep="\n")

    # Text blocks are defined first, then placed on their panels below.
    comparison_table = """
    ╔══════════════╦════════════════════════════════════════╗
    ║    方面      ║              对比                      ║
    ╠══════════════╬════════════════════════════════════════╣
    ║ 基础模型     ║ 都是 LLaMA                             ║
    ║ 数据来源     ║ Alpaca: GPT-3.5生成                   ║
    ║              ║ Vicuna: ShareGPT用户对话              ║
    ║ 数据量       ║ Alpaca: 52K                           ║
    ║              ║ Vicuna: 70K对话                       ║
    ║ 训练方式     ║ Alpaca: 仅指令                        ║
    ║              ║ Vicuna: 多轮对话                      ║
    ║ 对话能力     ║ Vicuna 更强                          ║
    ╚══════════════╩════════════════════════════════════════╝
    """

    training_details = """
    📊 Vicuna训练配置:
    
    • 基础模型: LLaMA 13B
    • 数据: ShareGPT 70K对话
    • 训练: 8×A100, 1天
    • 上下文: 2048 tokens
    • 成本: ~$300
    
    🔧 关键改进:
    • 多轮对话格式
    • 记忆增强
    • 长上下文处理
    """

    figure, (compare_ax, detail_ax) = plt.subplots(1, 2, figsize=(14, 6))

    # Both panels share the same layout: hidden axes, a title, and one text
    # block anchored near the top-left corner in axes coordinates.
    panels = (
        (compare_ax, 'Vicuna vs Alpaca', comparison_table),
        (detail_ax, 'Vicuna训练细节', training_details),
    )
    for panel, heading, body in panels:
        panel.axis('off')
        panel.set_title(heading, fontsize=11)
        panel.text(
            0.05, 0.95, body,
            transform=panel.transAxes,
            fontsize=8,
            verticalalignment='top',
            fontfamily='monospace',
        )

    plt.suptitle('Vicuna：对话优化模型', fontsize=12)
    plt.tight_layout()
    plt.show()

    print(
        "\n💡 Vicuna创新:",
        "   - 使用真实用户对话数据",
        "   - 多轮对话训练",
        "   - 更强的对话能力",
        sep="\n",
    )


vicuna()

六、SFT实战

6.1 微调代码示例

def sft_code():
    """SFT代码示例"""
    
    print("\n" + "=" * 60)
    print("SFT代码示例")
    print("=" * 60)
    
    code = """
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset

# 1. 加载模型和分词器
model_name = "meta-llama/Llama-2-7b-hf"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# 2. 添加特殊token
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
model.resize_token_embeddings(len(tokenizer))

# 3. 准备数据
def format_instruction(example):
    """格式化指令数据"""
    if example.get("input"):
        text = f"### Instruction:\\n{example['instruction']}\\n\\n"
        text += f"### Input:\\n{example['input']}\\n\\n"
        text += f"### Response:\\n{example['output']}"
    else:
        text = f"### Instruction:\\n{example['instruction']}\\n\\n"
        text += f"### Response:\\n{example['output']}"
    return text

# 4. 数据预处理
def preprocess_function(examples):
    texts = [format_instruction(example) for example in examples]
    tokenized = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    )
    tokenized["labels"] = tokenized["input_ids"].clone()
    return tokenized

# 5. 加载数据集
dataset = Dataset.from_list(train_data)
tokenized_dataset = dataset.map(preprocess_function, batched=True)

# 6. 训练参数
training_args = TrainingArguments(
    output_dir="./sft_model",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=100,
    logging_steps=10,
    save_steps=500,
    learning_rate=2e-5,
    fp16=True,
    report_to="tensorboard"
)

# 7. 创建Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

# 8. 训练
trainer.train()

# 9. 保存模型
model.save_pretrained("./sft_model")
tokenizer.save_pretrained("./sft_model")

# 10. 推理
def generate_response(instruction, input_text=""):
    prompt = f"### Instruction:\\n{instruction}\\n\\n"
    if input_text:
        prompt += f"### Input:\\n{input_text}\\n\\n"
    prompt += "### Response:\\n"
    
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        top_p=0.9
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response.split("### Response:\\n")[-1]
"""
    
    print(code)

sft_code()

七、总结

| 模型 | 基础模型 | 数据来源 | 数据量 | 特点 |
| --- | --- | --- | --- | --- |
| Alpaca | LLaMA 7B | GPT-3.5生成 | 52K | 低成本、指令跟随 |
| Vicuna | LLaMA 13B | ShareGPT对话 | 70K | 对话能力强 |
| WizardLM | LLaMA | Evol-Instruct | 70K | 复杂指令 |

指令微调关键要素：

数据质量 > 数据数量
任务多样性重要
对话数据提升交互能力
训练细节影响效果

AI Agent技术社区

Agent 垂直技术社区，欢迎活跃、内容共建。

更多推荐

Agent 工程中的模型缓存优化经验分享

AI Agent技术社区

CC-Switch不只是切换API：从GitHub更新日志看懂它的功能和底层原理

CC Switch：从配置切换器到AI编程统一管理平台摘要： CC Switch已从最初的Claude Code/Codex供应商切换工具，发展为功能全面的AI编程管理平台。它通过统一界面管理多个AI编程工具（Claude Code、Codex、Gemini CLI等）的配置，支持供应商切换、本地代理路由、跨工具能力同步等功能。核心演进包括：采用SSOT架构集中管理供应商数据、扩展支持6+工具、

AI Agent技术社区

转载--Hermes Agent 05 | 记忆系统（上）：内置记忆的冻结快照模式与 agent-curated 策展

找到匹配的会话后，不是直接把原始对话扔回给主模型——那太长了。它用一个辅助模型（Gemini Flash）对每个匹配的会话做摘要（）：加载匹配会话的完整对话记录以匹配位置为中心，截断到 ~100,000 字符（发给 Gemini Flash，用一个聚焦的 summarization prompt 生成摘要返回带元数据的摘要结果用便宜的辅助模型（Gemini Flash）来压缩长对话，再把短摘要喂给