书生实战营第四期-基础岛第五关-XTuner 微调个人小助手认知

基础任务

使用 XTuner 微调 InternLM2-Chat-7B 实现自己的小助手认知

一、环境配置与数据准备

1.构建虚拟环境

cd ~
#git clone 本repo
git clone https://github.com/InternLM/Tutorial.git -b camp4
mkdir -p /root/finetune && cd /root/finetune
conda create -n xtuner-env python=3.10 -y
conda activate xtuner-env

2.安装XTuner

git clone https://github.com/InternLM/xtuner.git
cd /root/finetune/xtuner

pip install -e '.[all]'
pip install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121
pip install transformers==4.39.0

3.验证安装

xtuner list-cfg

二、修改提供的数据

1.创建存储微调数据

mkdir -p /root/finetune/data && cd /root/finetune/data
cp -r /root/Tutorial/data/assistant_Tuner.jsonl /root/finetune/data

2.创建修改脚本

# 创建 `change_script.py` 文件
touch /root/finetune/data/change_script.py

3.执行脚本

# usage：python change_script.py {input_file.jsonl} {output_file.jsonl}
cd ~/finetune/data
python change_script.py ./assistant_Tuner.jsonl ./assistant_Tuner_change.jsonl

4.查看数据

cat assistant_Tuner_change.jsonl | head -n 3

三、训练启动

1.复制模型

mkdir /root/finetune/models

ln -s /root/share/new_models/Shanghai_AI_Laboratory/internlm2_5-7b-chat /root/finetune/models/internlm2_5-7b-chat

2.修改 Config

# cd {path/to/finetune}
cd /root/finetune
mkdir ./config
cd config
xtuner copy-cfg internlm2_5_chat_7b_qlora_alpaca_e3 ./

3.启动微调

cd /root/finetune
conda activate xtuner-env

xtuner train ./config/internlm2_5_chat_7b_qlora_alpaca_e3_copy.py --deepspeed deepspeed_zero2 --work-dir ./work_dirs/assistTuner

4.权重转换

cd /root/finetune/work_dirs/assistTuner

conda activate xtuner-env

# 先获取最后保存的一个pth文件
pth_file=`ls -t /root/finetune/work_dirs/assistTuner/*.pth | head -n 1`
export MKL_SERVICE_FORCE_INTEL=1
export MKL_THREADING_LAYER=GNU
xtuner convert pth_to_hf ./internlm2_5_chat_7b_qlora_alpaca_e3_copy.py ${pth_file} ./hf

5.模型合并

cd /root/finetune/work_dirs/assistTuner
conda activate xtuner-env

export MKL_SERVICE_FORCE_INTEL=1
export MKL_THREADING_LAYER=GNU
xtuner convert merge /root/finetune/models/internlm2_5-7b-chat ./hf ./merged --max-shard-size 2GB

四、模型 WebUI 对话

1.修改微调后的模型的路径

cd ~/Tutorial/tools/L1_XTuner_code

# 直接修改脚本文件第18行
- model_name_or_path = "Shanghai_AI_Laboratory/internlm2_5-7b-chat"
+ model_name_or_path = "/root/finetune/work_dirs/assistTuner/merged"