ER-NeRF 用于高保真谈话人像合成的高效区域感知神经辐射场
Install dependency
安装依赖
git clone https://github.com/Fictionarry/ER-NeRF.git
git clone https://github.com/YudongGuo/AD-NeRF.git
git clone https://github.com/facebookresearch/pytorch3d.git
cd ER-NeRF
python3 -m venv .venv
source .venv/bin/activate
sudo apt install nvidia-cuda-toolkit
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# cuda12
# pip install torch torchvision torchaudio
sudo apt install gcc g++ portaudio19-dev
pip install -r requirements.txt
pip install ../pytorch3d
pip install tensorflow
sed -i 's/-std=c++14/-std=c++17/g' freqencoder/setup.py shencoder/setup.py gridencoder/setup.py raymarching/setup.py
pip install ./freqencoder \
&& pip install ./shencoder \
&& pip install ./gridencoder \
&& pip install ./raymarching
Preparation
# Prepare face-parsing model.
cp ../AD-NeRF/data_util/face_parsing/79999_iter.pth data_utils/face_parsing/
# Prepare the 3DMM model for head pose estimation.
mkdir -p data_utils/face_tracking/3DMM
cp ../AD-NeRF/data_util/face_tracking/3DMM/exp_info.npy data_utils/face_tracking/3DMM/
cp ../AD-NeRF/data_util/face_tracking/3DMM/keys_info.npy data_utils/face_tracking/3DMM/
cp ../AD-NeRF/data_util/face_tracking/3DMM/sub_mesh.obj data_utils/face_tracking/3DMM/
cp ../AD-NeRF/data_util/face_tracking/3DMM/topology_info.npy data_utils/face_tracking/3DMM/
# Download 3DMM model from Basel Face Model 2009:
cp ../BaseFaceModel/01_MorphableModel.mat data_utils/face_tracking/3DMM/
cd data_utils/face_tracking
python convert_BFM.py
cd ../../
mkdir -p data/ob
cp ../AD-NeRF/dataset/vids/Ob.mp4 data/ob/
Datasets and pretrained models
data process 数据处理
# 处理全部
python data_utils/process.py data/ob/Ob.mp4
# 分步处理
python data_utils/process.py data/ob/Ob.mp4 --task 1
path
path to video file
--task
-1 means all
--asr
wav2vec or deepspeech
wav_path = os.path.join(base_dir, 'aud.wav')
ori_imgs_dir = os.path.join(base_dir, 'ori_imgs')
parsing_dir = os.path.join(base_dir, 'parsing')
gt_imgs_dir = os.path.join(base_dir, 'gt_imgs')
torso_imgs_dir = os.path.join(base_dir, 'torso_imgs')
1. extract audio 分离音频
extract_audio(opt.path, wav_path)
ffmpeg -i {path} -f wav -ar {sample_rate} {out_path}
output:
- aud.wav
2. extract audio features 生成 aud_eo.npy
extract_audio_features(wav_path, mode=opt.asr)
if wav2vec
python nerf/asr.py --wav {path} --save_feats
if deepspeech
python data_utils/deepspeech_features/extract_ds_features.py --input {path}
output:
- aud.npy
3 extract images 把视频拆分成图像
extract_images(opt.path, ori_imgs_dir)
ffmpeg -i {path} -vf fps={fps} -qmin 1 -q:v 1 -start_number 0 {os.path.join(out_path, "%d.jpg")}
output:
- ori_images
4 face parsing 分割人像
extract_semantics(ori_imgs_dir, parsing_dir)
python data_utils/face_parsing/test.py --respath={parsing_dir} --imgpath={ori_imgs_dir}
output:
- parsing
5 extract bg 提取背景图像
extract_background(base_dir, ori_imgs_dir)
input:
- ori_images
output:
- bc.jpg
6 extract torso images and gt_images 分割出身体部分
extract_torso_and_gt(base_dir, ori_imgs_dir)
input:
- bc.jpg
- ori_images
output:
- gt_imgs
- torso_imgs
7 extract face landmarks 获取人脸 landmarks lms文件
extract_landmarks(ori_imgs_dir)
input:
- ori_images
output:
- ori_images/*.lms
8 face tracking 获取人脸跟踪数据,这步要训练一个追踪模型,会很慢
face_tracking(ori_imgs_dir)
python data_utils/face_tracking/face_tracker.py --path={ori_imgs_dir} --img_h={h} --img_w={w} --frame_num={len(image_paths)}
output:
- track_params.pt
9 save transforms.json 保存所有数据
save_transforms(base_dir, ori_imgs_dir)
input:
- /ori_imgs_dir/*.jpg
- track_params.pt
output:
- transforms_train.json
- transforms_val.json
Train
cd data/ob
mv aud.npy aud_ds.npy
cd ../../
python main.py data/ob/ --workspace trial_ob/ -O --iters 100000
python 命令后台执行
nohup python -u data_utils/process.py data/ob/Ob.mp4 --task 8 > nohup.log 2>&1 &