neuralchen · StevenCyb · Dec 22, 2022
diff --git a/docs/guidance/usage.md b/docs/guidance/usage.md
@@ -98,18 +98,19 @@ Difference between using mask and not using mask can be found [here](https://img
 
 
 ### Parameters
-|  Parameters   | Function  |
-|  :----  | :----  |
-| --name  | The SimSwap training logs name |
-| --pic_a_path  | Path of image with the target face |
-| --pic_b_path  | Path of image with the source face to swap |
-| --pic_specific_path  | Path of image with the specific face to be swapped |
-|--multisepcific_dir  |Path of image folder for multi specific face swapping|
-| --video_path  | Path of video with the source face to swap |
-| --temp_path  | Path to store intermediate files  |
-| --output_path  | Path of directory to store the face swapping result  |
-| --no_simswaplogo  |The hyper parameter to control whether to remove watermark |
-| --use_mask  |The hyper parameter to control whether to use face parsing for the better visual effects(I recommend to use)|
+| Parameters | Function |
+| :---- | :---- |
+| --name | The SimSwap training logs name |
+| --pic_a_path | Path of image with the target face |
+| --pic_b_path | Path of image with the source face to swap |
+| --pic_specific_path | Path of image with the specific face to be swapped |
+| --multisepcific_dir |Path of image folder for multi specific face swapping |
+| --video_path | Path of video with the source face to swap |
+| --temp_path | Path to store intermediate files |
+| --output_path | Path of directory to store the face swapping result  |
+| --no_simswaplogo | The hyper parameter to control whether to remove watermark |
+| --use_mask | The hyper parameter to control whether to use face parsing for better visual effects (recommended) |
+| --skip_existing_frames | Skip frames whose output image already exists in temp_path (does not check that the existing frames came from the same video) |
 
 ### Note
 We expect users to have GPU with at least 3G memory.the For those who do not, we will provide Colab Notebook implementation in the future.
diff --git a/options/test_options.py b/options/test_options.py
@@ -34,5 +34,6 @@ def initialize(self):
         self.parser.add_argument('--no_simswaplogo', action='store_true', help='Remove the watermark')
         self.parser.add_argument('--use_mask', action='store_true', help='Use mask for better result')
         self.parser.add_argument('--crop_size', type=int, default=224, help='Crop of size of input image')
+        self.parser.add_argument('--skip_existing_frames', action='store_true', help='Skip frame index if already exist in temp_path')
 
         self.isTrain = False
diff --git a/test_video_swap_multispecific.py b/test_video_swap_multispecific.py
@@ -95,5 +95,5 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
 
 
         video_swap(opt.video_path, target_id_norm_list,source_specific_id_nonorm_list, opt.id_thres, \
-            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size)
+            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size,skip_existing_frames=opt.skip_existing_frames)
 
diff --git a/test_video_swapmulti.py b/test_video_swapmulti.py
@@ -84,5 +84,5 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
         latend_id = F.normalize(latend_id, p=2, dim=1)
 
         video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,\
-            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size)
+            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size,skip_existing_frames=opt.skip_existing_frames)
 
diff --git a/test_video_swapsingle.py b/test_video_swapsingle.py
@@ -83,5 +83,5 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
         latend_id = F.normalize(latend_id, p=2, dim=1)
 
         video_swap(opt.video_path, latend_id, model, app, opt.output_path,temp_results_dir=opt.temp_path,\
-            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size)
+            no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size,skip_existing_frames=opt.skip_existing_frames)
 
diff --git a/test_video_swapspecific.py b/test_video_swapspecific.py
@@ -94,5 +94,5 @@ def lcm(a, b): return abs(a * b) / fractions.gcd(a, b) if a and b else 0
         specific_person_id_nonorm = model.netArc(specific_person_downsample)
 
         video_swap(opt.video_path, latend_id,specific_person_id_nonorm, opt.id_thres, \
-            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size)
+            model, app, opt.output_path,temp_results_dir=opt.temp_path,no_simswaplogo=opt.no_simswaplogo,use_mask=opt.use_mask,crop_size=crop_size,skip_existing_frames=opt.skip_existing_frames)
 
diff --git a/util/videoswap.py b/util/videoswap.py
@@ -27,7 +27,7 @@ def _totensor(array):
     img = tensor.transpose(0, 1).transpose(0, 2).contiguous()
     return img.float().div(255)
 
-def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False):
+def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False, use_mask = False, skip_existing_frames = False):
     video_forcheck = VideoFileClip(video_path)
     if video_forcheck.audio is None:
         no_audio = True
@@ -51,8 +51,8 @@ def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_r
     # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
     fps = video.get(cv2.CAP_PROP_FPS)
-    if  os.path.exists(temp_results_dir):
-            shutil.rmtree(temp_results_dir)
+    if not skip_existing_frames and os.path.exists(temp_results_dir):
+        shutil.rmtree(temp_results_dir)
 
     spNorm =SpecificNorm()
     if use_mask:
@@ -64,17 +64,22 @@ def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_r
         net.eval()
     else:
         net =None
+
+    if not os.path.exists(temp_results_dir):
+        os.mkdir(temp_results_dir)
 
     # while ret:
     for frame_index in tqdm(range(frame_count)): 
         ret, frame = video.read()
+
+        if skip_existing_frames and os.path.exists(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index))):
+            continue
+
         if  ret:
             detect_results = detect_model.get(frame,crop_size)
 
             if detect_results is not None:
                 # print(frame_index)
-                if not os.path.exists(temp_results_dir):
-                        os.mkdir(temp_results_dir)
                 frame_align_crop_list = detect_results[0]
                 frame_mat_list = detect_results[1]
                 swap_result_list = []
@@ -97,8 +102,6 @@ def video_swap(video_path, id_vetor, swap_model, detect_model, save_path, temp_r
                     os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask=use_mask, norm = spNorm)
 
             else:
-                if not os.path.exists(temp_results_dir):
-                    os.mkdir(temp_results_dir)
                 frame = frame.astype(np.uint8)
                 if not no_simswaplogo:
                     frame = logoclass.apply_frames(frame)

diff --git a/util/videoswap_multispecific.py b/util/videoswap_multispecific.py
@@ -20,7 +20,7 @@ def _totensor(array):
     img = tensor.transpose(0, 1).transpose(0, 2).contiguous()
     return img.float().div(255)
 
-def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False):
+def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False, use_mask =False, skip_existing_frames = False):
     video_forcheck = VideoFileClip(video_path)
     if video_forcheck.audio is None:
         no_audio = True
@@ -44,8 +44,8 @@ def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id
     # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
     fps = video.get(cv2.CAP_PROP_FPS)
-    if  os.path.exists(temp_results_dir):
-            shutil.rmtree(temp_results_dir)
+    if not skip_existing_frames and os.path.exists(temp_results_dir):
+        shutil.rmtree(temp_results_dir)
 
     spNorm =SpecificNorm()
     mse = torch.nn.MSELoss().cuda()
@@ -60,16 +60,21 @@ def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id
     else:
         net =None
 
+    if not os.path.exists(temp_results_dir):
+        os.mkdir(temp_results_dir)
+
     # while ret:
     for frame_index in tqdm(range(frame_count)): 
         ret, frame = video.read()
+
+        if skip_existing_frames and os.path.exists(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index))):
+            continue
+
         if  ret:
             detect_results = detect_model.get(frame,crop_size)
 
             if detect_results is not None:
                 # print(frame_index)
-                if not os.path.exists(temp_results_dir):
-                        os.mkdir(temp_results_dir)
                 frame_align_crop_list = detect_results[0]
                 frame_mat_list = detect_results[1]
 
@@ -113,16 +118,12 @@ def video_swap(video_path, target_id_norm_list,source_specific_id_nonorm_list,id
                     reverse2wholeimage(swap_result_ori_pic_list,swap_result_list, swap_result_matrix_list, crop_size, frame, logoclass,\
                         os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask=use_mask, norm = spNorm)
                 else:
-                    if not os.path.exists(temp_results_dir):
-                        os.mkdir(temp_results_dir)
                     frame = frame.astype(np.uint8)
                     if not no_simswaplogo:
                         frame = logoclass.apply_frames(frame)
                     cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame)
 
             else:
-                if not os.path.exists(temp_results_dir):
-                    os.mkdir(temp_results_dir)
                 frame = frame.astype(np.uint8)
                 if not no_simswaplogo:
                     frame = logoclass.apply_frames(frame)

diff --git a/util/videoswap_specific.py b/util/videoswap_specific.py
@@ -20,7 +20,7 @@ def _totensor(array):
     img = tensor.transpose(0, 1).transpose(0, 2).contiguous()
     return img.float().div(255)
 
-def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False,use_mask =False):
+def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_model, detect_model, save_path, temp_results_dir='./temp_results', crop_size=224, no_simswaplogo = False, use_mask =False, skip_existing_frames = False):
     video_forcheck = VideoFileClip(video_path)
     if video_forcheck.audio is None:
         no_audio = True
@@ -44,8 +44,8 @@ def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_mod
     # video_HEIGHT = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
     fps = video.get(cv2.CAP_PROP_FPS)
-    if  os.path.exists(temp_results_dir):
-            shutil.rmtree(temp_results_dir)
+    if not skip_existing_frames and os.path.exists(temp_results_dir):
+        shutil.rmtree(temp_results_dir)
 
     spNorm =SpecificNorm()
     mse = torch.nn.MSELoss().cuda()
@@ -60,16 +60,21 @@ def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_mod
     else:
         net =None
 
+    if not os.path.exists(temp_results_dir):
+        os.mkdir(temp_results_dir)
+
     # while ret:
     for frame_index in tqdm(range(frame_count)): 
         ret, frame = video.read()
+
+        if skip_existing_frames and os.path.exists(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index))):
+            continue
+
         if  ret:
             detect_results = detect_model.get(frame,crop_size)
 
             if detect_results is not None:
                 # print(frame_index)
-                if not os.path.exists(temp_results_dir):
-                        os.mkdir(temp_results_dir)
                 frame_align_crop_list = detect_results[0]
                 frame_mat_list = detect_results[1]
 
@@ -97,16 +102,12 @@ def video_swap(video_path, id_vetor,specific_person_id_nonorm,id_thres, swap_mod
                     reverse2wholeimage([frame_align_crop_tenor_list[min_index]], [swap_result], [frame_mat_list[min_index]], crop_size, frame, logoclass,\
                         os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)),no_simswaplogo,pasring_model =net,use_mask= use_mask, norm = spNorm)
                 else:
-                    if not os.path.exists(temp_results_dir):
-                        os.mkdir(temp_results_dir)
                     frame = frame.astype(np.uint8)
                     if not no_simswaplogo:
                         frame = logoclass.apply_frames(frame)
                     cv2.imwrite(os.path.join(temp_results_dir, 'frame_{:0>7d}.jpg'.format(frame_index)), frame)
 
             else:
-                if not os.path.exists(temp_results_dir):
-                    os.mkdir(temp_results_dir)
                 frame = frame.astype(np.uint8)
                 if not no_simswaplogo:
                     frame = logoclass.apply_frames(frame)