From bfe0450c95cc0419331b1176ce06bfa6d75e5079 Mon Sep 17 00:00:00 2001
From: TimothyHTimothy <2711123841@qq.com>
Date: Mon, 19 Dec 2022 17:10:29 +0800
Subject: [PATCH] fix issue #2

---
 default_infer.py            | 132 ++++++++++++++++++++++--------------
 evaluate_a_set_of_videos.py |  89 ++++++++++++------------
 evaluate_one_video.py       |  21 +++---
 3 files changed, 140 insertions(+), 102 deletions(-)

diff --git a/default_infer.py b/default_infer.py
index 4ded7eb..a81bfbc 100644
--- a/default_infer.py
+++ b/default_infer.py
@@ -1,37 +1,37 @@
 import torch
-import cv2
-import random
-import os.path as osp
-import dover.models as models
-import dover.datasets as datasets
 import argparse
+import math
+import os.path as osp
+import pickle
+import random
+from time import time
 
-from scipy.stats import spearmanr, pearsonr
-from scipy.stats.stats import kendalltau as kendallr
+import cv2
 import numpy as np
-
-from time import time
+import torch
+import yaml
+from scipy.stats import kendalltau as kendallr
+from scipy.stats import pearsonr, spearmanr
+from thop import profile
 from tqdm import tqdm
-import pickle
-import math
 
+import dover.datasets as datasets
+import dover.models as models
 import wandb
-import yaml
-
-from thop import profile
 
 
 def rescale(pr, gt=None):
     if gt is None:
-        print(np.mean(pr), np.std(pr))
+        print("mean", np.mean(pr), "std", np.std(pr))
         pr = (pr - np.mean(pr)) / np.std(pr)
     else:
         print(np.mean(pr), np.std(pr), np.std(gt), np.mean(gt))
         pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt)
     return pr
 
-sample_types=["aesthetic", "technical"]
+
+sample_types = ["aesthetic", "technical"]
 
 
 def profile_inference(inf_set, model, device):
@@ -41,17 +41,33 @@ def profile_inference(inf_set, model, device):
         if key in data:
             video[key] = data[key].to(device)
             c, t, h, w = video[key].shape
-            video[key] = video[key].reshape(1, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape( data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+            video[key] = (
+                video[key]
+                .reshape(
+                    1, c, data["num_clips"][key], t // data["num_clips"][key], h, w
+                )
+                .permute(0, 2, 1, 3, 4, 5)
+                .reshape(data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+            )
     with torch.no_grad():
-        flops, params = profile(model, (video, ))
-        print(f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M.")
+        flops, params = profile(model, (video,))
+        print(
+            f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M."
+        )
+
 
-def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s', set_name="na"):
+def inference_set(
+    inf_loader, model, device, best_, save_model=False, suffix="s", set_name="na"
+):
     print(f"Validating for {set_name}.")
     results = []
+    try:
+        model = torch.compile(model)
+    except:
+        print("You may try to accelerate your model with torch 2.0")
 
     best_s, best_p, best_k, best_r = best_
-    
+
     keys = []
 
     for i, data in enumerate(tqdm(inf_loader, desc="Validating")):
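For reference, here is a minimal standalone sketch (with assumed toy shapes, not part of the patch) of what the reshape-permute-reshape chain above computes: the sampled frames of all clips arrive stacked along the time axis, and the chain splits them back into per-clip tensors that the model scores independently. The same pattern appears again in inference_set in the next hunk.

import torch

b, c, t, h, w = 1, 3, 96, 224, 224  # hypothetical batch of 96 stacked frames
num_clips = 3                       # stands in for data["num_clips"][key]

video = torch.randn(b, c, t, h, w)
clips = (
    video.reshape(b, c, num_clips, t // num_clips, h, w)
    .permute(0, 2, 1, 3, 4, 5)  # -> (b, num_clips, c, t', h, w)
    .reshape(b * num_clips, c, t // num_clips, h, w)
)
assert clips.shape == (3, 3, 32, 224, 224)  # one batch entry per clip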
@@ -63,9 +79,18 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
             if key in data:
                 video[key] = data[key].to(device)
                 b, c, t, h, w = video[key].shape
-                video[key] = video[key].reshape(b, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape(b * data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+                video[key] = (
+                    video[key]
+                    .reshape(
+                        b, c, data["num_clips"][key], t // data["num_clips"][key], h, w
+                    )
+                    .permute(0, 2, 1, 3, 4, 5)
+                    .reshape(
+                        b * data["num_clips"][key], c, t // data["num_clips"][key], h, w
+                    )
+                )
         with torch.no_grad():
-            labels = model(video,reduce_scores=False)
+            labels = model(video, reduce_scores=False)
             labels = [np.mean(l.cpu().numpy()) for l in labels]
         result["pr_labels"] = labels
         result["gt_label"] = data["gt_label"].item()
@@ -74,7 +99,6 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
         # del data
         results.append(result)
 
-    
     ## generate the demo video for video quality localization
     gt_labels = [r["gt_label"] for r in results]
     pr_labels = 0
@@ -84,22 +108,28 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
         key_pr_labels = rescale([np.mean(r["pr_labels"][i]) for r in results])
         pr_labels += key_pr_labels * w
         pr_dict[key] = key_pr_labels
-        
-    #with open(f"dover_predictions/{set_name}.pkl", "wb") as f:
+
+    # with open(f"dover_predictions/{set_name}.pkl", "wb") as f:
     #    pickle.dump(pr_dict, f)
-    
+
     print(pr_labels)
     pr_labels = rescale(pr_labels, gt_labels)
 
     s = spearmanr(gt_labels, pr_labels)[0]
     p = pearsonr(gt_labels, pr_labels)[0]
     k = kendallr(gt_labels, pr_labels)[0]
     r = np.sqrt(((gt_labels - pr_labels) ** 2).mean())
-    
-    
+
     results = sorted(results, key=lambda x: x["pr_labels"])
 
     try:
-        wandb.log({f"val/SRCC-{suffix}": s, f"val/PLCC-{suffix}": p, f"val/KRCC-{suffix}": k, f"val/RMSE-{suffix}": r})
+        wandb.log(
+            {
+                f"val/SRCC-{suffix}": s,
+                f"val/PLCC-{suffix}": p,
+                f"val/KRCC-{suffix}": k,
+                f"val/RMSE-{suffix}": r,
+            }
+        )
     except:
         pass
@@ -127,6 +157,7 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
 
     return best_s, best_p, best_k, best_r, pr_labels
 
+
 def main():
 
     parser = argparse.ArgumentParser()
@@ -138,44 +169,45 @@ def main():
     with open(args.opt, "r") as f:
        opt = yaml.safe_load(f)
     print(opt)
-    
-    
-    
     ## adaptively choose the device
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    #device = "cpu"
+    # device = "cpu"
 
     ## defining model and loading checkpoint
 
     bests_ = []
-    
+
     model = getattr(models, opt["model"]["type"])(**opt["model"]["args"]).to(device)
-    state_dict = torch.load(opt["test_load_path"], map_location=device)#["state_dict"]
-    
+    state_dict = torch.load(
+        opt["test_load_path"], map_location=device
+    )  # ["state_dict"]
+
     model.load_state_dict(state_dict, strict=True)
-    
+
     for key in opt["data"].keys():
-        
+
         if "val" not in key and "test" not in key:
             continue
-        
+
         run = wandb.init(
             project=opt["wandb"]["project_name"],
-            name=opt["name"]+"_Test_"+key,
+            name=opt["name"] + "_Test_" + key,
             reinit=True,
         )
-        
-        val_dataset = getattr(datasets, opt["data"][key]["type"])(opt["data"][key]["args"])
-        
-        val_loader = torch.utils.data.DataLoader(
-            val_dataset, batch_size=1, num_workers=opt["num_workers"], pin_memory=True,
+        val_dataset = getattr(datasets, opt["data"][key]["type"])(
+            opt["data"][key]["args"]
         )
-        
+        val_loader = torch.utils.data.DataLoader(
+            val_dataset,
+            batch_size=1,
+            num_workers=opt["num_workers"],
+            pin_memory=True,
+        )
 
         profile_inference(val_dataset, model, device)
@@ -184,11 +216,11 @@ def main():
 
         best_ = -1, -1, -1, 1000
 
-
         best_ = inference_set(
             val_loader,
             model,
-            device, best_,
+            device,
+            best_,
             set_name=key,
         )
@@ -199,11 +231,9 @@ def main():
             KROCC: {best_[2]:.4f}
             RMSE: {best_[3]:.4f}."""
         )
-        
         run.finish()
 
-
 if __name__ == "__main__":
     main()
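The four criteria logged above are the standard agreement measures for quality assessment; a self-contained sketch on hypothetical labels (toy arrays, not taken from the patch) shows how each is computed:

import numpy as np
from scipy.stats import kendalltau as kendallr
from scipy.stats import pearsonr, spearmanr

gt_labels = np.array([0.9, 0.4, 0.7, 0.2, 0.6])  # assumed ground-truth scores
pr_labels = np.array([0.8, 0.5, 0.6, 0.1, 0.7])  # assumed predictions

s = spearmanr(gt_labels, pr_labels)[0]  # SRCC: monotonic (rank) agreement
p = pearsonr(gt_labels, pr_labels)[0]   # PLCC: linear correlation
k = kendallr(gt_labels, pr_labels)[0]   # KRCC: pairwise ordering agreement
r = np.sqrt(((gt_labels - pr_labels) ** 2).mean())  # RMSE: absolute error
print(f"SRCC={s:.4f} PLCC={p:.4f} KRCC={k:.4f} RMSE={r:.4f}")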
diff --git a/evaluate_a_set_of_videos.py b/evaluate_a_set_of_videos.py
index 46ab00d..871b511 100644
--- a/evaluate_a_set_of_videos.py
+++ b/evaluate_a_set_of_videos.py
@@ -1,17 +1,19 @@
+import torch
+
 import argparse
 import os
 import pickle as pkl
 
 import decord
 import numpy as np
-import torch
 import yaml
 from tqdm import tqdm
 
-from dover.datasets import (UnifiedFrameSampler,
-                            spatial_temporal_view_decomposition,
-                            ViewDecompositionDataset,
-                            )
+from dover.datasets import (
+    UnifiedFrameSampler,
+    ViewDecompositionDataset,
+    spatial_temporal_view_decomposition,
+)
 from dover.models import DOVER
 
 mean, std = (
@@ -23,12 +25,11 @@ def fuse_results(results: list):
     a, t = (results[0] - 0.1107) / 0.07355, (results[1] + 0.08285) / 0.03774
     x = a * 0.6104 + t * 0.3896
-    return {"aesthetic": 1/(1 + np.exp(-a)),
-            "technical": 1/(1 + np.exp(-t)),
-            "overall": 1/(1 + np.exp(-x))
-            }
-
-
+    return {
+        "aesthetic": 1 / (1 + np.exp(-a)),
+        "technical": 1 / (1 + np.exp(-t)),
+        "overall": 1 / (1 + np.exp(-x)),
+    }
 
 
 if __name__ == "__main__":
@@ -41,11 +42,19 @@ def fuse_results(results: list):
 
     ## can be your own
     parser.add_argument(
-        "-in", "--input_video_dir", type=str, default="./demo", help="the input video dir"
+        "-in",
+        "--input_video_dir",
+        type=str,
+        default="./demo",
+        help="the input video dir",
     )
-    
+
     parser.add_argument(
-        "-out", "--output_result_csv", type=str, default="./dover_predictions/demo.csv", help="the input video dir"
+        "-out",
+        "--output_result_csv",
+        type=str,
+        default="./dover_predictions/demo.csv",
+        help="the input video dir",
     )
 
     parser.add_argument(
@@ -66,35 +75,33 @@ def fuse_results(results: list):
     video_paths = []
     all_results = {}
 
-    with open(
-        args.output_result_csv, "w"
-    ) as w:
-        w.write(f'path, aesthetic score, technical score, overall/final score\n')
+    with open(args.output_result_csv, "w") as w:
+        w.write(f"path, aesthetic score, technical score, overall/final score\n")
 
     dopt = opt["data"]["val-l1080p"]["args"]
-    
+
     dopt["anno_file"] = None
     dopt["data_prefix"] = args.input_video_dir
-    
+
     dataset = ViewDecompositionDataset(dopt)
-    
-    
+
     dataloader = torch.utils.data.DataLoader(
-        dataset, 
-        batch_size=1, 
-        num_workers=opt["num_workers"], 
-        pin_memory=True,
+        dataset,
+        batch_size=1,
+        num_workers=opt["num_workers"],
+        pin_memory=True,
     )
-    
+
     try:
         with open(
-            f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl", "rb"
-        ) as rf:
+            f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl",
+            "rb",
+        ) as rf:
             all_results = pkl.dump(all_results, rf)
             print(f"Starting from {len(all_results)}.")
     except:
         print("Starting over.")
-    
+
     sample_types = ["aesthetic", "technical"]
 
     for i, data in enumerate(tqdm(dataloader, desc="Testing")):
@@ -117,22 +124,20 @@ def fuse_results(results: list):
                         b * data["num_clips"][key], c, t // data["num_clips"][key], h, w
                     )
                 )
-        
+
         with torch.no_grad():
             results = evaluator(video, reduce_scores=False)
             results = [np.mean(l.cpu().numpy()) for l in results]
 
         rescaled_results = fuse_results(results)
-        #all_results[data["name"][0]] = rescaled_results
+        # all_results[data["name"][0]] = rescaled_results
 
-        #with open(
+        # with open(
         #    f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl", "wb"
-        #) as wf:
-        #pkl.dump(all_results, wf)
-        
-        
-        with open(
-            args.output_result_csv, "a"
-        ) as w:
-            w.write(f'{data["name"][0]}, {rescaled_results["aesthetic"]*100:4f}, {rescaled_results["technical"]*100:4f},{rescaled_results["overall"]*100:4f}\n')
-        
+        # ) as wf:
+        # pkl.dump(all_results, wf)
+
+        with open(args.output_result_csv, "a") as w:
+            w.write(
+                f'{data["name"][0]}, {rescaled_results["aesthetic"]*100:4f}, {rescaled_results["technical"]*100:4f},{rescaled_results["overall"]*100:4f}\n'
+            )
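To make the fusion in fuse_results concrete, a worked example with assumed raw branch outputs: each branch score is z-normalized with the constants hard-coded in the patch, the two are mixed with the 0.6104/0.3896 weights, and a sigmoid maps each value into [0, 1].

import numpy as np

aesthetic_raw, technical_raw = 0.15, -0.05  # hypothetical raw model outputs
a = (aesthetic_raw - 0.1107) / 0.07355      # ~0.534
t = (technical_raw + 0.08285) / 0.03774     # ~0.871
x = a * 0.6104 + t * 0.3896                 # weighted fusion, ~0.665
print(1 / (1 + np.exp(-a)))                 # aesthetic, ~0.63
print(1 / (1 + np.exp(-t)))                 # technical, ~0.70
print(1 / (1 + np.exp(-x)))                 # overall, ~0.66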
diff --git a/evaluate_one_video.py b/evaluate_one_video.py
index 8e50fa2..f1441b2 100644
--- a/evaluate_one_video.py
+++ b/evaluate_one_video.py
@@ -1,23 +1,26 @@
+import torch
+
 import argparse
 import pickle as pkl
 
 import decord
 import numpy as np
-import torch
 import yaml
 
-from dover.datasets import (UnifiedFrameSampler,
-                            spatial_temporal_view_decomposition)
+from dover.datasets import UnifiedFrameSampler, spatial_temporal_view_decomposition
 from dover.models import DOVER
 
 mean, std = torch.FloatTensor([123.675, 116.28, 103.53]), torch.FloatTensor(
     [58.395, 57.12, 57.375]
 )
 
+
 def fuse_results(results: list):
-    x = (results[0] - 0.1107) / 0.07355 * 0.6104 + (results[1] + 0.08285) / 0.03774 * 0.3896
+    x = (results[0] - 0.1107) / 0.07355 * 0.6104 + (
+        results[1] + 0.08285
+    ) / 0.03774 * 0.3896
     print(x)
-    return 1/(1 + np.exp(-x))
+    return 1 / (1 + np.exp(-x))
 
 
 def gaussian_rescale(pr):
@@ -81,13 +84,13 @@ def rescale_results(results: list, vname="undefined"):
     )
 
     parser.add_argument(
-        "-f", "--fusion", action="store_true",
+        "-f",
+        "--fusion",
+        action="store_true",
     )
 
     args = parser.parse_args()
 
-    
     video_reader = decord.VideoReader(args.video_path)
-
     with open(args.opt, "r") as f:
         opt = yaml.safe_load(f)
@@ -136,7 +139,7 @@ def rescale_results(results: list, vname="undefined"):
     if args.fusion:
         # predict fused overall score, with default score-level fusion parameters
         print("Normalized fused overall score (scale in [0,1]):", fuse_results(results))
-    
+
     else:
         # predict disentangled scores
         rescale_results(results, vname=args.video_path)
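Once the patched evaluate_a_set_of_videos.py has run, the per-video scores can be read back from the CSV it writes; a small sketch, assuming the default ./dover_predictions/demo.csv exists:

import csv

with open("./dover_predictions/demo.csv") as f:
    # skipinitialspace tolerates the ", "-separated header written above
    for row in csv.DictReader(f, skipinitialspace=True):
        print(row["path"], row["overall/final score"])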