From bfe0450c95cc0419331b1176ce06bfa6d75e5079 Mon Sep 17 00:00:00 2001
From: TimothyHTimothy <2711123841@qq.com>
Date: Mon, 19 Dec 2022 17:10:29 +0800
Subject: [PATCH] fix issue #2

---
 default_infer.py            | 132 ++++++++++++++++++++++--------------
 evaluate_a_set_of_videos.py |  89 ++++++++++++------------
 evaluate_one_video.py       |  21 +++---
 3 files changed, 140 insertions(+), 102 deletions(-)

diff --git a/default_infer.py b/default_infer.py
index 4ded7eb..a81bfbc 100644
--- a/default_infer.py
+++ b/default_infer.py
@@ -1,37 +1,37 @@
 import torch
-import cv2
-import random
-import os.path as osp
-import dover.models as models
-import dover.datasets as datasets
 import argparse
+import math
+import os.path as osp
+import pickle
+import random
+from time import time
 
-from scipy.stats import spearmanr, pearsonr
-from scipy.stats.stats import kendalltau as kendallr
+import cv2
 import numpy as np
-
-from time import time
+import torch
+import yaml
+from scipy.stats import kendalltau as kendallr
+from scipy.stats import pearsonr, spearmanr
+from thop import profile
 from tqdm import tqdm
-import pickle
-import math
 
+import dover.datasets as datasets
+import dover.models as models
 import wandb
-import yaml
-
-from thop import profile
 
 
 def rescale(pr, gt=None):
     if gt is None:
-        print(np.mean(pr), np.std(pr))
+        print("mean", np.mean(pr), "std", np.std(pr))
         pr = (pr - np.mean(pr)) / np.std(pr)
     else:
         print(np.mean(pr), np.std(pr), np.std(gt), np.mean(gt))
         pr = ((pr - np.mean(pr)) / np.std(pr)) * np.std(gt) + np.mean(gt)
     return pr
 
-sample_types=["aesthetic", "technical"]
+
+sample_types = ["aesthetic", "technical"]
 
 
 def profile_inference(inf_set, model, device):
@@ -41,17 +41,33 @@ def profile_inference(inf_set, model, device):
         if key in data:
             video[key] = data[key].to(device)
             c, t, h, w = video[key].shape
-            video[key] = video[key].reshape(1, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape( data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+            video[key] = (
+                video[key]
+                .reshape(
+                    1, c, data["num_clips"][key], t // data["num_clips"][key], h, w
+                )
+                .permute(0, 2, 1, 3, 4, 5)
+                .reshape(data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+            )
     with torch.no_grad():
-        flops, params = profile(model, (video, ))
-        print(f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M.")
+        flops, params = profile(model, (video,))
+        print(
+            f"The FLOps of the Variant is {flops/1e9:.1f}G, with Params {params/1e6:.2f}M."
+        )
+
 
-def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s', set_name="na"):
+def inference_set(
+    inf_loader, model, device, best_, save_model=False, suffix="s", set_name="na"
+):
     print(f"Validating for {set_name}.")
     results = []
+    try:
+        model = torch.compile(model)
+    except:
+        print("You may try to accelerate your model with torch 2.0")
 
     best_s, best_p, best_k, best_r = best_
-    
+
     keys = []
 
     for i, data in enumerate(tqdm(inf_loader, desc="Validating")):
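For reference, here is a minimal standalone sketch (with assumed toy shapes, not part of the patch) of what the reshape-permute-reshape chain above computes: the sampled frames of all clips arrive stacked along the time axis, and the chain splits them back into per-clip tensors that the model scores independently. The same pattern appears again in inference_set in the next hunk.

import torch

b, c, t, h, w = 1, 3, 96, 224, 224  # hypothetical batch of 96 stacked frames
num_clips = 3                       # stands in for data["num_clips"][key]

video = torch.randn(b, c, t, h, w)
clips = (
    video.reshape(b, c, num_clips, t // num_clips, h, w)
    .permute(0, 2, 1, 3, 4, 5)  # -> (b, num_clips, c, t', h, w)
    .reshape(b * num_clips, c, t // num_clips, h, w)
)
assert clips.shape == (3, 3, 32, 224, 224)  # one batch entry per clip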
@@ -63,9 +79,18 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
             if key in data:
                 video[key] = data[key].to(device)
                 b, c, t, h, w = video[key].shape
-                video[key] = video[key].reshape(b, c, data["num_clips"][key], t // data["num_clips"][key], h, w).permute(0,2,1,3,4,5).reshape(b * data["num_clips"][key], c, t // data["num_clips"][key], h, w)
+                video[key] = (
+                    video[key]
+                    .reshape(
+                        b, c, data["num_clips"][key], t // data["num_clips"][key], h, w
+                    )
+                    .permute(0, 2, 1, 3, 4, 5)
+                    .reshape(
+                        b * data["num_clips"][key], c, t // data["num_clips"][key], h, w
+                    )
+                )
         with torch.no_grad():
-            labels = model(video,reduce_scores=False)
+            labels = model(video, reduce_scores=False)
             labels = [np.mean(l.cpu().numpy()) for l in labels]
         result["pr_labels"] = labels
         result["gt_label"] = data["gt_label"].item()
@@ -74,7 +99,6 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
         # del data
         results.append(result)
 
-    
     ## generate the demo video for video quality localization
     gt_labels = [r["gt_label"] for r in results]
     pr_labels = 0
@@ -84,22 +108,28 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
         key_pr_labels = rescale([np.mean(r["pr_labels"][i]) for r in results])
         pr_labels += key_pr_labels * w
         pr_dict[key] = key_pr_labels
-        
-    #with open(f"dover_predictions/{set_name}.pkl", "wb") as f:
+
+    # with open(f"dover_predictions/{set_name}.pkl", "wb") as f:
     #    pickle.dump(pr_dict, f)
-    
+
     print(pr_labels)
     pr_labels = rescale(pr_labels, gt_labels)
 
     s = spearmanr(gt_labels, pr_labels)[0]
     p = pearsonr(gt_labels, pr_labels)[0]
     k = kendallr(gt_labels, pr_labels)[0]
     r = np.sqrt(((gt_labels - pr_labels) ** 2).mean())
-    
-    
+
     results = sorted(results, key=lambda x: x["pr_labels"])
 
     try:
-        wandb.log({f"val/SRCC-{suffix}": s, f"val/PLCC-{suffix}": p, f"val/KRCC-{suffix}": k, f"val/RMSE-{suffix}": r})
+        wandb.log(
+            {
+                f"val/SRCC-{suffix}": s,
+                f"val/PLCC-{suffix}": p,
+                f"val/KRCC-{suffix}": k,
+                f"val/RMSE-{suffix}": r,
+            }
+        )
     except:
         pass
@@ -127,6 +157,7 @@ def inference_set(inf_loader, model, device, best_, save_model=False, suffix='s'
 
     return best_s, best_p, best_k, best_r, pr_labels
 
+
 def main():
 
     parser = argparse.ArgumentParser()
@@ -138,44 +169,45 @@ def main():
     with open(args.opt, "r") as f:
        opt = yaml.safe_load(f)
     print(opt)
-    
-    
-    
     ## adaptively choose the device
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    #device = "cpu"
+    # device = "cpu"
 
     ## defining model and loading checkpoint
 
     bests_ = []
-    
+
     model = getattr(models, opt["model"]["type"])(**opt["model"]["args"]).to(device)
-    state_dict = torch.load(opt["test_load_path"], map_location=device)#["state_dict"]
-    
+    state_dict = torch.load(
+        opt["test_load_path"], map_location=device
+    )  # ["state_dict"]
+
     model.load_state_dict(state_dict, strict=True)
-    
+
     for key in opt["data"].keys():
-        
+
         if "val" not in key and "test" not in key:
             continue
-        
+
         run = wandb.init(
             project=opt["wandb"]["project_name"],
-            name=opt["name"]+"_Test_"+key,
+            name=opt["name"] + "_Test_" + key,
             reinit=True,
         )
-        
-        val_dataset = getattr(datasets, opt["data"][key]["type"])(opt["data"][key]["args"])
-        
-        val_loader = torch.utils.data.DataLoader(
-            val_dataset, batch_size=1, num_workers=opt["num_workers"], pin_memory=True,
+        val_dataset = getattr(datasets, opt["data"][key]["type"])(
+            opt["data"][key]["args"]
         )
-        
+        val_loader = torch.utils.data.DataLoader(
+            val_dataset,
+            batch_size=1,
+            num_workers=opt["num_workers"],
+            pin_memory=True,
+        )
 
         profile_inference(val_dataset, model, device)
@@ -184,11 +216,11 @@ def main():
 
         best_ = -1, -1, -1, 1000
 
-
         best_ = inference_set(
             val_loader,
             model,
-            device, best_,
+            device,
+            best_,
             set_name=key,
         )
@@ -199,11 +231,9 @@ def main():
             KROCC: {best_[2]:.4f}
             RMSE: {best_[3]:.4f}."""
         )
-        
         run.finish()
 
-
 if __name__ == "__main__":
     main()
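The four criteria logged above are the standard agreement measures for quality assessment; a self-contained sketch on hypothetical labels (toy arrays, not taken from the patch) shows how each is computed:

import numpy as np
from scipy.stats import kendalltau as kendallr
from scipy.stats import pearsonr, spearmanr

gt_labels = np.array([0.9, 0.4, 0.7, 0.2, 0.6])  # assumed ground-truth scores
pr_labels = np.array([0.8, 0.5, 0.6, 0.1, 0.7])  # assumed predictions

s = spearmanr(gt_labels, pr_labels)[0]  # SRCC: monotonic (rank) agreement
p = pearsonr(gt_labels, pr_labels)[0]   # PLCC: linear correlation
k = kendallr(gt_labels, pr_labels)[0]   # KRCC: pairwise ordering agreement
r = np.sqrt(((gt_labels - pr_labels) ** 2).mean())  # RMSE: absolute error
print(f"SRCC={s:.4f} PLCC={p:.4f} KRCC={k:.4f} RMSE={r:.4f}")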
diff --git a/evaluate_a_set_of_videos.py b/evaluate_a_set_of_videos.py
index 46ab00d..871b511 100644
--- a/evaluate_a_set_of_videos.py
+++ b/evaluate_a_set_of_videos.py
@@ -1,17 +1,19 @@
+import torch
+
 import argparse
 import os
 import pickle as pkl
 
 import decord
 import numpy as np
-import torch
 import yaml
 from tqdm import tqdm
 
-from dover.datasets import (UnifiedFrameSampler,
-                            spatial_temporal_view_decomposition,
-                            ViewDecompositionDataset,
-                            )
+from dover.datasets import (
+    UnifiedFrameSampler,
+    ViewDecompositionDataset,
+    spatial_temporal_view_decomposition,
+)
 from dover.models import DOVER
 
 mean, std = (
@@ -23,12 +25,11 @@ def fuse_results(results: list):
     a, t = (results[0] - 0.1107) / 0.07355, (results[1] + 0.08285) / 0.03774
     x = a * 0.6104 + t * 0.3896
-    return {"aesthetic": 1/(1 + np.exp(-a)),
-            "technical": 1/(1 + np.exp(-t)),
-            "overall": 1/(1 + np.exp(-x))
-            }
-
-
+    return {
+        "aesthetic": 1 / (1 + np.exp(-a)),
+        "technical": 1 / (1 + np.exp(-t)),
+        "overall": 1 / (1 + np.exp(-x)),
+    }
 
 
 if __name__ == "__main__":
@@ -41,11 +42,19 @@ def fuse_results(results: list):
 
     ## can be your own
     parser.add_argument(
-        "-in", "--input_video_dir", type=str, default="./demo", help="the input video dir"
+        "-in",
+        "--input_video_dir",
+        type=str,
+        default="./demo",
+        help="the input video dir",
     )
-    
+
     parser.add_argument(
-        "-out", "--output_result_csv", type=str, default="./dover_predictions/demo.csv", help="the input video dir"
+        "-out",
+        "--output_result_csv",
+        type=str,
+        default="./dover_predictions/demo.csv",
+        help="the input video dir",
     )
 
     parser.add_argument(
@@ -66,35 +75,33 @@ def fuse_results(results: list):
     video_paths = []
     all_results = {}
 
-    with open(
-        args.output_result_csv, "w"
-    ) as w:
-        w.write(f'path, aesthetic score, technical score, overall/final score\n')
+    with open(args.output_result_csv, "w") as w:
+        w.write(f"path, aesthetic score, technical score, overall/final score\n")
 
     dopt = opt["data"]["val-l1080p"]["args"]
-    
+
     dopt["anno_file"] = None
     dopt["data_prefix"] = args.input_video_dir
-    
+
     dataset = ViewDecompositionDataset(dopt)
-    
-    
+
     dataloader = torch.utils.data.DataLoader(
-        dataset, 
-        batch_size=1, 
-        num_workers=opt["num_workers"], 
-        pin_memory=True,
+        dataset,
+        batch_size=1,
+        num_workers=opt["num_workers"],
+        pin_memory=True,
     )
-    
+
     try:
         with open(
-            f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl", "rb"
-        ) as rf:
+            f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl",
+            "rb",
+        ) as rf:
             all_results = pkl.dump(all_results, rf)
             print(f"Starting from {len(all_results)}.")
     except:
         print("Starting over.")
-    
+
     sample_types = ["aesthetic", "technical"]
 
     for i, data in enumerate(tqdm(dataloader, desc="Testing")):
@@ -117,22 +124,20 @@ def fuse_results(results: list):
                         b * data["num_clips"][key], c, t // data["num_clips"][key], h, w
                     )
                 )
-        
+
         with torch.no_grad():
             results = evaluator(video, reduce_scores=False)
             results = [np.mean(l.cpu().numpy()) for l in results]
 
         rescaled_results = fuse_results(results)
-        #all_results[data["name"][0]] = rescaled_results
+        # all_results[data["name"][0]] = rescaled_results
 
-        #with open(
+        # with open(
         #    f"dover_predictions/val-custom_{args.input_video_dir.split('/')[-1]}.pkl", "wb"
-        #) as wf:
-        #pkl.dump(all_results, wf)
-        
-        
-        with open(
-            args.output_result_csv, "a"
-        ) as w:
-            w.write(f'{data["name"][0]}, {rescaled_results["aesthetic"]*100:4f}, {rescaled_results["technical"]*100:4f},{rescaled_results["overall"]*100:4f}\n')
-        
+        # ) as wf:
+        # pkl.dump(all_results, wf)
+
+        with open(args.output_result_csv, "a") as w:
+            w.write(
+                f'{data["name"][0]}, {rescaled_results["aesthetic"]*100:4f}, {rescaled_results["technical"]*100:4f},{rescaled_results["overall"]*100:4f}\n'
+            )
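To make the fusion in fuse_results concrete, a worked example with assumed raw branch outputs: each branch score is z-normalized with the constants hard-coded in the patch, the two are mixed with the 0.6104/0.3896 weights, and a sigmoid maps each value into [0, 1].

import numpy as np

aesthetic_raw, technical_raw = 0.15, -0.05  # hypothetical raw model outputs
a = (aesthetic_raw - 0.1107) / 0.07355      # ~0.534
t = (technical_raw + 0.08285) / 0.03774     # ~0.871
x = a * 0.6104 + t * 0.3896                 # weighted fusion, ~0.665
print(1 / (1 + np.exp(-a)))                 # aesthetic, ~0.63
print(1 / (1 + np.exp(-t)))                 # technical, ~0.70
print(1 / (1 + np.exp(-x)))                 # overall, ~0.66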
diff --git a/evaluate_one_video.py b/evaluate_one_video.py
index 8e50fa2..f1441b2 100644
--- a/evaluate_one_video.py
+++ b/evaluate_one_video.py
@@ -1,23 +1,26 @@
+import torch
+
 import argparse
 import pickle as pkl
 
 import decord
 import numpy as np
-import torch
 import yaml
 
-from dover.datasets import (UnifiedFrameSampler,
-                            spatial_temporal_view_decomposition)
+from dover.datasets import UnifiedFrameSampler, spatial_temporal_view_decomposition
 from dover.models import DOVER
 
 mean, std = torch.FloatTensor([123.675, 116.28, 103.53]), torch.FloatTensor(
     [58.395, 57.12, 57.375]
 )
 
+
 def fuse_results(results: list):
-    x = (results[0] - 0.1107) / 0.07355 * 0.6104 + (results[1] + 0.08285) / 0.03774 * 0.3896
+    x = (results[0] - 0.1107) / 0.07355 * 0.6104 + (
+        results[1] + 0.08285
+    ) / 0.03774 * 0.3896
     print(x)
-    return 1/(1 + np.exp(-x))
+    return 1 / (1 + np.exp(-x))
 
 
 def gaussian_rescale(pr):
@@ -81,13 +84,13 @@ def rescale_results(results: list, vname="undefined"):
     )
 
     parser.add_argument(
-        "-f", "--fusion", action="store_true",
+        "-f",
+        "--fusion",
+        action="store_true",
     )
 
     args = parser.parse_args()
 
-    
     video_reader = decord.VideoReader(args.video_path)
-
     with open(args.opt, "r") as f:
         opt = yaml.safe_load(f)
@@ -136,7 +139,7 @@ def rescale_results(results: list, vname="undefined"):
     if args.fusion:
         # predict fused overall score, with default score-level fusion parameters
         print("Normalized fused overall score (scale in [0,1]):", fuse_results(results))
-    
+
     else:
         # predict disentangled scores
         rescale_results(results, vname=args.video_path)
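Once the patched evaluate_a_set_of_videos.py has run, the per-video scores can be read back from the CSV it writes; a small sketch, assuming the default ./dover_predictions/demo.csv exists:

import csv

with open("./dover_predictions/demo.csv") as f:
    # skipinitialspace tolerates the ", "-separated header written above
    for row in csv.DictReader(f, skipinitialspace=True):
        print(row["path"], row["overall/final score"])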