From 8e14944f3cc9f178b1c5626482fad509068f1395 Mon Sep 17 00:00:00 2001
From: Xiaotian Han
Date: Wed, 28 Jul 2021 13:37:17 -0700
Subject: [PATCH] update citation & fix broken url

---
 README.md                | 13 +++++++------
 SCENE_GRAPH_MODEL_ZOO.md | 14 +++++++-------
 tools/train_sg_net.py    | 38 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/README.md b/README.md
index 6227a55..f345d88 100644
--- a/README.md
+++ b/README.md
@@ -215,12 +215,13 @@ free to open a new issue.
 
 ## Citations
 Please consider citing this project in your publications if it helps your research. The following is a BibTeX reference. The BibTeX entry requires the `url` LaTeX package.
 ```
-@misc{han2021sgbenchmark,
-author = {Xiaotian Han and Jianwei Yang and Houdong Hu and Lei Zhang and Pengchuan Zhang},
-title = {{Scene Graph Benchmark}},
-year = {2021},
-howpublished = {\url{https://github.com/microsoft/scene_graph_benchmark}},
-note = {Accessed: [Insert date here]}
+@misc{han2021image,
+  title={Image Scene Graph Generation (SGG) Benchmark},
+  author={Xiaotian Han and Jianwei Yang and Houdong Hu and Lei Zhang and Jianfeng Gao and Pengchuan Zhang},
+  year={2021},
+  eprint={2107.12604},
+  archivePrefix={arXiv},
+  primaryClass={cs.CV}
 }
 ```

diff --git a/SCENE_GRAPH_MODEL_ZOO.md b/SCENE_GRAPH_MODEL_ZOO.md
index e68359e..b46a470 100644
--- a/SCENE_GRAPH_MODEL_ZOO.md
+++ b/SCENE_GRAPH_MODEL_ZOO.md
@@ -10,13 +10,13 @@ All the following models are inferenced using unconstraint method, the detection
 
 model | recall@50 | wmAP(Triplet) | mAP(Triplet) | wmAP(Phrase) | mAP(Phrase) | Triplet proposal recall | Phrase proposal recall | model | config
 -----------|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:
-IMP, no bias | 71.64 | 30.56 | 36.47 | 32.90 | 40.61 | 72.57 | 75.87 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_imp_nobias.pth) | [link](sgg_configs/oi_vrd/R152FPN_imp_nobias_oi.yaml)
-IMP, bias | 71.81 | 30.88 | 45.97 | 33.25 | 50.42 | 72.81 | 76.04 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_imp_bias.pth) | [link](sgg_configs/oi_vrd/R152FPN_imp_bias_oi.yaml)
-MSDN, no bias | 71.76 | 30.40 | 36.76 | 32.81 | 40.89 | 72.54 | 75.85 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_msdn_nobias.pth) | [link](sgg_configs/oi_vrd/R152FPN_msdn_nobias_oi.yaml)
-MSDN, bias | 71.48 | 30.22 | 34.49 | 32.58 | 38.71 | 72.45 | 75.62 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_msdn_bias.pth) | [link](sgg_configs/oi_vrd/R152FPN_msdn_bias_oi.yaml)
-Neural Motif, bias | 72.54 | 29.35 | 29.26 | 33.10 | 35.02 | 73.64 | 78.70 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_nm.pth) | [link](sgg_configs/oi_vrd/R152FPN_motif_oi.yaml)
-GRCNN, bias | 74.17 | 34.73 | 39.56 | 37.04 | 43.63 | 74.11 | 77.32 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_grcnn.pth) | [link](sgg_configs/oi_vrd/R152FPN_grcnn_oi.yaml)
-RelDN | 75.40 | 40.85 | 44.24 | 49.16 | 50.60 | 78.74 | 90.39 | [link](https://penzhanwu2.blob.core.windows.net/phillytools/data/maskrcnn/pretrained_model/sgg_model_zoo/oi_R152_reldn.pth) | [link](sgg_configs/oi_vrd/R152FPN_reldn_oi.yaml)
+IMP, no bias | 71.64 | 30.56 | 36.47 | 32.90 | 40.61 | 72.57 | 75.87 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_imp_nobias.pth) | [link](sgg_configs/oi_vrd/R152FPN_imp_nobias_oi.yaml)
+IMP, bias | 71.81 | 30.88 | 45.97 | 33.25 | 50.42 | 72.81 | 76.04 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_imp_bias.pth) | [link](sgg_configs/oi_vrd/R152FPN_imp_bias_oi.yaml)
+MSDN, no bias | 71.76 | 30.40 | 36.76 | 32.81 | 40.89 | 72.54 | 75.85 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_msdn_nobias.pth) | [link](sgg_configs/oi_vrd/R152FPN_msdn_nobias_oi.yaml)
+MSDN, bias | 71.48 | 30.22 | 34.49 | 32.58 | 38.71 | 72.45 | 75.62 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_msdn_bias.pth) | [link](sgg_configs/oi_vrd/R152FPN_msdn_bias_oi.yaml)
+Neural Motif, bias | 72.54 | 29.35 | 29.26 | 33.10 | 35.02 | 73.64 | 78.70 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_nm.pth) | [link](sgg_configs/oi_vrd/R152FPN_motif_oi.yaml)
+GRCNN, bias | 74.17 | 34.73 | 39.56 | 37.04 | 43.63 | 74.11 | 77.32 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_grcnn.pth) | [link](sgg_configs/oi_vrd/R152FPN_grcnn_oi.yaml)
+RelDN | 75.40 | 40.85 | 44.24 | 49.16 | 50.60 | 78.74 | 90.39 | [link](https://penzhanwu2.blob.core.windows.net/sgg/sgg_benchmark/sgg_model_zoo/oi_R152_reldn.pth) | [link](sgg_configs/oi_vrd/R152FPN_reldn_oi.yaml)
 
 ### Visual Genome

diff --git a/tools/train_sg_net.py b/tools/train_sg_net.py
index db235f4..b21faf6 100644
--- a/tools/train_sg_net.py
+++ b/tools/train_sg_net.py
@@ -113,6 +113,44 @@ def train(cfg, local_rank, distributed):
     return model
 
 
+def run_test(cfg, model, distributed):
+    if distributed:
+        model = model.module
+    torch.cuda.empty_cache()  # TODO check if it helps
+    iou_types = ("bbox",)
+    if cfg.MODEL.MASK_ON:
+        iou_types = iou_types + ("segm",)
+    if cfg.MODEL.KEYPOINT_ON:
+        iou_types = iou_types + ("keypoints",)
+    output_folders = [None] * len(cfg.DATASETS.TEST)
+    dataset_names = cfg.DATASETS.TEST
+    if cfg.OUTPUT_DIR:
+        for idx, dataset_name in enumerate(dataset_names):
+            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
+            mkdir(output_folder)
+            output_folders[idx] = output_folder
+    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
+    labelmap_file = config_dataset_file(cfg.DATA_DIR, cfg.DATASETS.LABELMAP_FILE)
+    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
+        inference(
+            model,
+            cfg,
+            data_loader_val,
+            dataset_name=dataset_name,
+            iou_types=iou_types,
+            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
+            bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
+            device=cfg.MODEL.DEVICE,
+            expected_results=cfg.TEST.EXPECTED_RESULTS,
+            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
+            output_folder=output_folder,
+            skip_performance_eval=cfg.TEST.SKIP_PERFORMANCE_EVAL,
+            labelmap_file=labelmap_file,
+            save_predictions=cfg.TEST.SAVE_PREDICTIONS,
+        )
+        synchronize()
+
+
 def main():
     parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
     parser.add_argument(
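The hunk above adds only the definition of run_test(); its call site is outside the diff context. In the maskrcnn-benchmark code base this repository derives from, main() invokes run_test() on the trained model unless a --skip-test flag is passed. Below is a minimal sketch of that wiring, assuming the same convention applies here; the --skip-test flag, the args.distributed attribute, and the config bootstrapping are assumptions based on that convention, not lines from this patch.

# Hypothetical call site for the run_test() added above, following the
# maskrcnn-benchmark train_net.py convention. --skip-test and
# args.distributed are assumptions, not part of this patch.
import argparse
import os

import torch

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.utils.comm import synchronize


def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--skip-test", action="store_true", help="skip evaluation after training")
    args = parser.parse_args()

    # Distributed setup mirrors the usual maskrcnn-benchmark bootstrapping:
    # torch.distributed.launch sets WORLD_SIZE when more than one GPU is used.
    num_gpus = int(os.environ.get("WORLD_SIZE", "1"))
    args.distributed = num_gpus > 1
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.freeze()

    # train() and run_test() are the functions defined in tools/train_sg_net.py,
    # run_test() being the one introduced by the hunk above. It evaluates the
    # trained model on every dataset in cfg.DATASETS.TEST.
    model = train(cfg, args.local_rank, args.distributed)
    if not args.skip_test:
        run_test(cfg, model, args.distributed)


if __name__ == "__main__":
    main()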