Mirror of https://github.com/microsoft/archai.git
patch(root): Patches missing/changed files according to master.
Parent: 711a3b0ad3
Commit: c059c391cd
@@ -149,4 +149,4 @@ venv.bak/
dmypy.json

# Pyre type checker
.pyre/
@@ -326,82 +326,6 @@
            "--param_constraint_upper", "15000000",
        ]
    },
    {
        "name": "Train GPT2 Flex Distributed WT103",
        "type": "python",
        "request": "launch",
        "module": "torch.distributed.launch",
        "console": "integratedTerminal",
        "args": ["--full", "--algos", "local_search_darts_reg", "--datasets", "cifar10"]
    },
    {
        "name": "Analysis Aggregate",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_aggregate.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\hog_imagenet16_search",
            "--out-dir", "F:\\dedey\\archai_experiment_reports"]
    },
    {
        "name": "Train Transformer XL",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/archai/nlp/nvidia_transformer_xl/train.py",
        "console": "integratedTerminal",
        "args": ["--config", "dgx1_1gpu_fp32", "--config_file", "wt103_base.yaml", "--attn_type", "2", "--n_layer", "16", "--n_head", "8", "--d_model", "256", "--d_head", "32", "--d_inner", "768", "--log_interval", "10"]
    },
    {
        "name": "Train TransformerXL Distributed",
        "type": "python",
        "request": "launch",
        "module": "torch.distributed.launch",
        "console": "integratedTerminal",
        "cwd": "${workspaceFolder}",
        "args": [
            "--nproc_per_node=4",
            "archai/nlp/nvidia_transformer_xl/train.py",
            "--config",
            "dgx1_4gpu_fp32",
            "--config_file",
            "wt103_base.yaml"
        ],
        "env": {
            "NCCL_P2P_DISABLE": "1"
        }
    },
    {
        "name": "Search Autoregressive Memformer",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/archai/nlp/search.py",
        "console": "integratedTerminal",
        "args": ["--population_size", "10",
            "--parent_size", "2",
            "--mutation_size", "4",
            "--crossover_size", "4",
            "--n_iter", "2",
            "--use_quantization",
            "--model_type", "mem_transformer",
        ]
    },
    {
        "name": "Search Autoregressive HF GPT2/Flex",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/archai/nlp/search.py",
        "console": "integratedTerminal",
        "args": ["--population_size", "10",
            "--parent_size", "2",
            "--mutation_size", "4",
            "--crossover_size", "4",
            "--n_iter", "2",
            "--use_quantization",
            "--model_type", "hf_gpt2_flex",
            "--latency_constraint_upper", "10.0",
            "--param_constraint_upper", "15000000",
        ]
    },
    {
        "name": "Train GPT2 Flex Distributed WT103",
        "type": "python",
@@ -443,306 +367,6 @@
        "console": "integratedTerminal",
        "args": []
    },
    {
        "name": "Analysis Freeze Natsbench Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/analysis_freeze_natsbench_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "C:\\Users\\dedey\\Documents\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_ct1024_augnone",
            "--out-dir", "C:\\Users\\dedey\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Naswot Conditional Natsbench Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/analysis_naswot_cond_natsbench_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "C:\\Users\\dedey\\Documents\\archaiphilly\\phillytools\\proxynas_naswotcond0.8",
            "--out-dir", "C:\\Users\\dedey\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Proxynas",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_create_darts_space_benchmark.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "/home/dedey/archaiphilly/amlt/darts_constant_random_synthetic_cifar10",
            "--out-dir", "/home/dedey/archai_experiment_reports"]
    },
    {
        "name": "Analysis Freeze Darts Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_freeze_darts_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_nofreeze_ftonly",
            "--out-dir", "F:\\archai_experiment_reports", "--reg-evals-file",
            "F:\\archai_experiment_reports\\ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6\\darts_benchmark.yaml"]
    },
    {
        "name": "Analysis Regular Darts Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_regular_darts_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "/home/dedey/archaiphilly/amlt/darts_constant_random_cifar100",
            "--out-dir", "/home/dedey/archai_experiments_reports", "--reg-evals-file",
            "F:\\archai_experiment_reports\\ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6\\darts_benchmark.yaml"]
    },
    {
        "name": "Analysis Freeze Natsbench and Nb101 Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_freeze_natsbench_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\ft_sc10_fb1024_ftlr0.1_fte5_ct256_ftt0.15",
            "--out-dir", "F:\\archai_experiment_reports",
            "--reg-evals-file", "F:\\archai_experiment_reports\\nb_reg_b256_e200_sc10\\arch_id_test_accuracy_synthetic_cifar10.yaml"]
    },
    {
        "name": "NEW Analysis Freeze Natsbench and Nb101 Space NEW",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_scu",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Freeze Natsbench SSS Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_freeze_natsbench_sss.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\nb_sss_r1.0_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Freeze Addon No Cond",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_freezeaddon_nocond.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.001_fte5_nocond",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Naswot Conditional Natsbench Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_naswot_cond_natsbench_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "C:\\Users\\dedey\\Documents\\archaiphilly\\phillytools\\proxynas_naswotcond0.8",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Natsbench Nonstandard Generate Benchmark",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_natsbench_nonstandard_generate_benchmark.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "/home/dedey/archaiphilly/amlt/natsbench_constant_random_cifar100",
            "--out-dir", "/home/dedey/archai_experiment_reports"]
    },
    {
        "name": "Analysis Regular Natsbench TSS, SSS and Nb101 Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_regular_natsbench_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\nb_sss_reg_b256_e20",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Cross Experiment Plots",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_plots/cross_exp_plots.py",
        "console": "integratedTerminal",
        "args": ["--dataset", "darts_cifar10",
            "--conf-location", "scripts/reports/fear_plots/cross_exp_conf.yaml"]
    },
    {
        "name": "Analysis Cross Random Search Plots",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_plots/cross_random_search.py",
        "console": "integratedTerminal",
        "args": ["--dataset", "darts_cifar10",
            "--conf-location", "scripts/reports/fear_plots/cross_random_search.yaml"]
    },
    {
        "name": "Analysis Cross Local Search Plots",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_plots/cross_local_search.py",
        "console": "integratedTerminal",
        "args": ["--dataset", "natsbench_cifar10",
            "--conf-location", "scripts/reports/fear_plots/cross_local_search.yaml"]
    },
    {
        "name": "Analysis Zero Cost Natsbench, Nasbench101, Dartsspace Experiments",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_natsbench_zerocost.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "/home/dedey/archaiphilly/amlt/zc_synthetic_cifar10",
            "--out-dir", "/home/dedey/archai_experiment_reports",
            "--reg-evals-file",
            "/home/dedey/archai_experiment_reports/nb_reg_b256_e200_sc10/arch_id_test_accuracy.yaml",
            "--params-flops-file",
            "/home/dedey/archai_experiment_reports/nb_reg_b256_e200_sc10/arch_id_params_flops.yaml"
        ]
    },
    {
        "name": "Analysis Zero Cost Natsbench Epochs Experiments",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_natsbench_zerocost_epochs.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "/home/dedey/archaiphilly/amlt/natsbench_constant_random_zerocost_epochs_scifar100",
            "--out-dir", "/home/dedey/archai_experiment_reports",
            "--reg-evals-file",
            "/home/dedey/archai_experiment_reports/natsbench_constant_random_scifar100/arch_id_test_accuracy.yaml"]
    },
    {
        "name": "Analysis Zero Cost Conditional Natsbench Experiments",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_natsbench_conditional_zerocost.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\zc_cond_b256_ftt0.6",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Generate ArchId Test Accuracy",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_generate_archid_test_acc.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\nb_f102_b256_reg200",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Hog FFNet",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_hog_ffnet.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\hog_flower102_search",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Random Search Natsbench TSS Far",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_random_search_natsbench_tss_far.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\rs_far_ftt0.6_max500_ratio8.0_fixedseeds",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Random Search Natsbench TSS Far Post",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_random_search_natsbench_tss_far_post.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\rs_farpost_c100_ftt0.3_max500_ratio2.0_fixedseeds",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Random Search Natsbench TSS Reg",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_random_search_natsbench_tss_reg.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\rs_reg_max500_b1024_e50_fixedseeds",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Random Search DARTS Reg",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_random_search_darts_reg.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\rs_darts_reg_e5",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Random Search DARTS Far",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_random_search_darts_far.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\rs_darts_far_ftt0.6_fte10_ratio_8.0_nofreeze",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Local Search Natsbench TSS",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "F:\\archaiphilly\\phillytools\\ls_far_ftt0.6_max300_ratio2.0_fixedseeds_refactor",
            "--out-dir", "F:\\archai_experiment_reports", "--natsbench_loc", "C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple",
            "--dataset", "cifar10"]
    },
    {
        "name": "Analysis Nasbench301 Time-to-thresh vs. Test Acc",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_nasbench301_timetothresh.py",
        "console": "integratedTerminal",
        "args": ["--nb301-logs-dir", "C:\\Users\\dedey\\dataroot\\nasbench301\\nasbench301_full_data\\nb_301_v13_lc_iclr_final\\rs",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Nasbench301 Ranking",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/analysis_nasbench301_ranking.py",
        "console": "integratedTerminal",
        "args": ["--nb301-logs-dir", "C:\\Users\\dedey\\dataroot\\nasbench301\\nasbench301_full_data\\nb_301_v13_lc_iclr_final\\rs",
            "--out-dir", "F:\\archai_experiment_reports"]
    },
    {
        "name": "Analysis Simulate FEAR on Nasbench301",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/simulate_fear_on_nb301.py",
        "console": "integratedTerminal",
        "args": ["--nb301-logs-dir", "C:\\Users\\dedey\\dataroot\\nasbench301\\nasbench301_full_data\\nb_301_v13_lc_iclr_final\\rs",
            "--out-dir", "F:\\archai_experiment_reports", "--scorer", "train_accuracy"]
    },
    {
        "name": "Analysis Simulate FEAR on Nasbench301 Toy",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/reports/fear_analysis/simulate_fear_on_nb301.py",
        "console": "integratedTerminal",
        "args": ["--nb301-logs-dir", "C:\\Users\\dedey\\dataroot\\nasbench301\\nasbench301_full_data\\nb_301_v13_toy",
            "--out-dir", "F:\\archai_experiment_reports", "--scorer", "train_accuracy"]
    },
    {
        "name": "Analysis Arch Population",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/scripts/lts_analysis/analyze_arch_population.py",
        "console": "integratedTerminal",
        "args": ["--root_folder", "/home/dedey/archaiphilly/amlt/gpt2_flex_random_l5_u12_finedm",
            "--out_dir", "/home/dedey/archai_experiment_reports",
        ]
    },
    {
        "name": "Analysis Characterize Search Space",
        "type": "python",
        "request": "launch",
        "program": "${cwd}/archai/nlp/nas/characterize_search_space.py",
        "console": "integratedTerminal",
        "args": ["--results-dir", "C:\\Users\\dedey\\Documents\\archaiphilly\\phillytools\\proxynas_regular_and_parameterless",
            "--out-dir", "C:\\Users\\dedey\\archai_experiment_reports"]
    },
    {
        "name": "CurrentFile",
        "type": "python",
@@ -1,568 +0,0 @@
import os
import time
from pathlib import Path
from tempfile import TemporaryDirectory
from overrides.overrides import overrides
from typing import List, Tuple, Optional, Dict
from collections import OrderedDict
import random
import ray

import numpy as np
import plotly.graph_objects as go
from tqdm import tqdm

from archai.common.common import get_conf_common
from archai.common.common import get_expdir
from archai.nas.searcher import SearchResult
from archai.common.common import logger
from archai.common.config import Config
from archai.algos.evolution_pareto.evolution_pareto_search import EvolutionParetoSearch
from archai.nas.arch_meta import ArchWithMetaData
from archai.nas.nas_utils import compute_crowding_distance, compute_pareto_hypervolume
from archai.common import utils
from archai.search_spaces.discrete_search_spaces.segmentation_search_spaces.discrete_search_space_segmentation import DiscreteSearchSpaceSegmentation

from archai.algos.evolution_pareto_image_seg.segmentation_trainer import SegmentationTrainer
from archai.algos.evolution_pareto_image_seg.utils import get_onnx_latency
from archai.algos.evolution_pareto_image_seg.report import get_search_status_df, save_3d_pareto_plot, save_2d_pareto_evolution_plot
from archai.algos.evolution_pareto_image_seg.remote_benchmark import RemoteAzureBenchmark

from archai.nas.constraints.macs import calculate_macs
from archai.nas.constraints.torch_constraints import measure_torch_peak_memory
from archai.nas.constraints.pareto_frontier import find_pareto_frontier_points


class EvolutionParetoSearchSegmentation(EvolutionParetoSearch):

    @overrides
    def search(self, conf_search:Config)->SearchResult:

        self.dataset_conf = conf_search['loader']['dataset']
        self.nb_classes = self.dataset_conf.get('nb_classes', 19)
        self.dataroot = utils.full_path(self.dataset_conf['dataroot'])
        self.dataset_name = self.dataset_conf['name']
        self.conf_train = conf_search['trainer']
        self.conf_loader = conf_search['loader']
        self.min_mac = conf_search['min_mac']
        self.max_mac = conf_search['max_mac']
        self.min_layers = conf_search['min_layers']
        self.max_layers = conf_search['max_layers']
        self.max_downsample_factor = conf_search['max_downsample_factor']
        self.skip_connections = conf_search['skip_connections']
        self.max_skip_connection_length = conf_search['max_skip_connection_length']
        self.max_scale_delta = conf_search['max_scale_delta']
        self.max_post_upsample_layers = conf_search['max_post_upsample_layers']
        self.min_base_channels = conf_search['min_base_channels']
        self.max_base_channels = conf_search['max_base_channels']
        self.base_channels_binwidth = conf_search['base_channels_binwidth']
        self.min_delta_channels = conf_search['min_delta_channels']
        self.max_delta_channels = conf_search['max_delta_channels']
        self.delta_channels_binwidth = conf_search['delta_channels_binwidth']
        self.mult_delta = conf_search.get('mult_delta', False)
        self.op_subset = conf_search['op_subset']
        self.downsample_prob_ratio = conf_search['downsample_prob_ratio']
        self.img_size = self.dataset_conf['img_size']

        # Parses soft constraints parameters
        soft_constraints = conf_search['objectives']['soft_constraints_penalty']
        soft_constraints['allowed_ops'] = soft_constraints['allowed_ops'].split(',')
        soft_constraints['allowed_scales'] = [int(s) for s in soft_constraints['allowed_scales'].split(',')]
        soft_constraints['allowed_channels'] = [int(s) for s in soft_constraints['allowed_channels'].split(',')]

        self.objectives = conf_search['objectives']

        self.crowd_sorting = conf_search['crowd_sorting']

        self.init_architectures_from_dir = conf_search['init_architectures_from_dir']
        self.use_remote_benchmark = conf_search['use_remote_benchmark']

        if self.use_remote_benchmark:
            remote_config = conf_search['remote_benchmark_config']
            assert 'connection_string_env_var_name' in remote_config
            assert remote_config['connection_string_env_var_name'] in os.environ

            con_string = os.environ[remote_config['connection_string_env_var_name']]
            self.patience = remote_config['patience']
            self.check_interval = remote_config['check_interval']

            self.remote_benchmark = RemoteAzureBenchmark(
                connection_string=con_string,
                blob_container_name=remote_config['blob_container_name'],
                table_name=remote_config['table_name'],
                partition_key=remote_config['partition_key'],
                overwrite=remote_config['overwrite']
            )

        # eval cache so that if search visits
        # a network already evaluated then we don't
        # evaluate it again.
        self.eval_cache = dict()

        # Place to store models with evaluation errors
        self.models_with_missing_results = []

        # init ray
        ray.init()

        super().search(conf_search)

    @overrides
    def get_search_space(self)->DiscreteSearchSpaceSegmentation:
        return DiscreteSearchSpaceSegmentation(
            self.dataset_name,
            min_layers=self.min_layers,
            max_layers=self.max_layers,
            max_downsample_factor=self.max_downsample_factor,
            skip_connections=self.skip_connections,
            max_skip_connection_length=self.max_skip_connection_length,
            max_scale_delta=self.max_scale_delta,
            min_base_channels=self.min_base_channels,
            max_base_channels=self.max_base_channels,
            base_channels_binwidth=self.base_channels_binwidth,
            min_delta_channels=self.min_delta_channels,
            max_delta_channels=self.max_delta_channels,
            delta_channels_binwidth=self.delta_channels_binwidth,
            min_mac=self.min_mac,
            max_mac=self.max_mac,
            op_subset=self.op_subset,
            downsample_prob_ratio=self.downsample_prob_ratio,
            mult_delta=self.mult_delta,
            img_size=self.img_size,
            nb_classes=self.nb_classes
        )

    def _get_secondary_objectives_proxy(self, model: ArchWithMetaData) -> Tuple[float, float]:
        ''' Gets a proxy for all secondary objectives (latency, memory and soft_constraints)'''
        # TODO: Filter secondary objectives in a smarter way
        proxy_objs = OrderedDict(
            (obj, 0.0)
            for obj in self.objectives if obj != 'f1'
        )

        if self.objectives['latency']['enabled']:
            proxy_objs['latency'] = get_onnx_latency(model.arch, img_size=model.arch.img_size)

        if self.objectives['memory']['enabled']:
            proxy_objs['memory'] = measure_torch_peak_memory(
                model.arch, use_quantization=False,
                input_dims=(1, 3, *model.arch.img_size[::-1]),
                n_threads=1, device='cpu'
            )

        if self.objectives['soft_constraints_penalty']['enabled']:
            soft_constraints = self.objectives['soft_constraints_penalty']

            proxy_objs['soft_constraints_penalty'] = sum(
                node['scale'] not in soft_constraints['allowed_scales'] or
                (node['op'] not in soft_constraints['allowed_ops'] and bool(node['op'])) or
                model.arch.channels_per_scale[node['scale']] not in soft_constraints['allowed_channels']
                for node in model.arch.graph.values()
            ) / len(model.arch.graph.values())

        if self.objectives['macs']['enabled']:
            proxy_objs['macs'] = calculate_macs(model.arch, (1, 3, *model.arch.img_size[::-1]))

        return proxy_objs

    @overrides
    def _sample_init_population(self) -> List[ArchWithMetaData]:
        if self.init_architectures_from_dir:
            arch_dir = Path(self.init_architectures_from_dir)
            arch_files = list(arch_dir.glob('*.yaml'))
            search_space = self.get_search_space()
            logger.info(f'Loading {len(arch_files)} seed models for first iteration.')

            model_list = [search_space.load_from_file(arch_file) for arch_file in arch_files]

        # Initialization with crowd sorting
        if self.crowd_sorting['initialization']:
            init_pop = []

            for _ in range(self.crowd_sorting['oversampling_factor']):
                init_pop += super()._sample_init_population()

            # Scores memory and latency
            secondary_objs_proxy = np.array([
                list(self._get_secondary_objectives_proxy(p).values()) for p in init_pop
            ])

            crowd_dist = compute_crowding_distance(secondary_objs_proxy)
            idxs = np.argsort(-crowd_dist, axis=None)[:self.init_num_models]
            model_list = [p for pi, p in enumerate(init_pop) if pi in idxs]
        else:
            model_list = super()._sample_init_population()

        for model in model_list:
            model.metadata['generation'] = self.iter_num

        return model_list

    @overrides
    def _sample_random_to_mix(self) -> List[ArchWithMetaData]:
        if self.crowd_sorting['random_mix']:
            init_pop = []

            for _ in range(self.crowd_sorting['oversampling_factor']):
                init_pop += super()._sample_random_to_mix()

            # Scores memory, latency and soft constraints penalty
            secondary_objs = np.array([
                list(self._get_secondary_objectives_proxy(p).values()) for p in init_pop
            ])

            crowd_dist = compute_crowding_distance(secondary_objs)
            idxs = np.argsort(-crowd_dist, axis=None)[:self.num_random_mix]
            model_list = [p for pi, p in enumerate(init_pop) if pi in idxs]
        else:
            model_list = super()._sample_random_to_mix()

        for model in model_list:
            model.metadata['generation'] = self.iter_num

        return model_list

    @overrides
    def calc_secondary_objectives(self, population:List[ArchWithMetaData])->None:
        cache_misses = 0

        for p in tqdm(population):
            sec_objs_proxy = self._get_secondary_objectives_proxy(p)

            # TODO: Avoid special casing here and use a general logic for
            # computing secondary objectives remotely or not
            proxy_latency, proxy_mem = sec_objs_proxy['latency'], sec_objs_proxy['memory']
            p.metadata['soft_constraints_penalty'] = sec_objs_proxy['soft_constraints_penalty']
            p.metadata['macs'] = sec_objs_proxy['macs']

            if not self.use_remote_benchmark:
                p.metadata['latency'], p.metadata['memory'] = proxy_latency, proxy_mem
            else:
                p.metadata['proxy_latency'], p.metadata['proxy_memory'] = proxy_latency, proxy_mem

                # Checks if this architecture was already benchmarked before
                if p.metadata['archid'] not in self.remote_benchmark:
                    cache_misses += 1
                    self.remote_benchmark.send_model(p)

        if self.use_remote_benchmark:
            logger.info(f'{len(population) - cache_misses} benchmark cache hits')

    @overrides
    def calc_task_accuracy(self, population:List[ArchWithMetaData])->None:
        # computes task accuracy of each model
        # and updates the meta data
        # TODO: parallelize it via ray

        # folder where to store training logs of each model
        exp_dir = utils.full_path(get_expdir())
        save_folder = os.path.join(exp_dir, f'arch_eval_logs_iter_{self.iter_num}')
        os.makedirs(save_folder, exist_ok=True)

        fit_refs = []

        pop_to_eval = [
            p for p in population
            if p.metadata['archid'] not in self.eval_cache
        ]

        if len(pop_to_eval) < len(population):
            logger.info(
                f'{len(population) - len(pop_to_eval)} evaluation cache hits'
            )

        for p in pop_to_eval:
            # create a ray actor per model to be trained
            actor_ref = self._create_training_job(p)
            # create a folder name for the model training logs
            run_path = os.path.join(save_folder, str(p.metadata['archid']))
            os.makedirs(run_path, exist_ok=True)
            # fit and validate the model
            fit_refs.append(actor_ref.fit_and_validate.remote(run_path=run_path))

        # gather all results for all models
        results = ray.get(fit_refs)

        # Cached results
        for p in population:
            if p.metadata['archid'] in self.eval_cache:
                p.metadata['f1'] = self.eval_cache[p.metadata['archid']]

        # Evaluation results
        for r, p in zip(results, pop_to_eval):
            p.metadata['f1'] = r
            self.eval_cache[p.metadata['archid']] = r

    @overrides
    def on_calc_task_accuracy_end(self, current_pop: List[ArchWithMetaData]) -> None:
        if self.use_remote_benchmark:
            evaluated = set()
            nb_tries = 0
            logger.info('Gathering remote benchmark results...')
            pbar = tqdm(total=len(current_pop), desc='Gathering remote benchmark results...')

            while len(evaluated) < len(current_pop) and (nb_tries < self.patience or len(evaluated) == 0):
                for i, p in enumerate(current_pop):
                    # Gets the metrics for all the models in `current_pop``.
                    # we don't need to worry about the cost of checking the same model
                    # more than once since the cost of `get_entity` is infimal
                    # and we may get better estimates for the latency mean when we
                    # check the same model again (given how the pipeline is constructed)

                    metrics = self.remote_benchmark.get_entity(
                        str(p.metadata['archid'])
                    )

                    # Updates the metadata with the remote benchmark metrics
                    if 'mean' in metrics and metrics['mean']:
                        p.metadata['latency'] = metrics['mean']
                        p.metadata['memory'] = p.metadata['proxy_memory']

                        if i not in evaluated:
                            evaluated.add(i)
                            pbar.update()
                            nb_tries = 0

                    # Resets an entry from the Azure table if the status="complete" prematurely
                    if i not in evaluated and 'status' in metrics and metrics['status'] == 'complete':
                        metrics['status'] = 'incomplete'

                        if 'mean' in metrics:
                            del metrics['mean']

                        if 'total_inference_avg' in metrics:
                            del metrics['total_inference_avg']

                        self.remote_benchmark.update_entity(str(p.metadata['archid']), metrics)

                if len(evaluated) < len(current_pop):
                    pbar.set_description('Sleeping...')
                    logger.info(
                        'Waiting remote benchmark results for '
                        f'{len(current_pop) - len(evaluated)} models...'
                    )
                    time.sleep(self.check_interval)
                    nb_tries += 1

            if nb_tries >= self.patience:
                logger.warn('Patience reached. Adding missing models to the next iteration...')

                for i, p in enumerate(current_pop):
                    if i not in evaluated:
                        # Removes possibly incomplete results
                        p.metadata.pop('latency', None)
                        p.metadata.pop('memory', None)

                        # Removes entry from the Azure table
                        self.remote_benchmark.delete_model(p.metadata['archid'])
                        self.models_with_missing_results.append(p)

                # Removes the models from the current population
                for p in self.models_with_missing_results:
                    current_pop.remove(p)

                    if p in self.all_pop:
                        self.all_pop.remove(p)

            logger.info('Finished gathering remote benchmark results.')

    @overrides
    def on_search_iteration_start(self, current_pop: List[ArchWithMetaData]) -> None:
        if self.use_remote_benchmark and self.models_with_missing_results:
            logger.info(f'Adding missing models to the next iteration...')
            current_pop.extend(self.models_with_missing_results)
            self.models_with_missing_results = []

    def _create_training_job(self, arch:ArchWithMetaData)->List:
        ''' Creates a ray actor that will train a single architecture '''
        # region config
        self.evaluate_for_steps = self.conf_train['evaluate_for_steps']
        self.val_check_interval = self.conf_train['val_check_interval']
        self.val_size = self.conf_train['val_size']
        self.lr = self.conf_train['lr']
        self.lr_exp_decay_gamma = self.conf_train['lr_exp_decay_gamma']
        self.criterion_name = self.conf_train['criterion_name']
        self.batch_size = self.conf_loader['batch_size']
        self.seed = get_conf_common()['seed']

        # train
        trainer = ray.remote(
            num_gpus=self.conf_train['gpus_per_job']
        )(SegmentationTrainer)

        ref = trainer.remote(
            arch.arch, dataset_conf=self.dataset_conf,
            max_steps=self.evaluate_for_steps,
            val_check_interval=self.val_check_interval,
            img_size=self.img_size, batch_size=self.batch_size, lr=self.lr,
            lr_exp_decay_gamma=self.lr_exp_decay_gamma,
            criterion_name=self.criterion_name, seed=self.seed
        )
        return ref

    @overrides
    def update_pareto_frontier(self, population:List[ArchWithMetaData])->List[ArchWithMetaData]:
        # TODO: Make this more general
        objs = [
            [1.0 - p.metadata['f1'] for p in population],
            [p.metadata['latency'] for p in population],
            [p.metadata['memory'] for p in population],
            [p.metadata['soft_constraints_penalty'] for p in population]
        ]

        objs = [np.array(obj).reshape(-1, 1) for obj in objs]

        points = np.concatenate(objs, axis=1)
        points_idx = find_pareto_frontier_points(points, is_decreasing=True)
        pareto_points = [population[idx] for idx in points_idx]

        # save all the pareto points
        self._save_yaml(pareto_points, basename='pareto')

        return pareto_points

    def _filter_population(self, population:List[ArchWithMetaData])->List[ArchWithMetaData]:
        ''' Filter the population based on the objectives constraints '''
        return [
            p for p in population
            if all(
                obj_data['min'] <= p.metadata[obj_name] <= obj_data['max']
                for obj_name, obj_data in self.objectives.items()
                if obj_data['enabled'] and obj_name in p.metadata
            )
        ]

    def _save_yaml(self, points:List[ArchWithMetaData], basename='pareto')->None:
        exp_dir = utils.full_path(get_expdir())
        save_folder = os.path.join(exp_dir, f'{basename}_iter_{self.iter_num}')
        os.makedirs(save_folder, exist_ok=True)
        for p in points:
            this_name = os.path.join(save_folder, str(p.metadata['archid']) + '.yaml')
            p.arch.to_file(this_name)

    @overrides
    def mutate_parents(self, parents:List[ArchWithMetaData], mutations_per_parent: int = 1)->List[ArchWithMetaData]:
        ''' Using the nearest neighbors as mutations'''
        mutations = {}
        oversample_factor = (
            self.crowd_sorting['oversampling_factor'] if self.crowd_sorting['mutation']
            else 1
        )

        for p in tqdm(parents, desc='Mutating parents'):
            candidates = {}
            nb_tries = 0
            patience = 20

            if len(self._filter_population([p])) == 0:
                logger.info(
                    f'Model {p.metadata["archid"]} has latency {p.metadata["latency"]}'
                    f' or memory {p.metadata["memory"]} that is too high. Skipping mutation.'
                )

                continue

            while len(candidates) < (mutations_per_parent * oversample_factor) and nb_tries < patience:
                for nbr in self.search_space.get_neighbors(p):
                    if nbr.metadata['archid'] not in self.eval_cache:
                        nbr.metadata['generation'] = self.iter_num
                        candidates[nbr.metadata['archid']] = nbr
                nb_tries += 1

            if candidates and self.crowd_sorting['mutation']:
                candidates_list = list(candidates.items())

                secondary_objs_proxy = np.array([
                    list(self._get_secondary_objectives_proxy(p).values()) for _, p in candidates_list
                ])

                crowd_dist = compute_crowding_distance(secondary_objs_proxy)

                # Deletes mutations that are not on the top k
                for idx in np.argsort(-crowd_dist, axis=None)[mutations_per_parent:]:
                    del candidates[candidates_list[idx][0]]

            mutations.update(candidates)

        return list(mutations.values())

    @overrides
    def crossover_parents(self, parents:List[ArchWithMetaData], num_crossovers: int = 1)->List[ArchWithMetaData]:
        # Randomly samples k distinct pairs from `parents`
        children, children_hashes = [], set()

        if len(parents) >= 2:
            pairs = [random.sample(parents, 2) for _ in range(num_crossovers)]

            for p1, p2 in pairs:
                child = self.search_space.crossover(p1, p2)
                child_id = child.metadata['archid']

                if child and child_id not in children_hashes and child_id not in self.eval_cache:
                    child.metadata['generation'] = self.iter_num
                    children.append(child)
                    children_hashes.add(child_id)

        return children

    @overrides
    def plot_search_state(self, all_pop:List[ArchWithMetaData], pareto:List[ArchWithMetaData], iter_num:int) -> None:
        expdir = Path(get_expdir())
        save_3d_pareto_plot(all_pop, pareto, ['f1', 'latency', 'memory'], iter_num, expdir)
        save_3d_pareto_plot(all_pop, pareto, ['f1', 'latency', 'macs'], f'{iter_num}_macs', expdir)

        status_df = get_search_status_df(
            all_pop, pareto, iter_num,
            fields=['archid', 'f1', 'latency', 'memory',
                    'soft_constraints_penalty', 'macs', 'generation']
        )

        save_2d_pareto_evolution_plot(
            status_df, x='latency', y='f1', save_path=expdir / 'latency_f1_2d_pareto.png',
            x_increasing=False, max_x=self.objectives['latency']['max'], y_increasing=True, max_y=1.0
        )

        save_2d_pareto_evolution_plot(
            status_df, x='macs', y='f1', save_path=expdir / 'macs_f1_2d_pareto.png',
            x_increasing=False, max_x=self.objectives['macs']['max'], y_increasing=True, max_y=1.0
        )

        save_2d_pareto_evolution_plot(
            status_df, x='memory', y='f1', save_path=expdir / 'memory_f1_2d_pareto.png',
            x_increasing=False, max_x=self.objectives['memory']['max'], y_increasing=True, max_y=1.0
        )

        save_2d_pareto_evolution_plot(
            status_df, x='soft_constraints_penalty', y='f1', save_path=expdir / 'softconstraints_f1_2d_pareto.png',
            x_increasing=False, max_x=self.objectives['soft_constraints_penalty']['max'], y_increasing=True, max_y=1.0
        )

    @overrides
    def save_search_status(self, all_pop:List[ArchWithMetaData], pareto:List[ArchWithMetaData], iter_num:int) -> None:
        fields = [
            'archid', 'f1', 'latency', 'memory', 'proxy_latency',
            'proxy_memory', 'soft_constraints_penalty',
            'parent', 'parents', 'macs', 'generation'
        ]

        status_df = get_search_status_df(all_pop, pareto, iter_num, fields)

        # Adds pareto hypervolume
        pareto_points = np.array([
            [p.metadata['latency'], p.metadata['memory'], p.metadata['macs'], 1 - p.metadata['f1']]
            for p in pareto
        ])

        status_df['pareto_hypervolume'] = compute_pareto_hypervolume(
            pareto_points,
            np.array([
                self.objectives['latency']['max'],
                self.objectives['memory']['max'],
                self.objectives['macs']['max'],
                1.0
            ], dtype=np.float32)
        )

        expdir = Path(get_expdir())
        status_df.to_csv(expdir / f'search_status_{iter_num}.csv')
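Several of the removed methods above (_sample_init_population, _sample_random_to_mix, mutate_parents) share one pattern: oversample candidates, score the cheap proxy objectives, then keep only the candidates with the largest crowding distance so the retained set stays spread out over objective space. A minimal self-contained sketch of that selection step follows; toy_crowding_distance is a stand-in written for this illustration, not archai's compute_crowding_distance, whose exact behavior is only implied by its usage above.

import numpy as np

def toy_crowding_distance(objs: np.ndarray) -> np.ndarray:
    # objs: (n_candidates, n_objectives). For each objective, interior points
    # accumulate the normalized gap between their sorted neighbours; the two
    # boundary points get an infinite distance so they are always kept.
    n, m = objs.shape
    dist = np.zeros(n)
    for j in range(m):
        order = np.argsort(objs[:, j])
        span = max(objs[order[-1], j] - objs[order[0], j], 1e-12)
        dist[order[0]] = dist[order[-1]] = np.inf
        dist[order[1:-1]] += (objs[order[2:], j] - objs[order[:-2], j]) / span
    return dist

# Oversample 12 candidates, keep the 4 most spread-out in (latency, memory) space,
# mirroring np.argsort(-crowd_dist)[:k] in the removed code.
rng = np.random.default_rng(0)
pool = rng.random((12, 2))   # fabricated proxy latency / memory per candidate
keep = np.argsort(-toy_crowding_distance(pool))[:4]
print('kept candidate indices:', keep)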
@@ -1,243 +0,0 @@
from typing import List, Union, Dict, Tuple, Optional, Callable
import random
from pathlib import Path

import torch
from torch.utils.data.dataloader import DataLoader
import numpy as np
import pytorch_lightning as pl
import segmentation_models_pytorch as smp

from archai.common.config import Config
from archai.algos.evolution_pareto_image_seg.model import SegmentationNasModel
from archai.datasets.data import create_dataset_provider


def get_custom_overall_metrics(tp, fp, fn, tn, stage, ignore_classes: Optional[List[int]] = None):
    gt_pos = (tp + fn).sum(axis=0)
    pd_pos = (tp + fp).sum(axis=0)

    tp_diag = tp.sum(axis=0)
    f1 = 2 * tp_diag / torch.maximum(torch.ones_like(gt_pos), gt_pos + pd_pos)
    iou = tp_diag / torch.maximum(torch.ones_like(gt_pos), gt_pos + pd_pos - tp_diag)

    class_mask = torch.ones(tp.shape[1], dtype=torch.bool)
    if ignore_classes is not None:
        class_mask = [c not in ignore_classes for c in torch.arange(tp.shape[1])]

    weight = 1 / torch.sqrt(gt_pos[class_mask])
    overall_f1 = torch.sum(f1[class_mask] * weight) / torch.sum(weight)
    overall_iou = torch.sum(iou[class_mask] * weight) / torch.sum(weight)

    return {
        f'{stage}_overall_f1': overall_f1,
        f'{stage}_overall_iou': overall_iou
    }


class LightningModelWrapper(pl.LightningModule):
    def __init__(self,
                 model: SegmentationNasModel,
                 criterion_name: str = 'ce',
                 lr: float = 2e-4,
                 lr_exp_decay_gamma: float = 0.98,
                 img_size: Tuple[int, int] = (256, 256),
                 metrics_ignore_classes: Optional[List[int]] = None,
                 weight_decay: float = 0.0):

        super().__init__()

        self.model = model
        self.lr = lr
        self.weight_decay = weight_decay
        self.lr_exp_decay_gamma = lr_exp_decay_gamma
        self.latency = None
        self.img_size = img_size

        self.set_loss(criterion_name)
        self.save_hyperparameters()
        self.metrics_ignore_classes = metrics_ignore_classes

    def set_loss(self, criterion_name):
        mode = smp.losses.MULTICLASS_MODE if self.model.nb_classes > 1 else smp.losses.BINARY_MODE
        if criterion_name == 'ce':
            if self.model.nb_classes > 1:
                self.loss_fn = smp.losses.SoftCrossEntropyLoss(ignore_index=255, smooth_factor=0)
            else:
                self.loss_fn = smp.losses.SoftBCEWithLogitsLoss(smooth_factor=0)
        elif criterion_name == 'dice':
            self.loss_fn = smp.losses.DiceLoss(mode, from_logits=True, ignore_index=255)
        elif criterion_name == 'lovasz':
            self.loss_fn = smp.losses.LovaszLoss(mode, ignore_index=255, from_logits=True)

    def forward(self, image):
        return self.model(image)

    def evaluate(self, dataloader: DataLoader) -> Dict[str, float]:
        with torch.no_grad():
            outputs = [
                {k: v.cpu() for k, v in self.shared_step(batch.cuda()).items()}
                for batch in dataloader
            ]
        return self.shared_epoch_end(outputs, stage='validation', log=False)

    def shared_step(self, batch):
        image = batch['image']

        assert image.ndim == 4

        h, w = image.shape[2:]
        assert h % 32 == 0 and w % 32 == 0, \
            f'found invalid image size ({image.shape}) in batch {batch}'

        mask = batch['mask']
        logits_mask = self.forward(image)
        mask = (mask/255.0).unsqueeze(1) if self.model.nb_classes == 1 else mask
        loss = self.loss_fn(logits_mask, mask)

        if self.model.nb_classes == 1:
            pred_mask = torch.sigmoid(logits_mask)
            tp, fp, fn, tn = smp.metrics.get_stats(
                pred_mask, (mask >= 0.5).long(),
                mode='binary', threshold=0.5
            )
        else:
            tp, fp, fn, tn = smp.metrics.get_stats(
                logits_mask.argmax(axis=1), mask.long(), mode='multiclass',
                num_classes=self.model.nb_classes, ignore_index=255
            )

        metrics_result = {
            'tp': tp,
            'fp': fp,
            'fn': fn,
            'tn': tn,
            'loss': loss
        }

        return metrics_result

    def training_step(self, batch, batch_idx):
        results = self.shared_step(batch)
        self.log_dict({'training_loss': results['loss']}, sync_dist=True)

        return results

    def predict(self, image):
        with torch.no_grad():
            return self.model.predict(image)

    def validation_step(self, batch, batch_idx):
        results = self.shared_step(batch)
        return results

    def validation_epoch_end(self, outputs):
        self.shared_epoch_end(outputs, stage='validation')

    def training_epoch_end(self, outputs):
        self.shared_epoch_end(outputs, stage='train')

    def shared_epoch_end(self, outputs, stage, log=True):
        tp = torch.cat([x['tp'] for x in outputs])
        fp = torch.cat([x['fp'] for x in outputs])
        fn = torch.cat([x['fn'] for x in outputs])
        tn = torch.cat([x['tn'] for x in outputs])
        avg_loss = torch.tensor([x['loss'] for x in outputs]).mean()

        results = get_custom_overall_metrics(
            tp, fp, fn, tn, stage=stage,
            ignore_classes=self.metrics_ignore_classes
        )
        results[f'{stage}_loss'] = avg_loss

        if log:
            self.log_dict(results, sync_dist=True)

        return results

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=self.lr_exp_decay_gamma)

        scheduler = {
            'scheduler': scheduler,
            'interval': 'epoch'
        }

        return [optimizer], [scheduler]

    def on_train_start(self) -> None:
        sample = torch.randn((1, 3, self.img_size[1], self.img_size[0])).to(self.device)
        self.logger.experiment.add_graph(self.model, sample)


class SegmentationTrainer():

    def __init__(self, model: SegmentationNasModel,
                 dataset_conf: Config,
                 max_steps: int = 12000, img_size: Tuple[int, int] = (256, 256),
                 batch_size: int = 16, lr: float = 2e-4,
                 criterion_name: str = 'ce',
                 val_check_interval: Union[int, float] = 0.25,
                 lr_exp_decay_gamma: float = 0.98,
                 seed: int = 1, tr_num_workers: int = 4, val_num_workers: int = 4,
                 tr_augmentation_fn: Optional[Callable] = None,
                 val_augmentation_fn: Optional[Callable] = None):
        torch.manual_seed(seed)
        random.seed(seed)
        np.random.seed(int(seed))

        self.max_steps = max_steps
        self.val_check_interval = val_check_interval

        self.dataset_conf = dataset_conf
        self.dp = create_dataset_provider(dataset_conf)
        self.tr_dataset, self.val_dataset = self.dp.get_train_val_datasets(
            tr_augmentation_fn, val_augmentation_fn
        )

        self.tr_dataloader = DataLoader(self.tr_dataset, batch_size=batch_size, num_workers=tr_num_workers, shuffle=True)
        self.val_dataloader = DataLoader(self.val_dataset, batch_size=batch_size, num_workers=val_num_workers, shuffle=False)

        self.model = LightningModelWrapper(model, criterion_name=criterion_name, lr=lr,
                                           img_size=img_size, lr_exp_decay_gamma=lr_exp_decay_gamma,
                                           metrics_ignore_classes=dataset_conf.get('metrics_ignore_classes', None))
        self.img_size = img_size

    def get_training_callbacks(self, run_dir: Path) -> List[pl.callbacks.Callback]:
        return [pl.callbacks.ModelCheckpoint(
            dirpath=str(run_dir / 'best_model'),
            mode='max', save_top_k=1, verbose=True,
            monitor='validation_overall_f1',
            filename='{epoch}-{step}-{validation_overall_f1:.2f}'
        ), pl.callbacks.lr_monitor.LearningRateMonitor()]

    def fit(self, run_path: str) -> pl.Trainer:
        run_path = Path(run_path)
        arch = self.model.model

        # Saves architecture metadata
        arch.to_file(run_path / 'architecture.yaml')

        # Saves architecture diagram
        try:
            digraph = arch.view()
            digraph.render(str(run_path / 'architecture'), format='png')
        except AttributeError:
            pass

        trainer = pl.Trainer(
            max_steps=self.max_steps,
            default_root_dir=run_path,
            callbacks=self.get_training_callbacks(run_path),
            val_check_interval=self.val_check_interval,
            gpus=1
        )

        trainer.fit(self.model, self.tr_dataloader, self.val_dataloader)
        return trainer

    def fit_and_validate(self, run_path: str)->float:
        trainer = self.fit(run_path)
        metrics = trainer.validate(model=trainer.model, dataloaders=self.val_dataloader)[0]
        return metrics['validation_overall_f1']
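The removed get_custom_overall_metrics above averages per-class F1/IoU with weights of 1/sqrt(ground-truth pixel count per class), so rare classes pull the overall score more than a plain mean would. A small worked sketch of the F1 part follows; the per-class totals are fabricated for illustration and assume tp/fp/fn have already been summed over batches.

import torch

# Fabricated per-class totals over a validation set: one frequent, one rare class.
tp = torch.tensor([900., 40.])
fp = torch.tensor([100., 10.])
fn = torch.tensor([100., 20.])

gt_pos = tp + fn                      # ground-truth pixels per class: [1000, 60]
pd_pos = tp + fp                      # predicted pixels per class:    [1000, 50]
f1 = 2 * tp / torch.maximum(torch.ones_like(gt_pos), gt_pos + pd_pos)

weight = 1 / torch.sqrt(gt_pos)       # rare classes get larger weight
overall_f1 = torch.sum(f1 * weight) / torch.sum(weight)

print(f1)          # per-class F1: ~[0.90, 0.73]
print(overall_f1)  # weighted overall F1, pulled toward the rare class: ~0.76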
@@ -1,191 +0,0 @@
from abc import ABCMeta, abstractmethod
from overrides.overrides import overrides

import random
from typing import Tuple, List, Dict, Union

import torch.nn as nn

from archai.common.common import logger
from archai.nas.arch_meta import ArchWithMetaData
from archai.search_spaces.discrete_search_spaces.base import EvolutionarySearchSpace
from archai.metrics.base import BaseMetric
from archai.nas.searcher import Searcher, SearchResult
from archai.common.config import Config


class EvolutionParetoSearch(Searcher):
    def __init__(self, search_space: EvolutionarySearchSpace,
                 objectives: Union[List[BaseMetric], List[Tuple[BaseMetric, int]]]):
        assert isinstance(search_space, EvolutionarySearchSpace), \
            f'{str(search_space.__class__)} is not compatible with {str(self.__class__)}'
        self.search_space = search_space
        self.objectives = objectives

    def mutate_parents(self, parents:List[ArchWithMetaData], mutations_per_parent: int = 1) -> List[ArchWithMetaData]:
        ''' Using the nearest neighbors as mutations'''
        mutations = {}
        oversample_factor = (
            self.crowd_sorting['oversampling_factor'] if self.crowd_sorting['mutation']
            else 1
        )

        for p in tqdm(parents, desc='Mutating parents'):
            candidates = {}
            nb_tries = 0
            patience = 20

            if len(self._filter_population([p])) == 0:
                logger.info(
                    f'Model {p.metadata["archid"]} has latency {p.metadata["latency"]}'
                    f' or memory {p.metadata["memory"]} that is too high. Skipping mutation.'
                )

                continue

            while len(candidates) < (mutations_per_parent * oversample_factor) and nb_tries < patience:
                nbr = self.search_space.mutate(p)
                if nbr.metadata['archid'] not in self.eval_cache:
                    nbr.metadata['generation'] = self.iter_num
                    candidates[nbr.metadata['archid']] = nbr
                nb_tries += 1

            if candidates and self.crowd_sorting['mutation']:
                candidates_list = list(candidates.items())

                secondary_objs_proxy = np.array([
                    list(self._get_secondary_objectives_proxy(p).values()) for _, p in candidates_list
                ])

                crowd_dist = compute_crowding_distance(secondary_objs_proxy)

                # Deletes mutations that are not on the top k
                for idx in np.argsort(-crowd_dist, axis=None)[mutations_per_parent:]:
                    del candidates[candidates_list[idx][0]]

            mutations.update(candidates)

        return list(mutations.values())

    @abstractmethod
    def crossover_parents(self, parents:List[ArchWithMetaData], num_crossovers: int = 1) -> List[ArchWithMetaData]:
        pass

    @abstractmethod
    def update_pareto_frontier(self, population:List[ArchWithMetaData]) -> List[ArchWithMetaData]:
        pass

    @abstractmethod
    def plot_search_state(self, all_pop:List[ArchWithMetaData], pareto:List[ArchWithMetaData], iter_num:int) -> None:
        pass

    @abstractmethod
    def save_search_status(self, all_pop:List[ArchWithMetaData], pareto:List[ArchWithMetaData], iter_num:int) -> None:
        pass

    def _sample_init_population(self)->List[ArchWithMetaData]:
        init_pop:List[ArchWithMetaData] = []
        while len(init_pop) < self.init_num_models:
            init_pop.append(self.search_space.random_sample())
        return init_pop

    def _sample_random_to_mix(self)->List[ArchWithMetaData]:
        mix_pop:List[ArchWithMetaData] = []
        while len(mix_pop) < self.num_random_mix:
            mix_pop.append(self.search_space.random_sample())
        return mix_pop

    def on_calc_task_accuracy_end(self, current_pop: List[ArchWithMetaData]) -> None:
        ''' Callback function called right after calc_task_accuracy()'''
        pass

    def on_search_iteration_start(self, current_pop: List[ArchWithMetaData]) -> None:
        ''' Callback function called right before each search iteration'''
        pass

    def select_next_population(self, current_pop: List[ArchWithMetaData]) -> List[ArchWithMetaData]:
        random.shuffle(current_pop)
        return current_pop[:self.max_unseen_population]

    @overrides
    def search(self, conf_search:Config):

        self.init_num_models = conf_search['init_num_models']
        self.num_iters = conf_search['num_iters']
        self.num_random_mix = conf_search['num_random_mix']
        self.max_unseen_population = conf_search['max_unseen_population']
        self.mutations_per_parent = conf_search.get('mutations_per_parent', 1)
        self.num_crossovers = conf_search.get('num_crossovers', 1)

        assert self.init_num_models > 0
        assert self.num_iters > 0
        assert self.num_random_mix > 0
        assert self.max_unseen_population > 0

        self.search_space = self.get_search_space()
        assert isinstance(self.search_space, DiscreteSearchSpace)

        # sample the initial population
        self.iter_num = 0
        unseen_pop:List[ArchWithMetaData] = self._sample_init_population()

        self.all_pop = unseen_pop
        for i in range(self.num_iters):
            self.iter_num = i + 1

            logger.info(f'starting evolution pareto iter {i}')
            self.on_search_iteration_start(unseen_pop)

            # for the unseen population
            # calculates the memory and latency
            # and inserts it into the meta data of each member
            logger.info(f'iter {i}: calculating memory latency for {len(unseen_pop)} models')
            self.calc_secondary_objectives(unseen_pop)

            # calculate task accuracy proxy
            # could be anything from zero-cost proxy
            # to partial training
            logger.info(f'iter {i}: calculating task accuracy for {len(unseen_pop)} models')
            self.calc_task_accuracy(unseen_pop)
            self.on_calc_task_accuracy_end(unseen_pop)

            # update the pareto frontier
            logger.info(f'iter {i}: updating the pareto')
            pareto:List[ArchWithMetaData] = self.update_pareto_frontier(self.all_pop)
            logger.info(f'iter {i}: found {len(pareto)} members')

            # select parents for the next iteration from
            # the current estimate of the frontier while
            # giving more weight to newer parents
            # TODO
            parents = pareto # for now
            logger.info(f'iter {i}: chose {len(parents)} parents')

            # plot the state of search
            self.save_search_status(all_pop=self.all_pop, pareto=pareto, iter_num=i)
            self.plot_search_state(all_pop=self.all_pop, pareto=pareto, iter_num=i)

            # mutate random 'k' subsets of the parents
            # while ensuring the mutations fall within
            # desired constraint limits
            mutated = self.mutate_parents(parents, self.mutations_per_parent)
            logger.info(f'iter {i}: mutation yielded {len(mutated)} new models')

            # crossover random 'k' subsets of the parents
            # while ensuring the mutations fall within
            # desired constraint limits
            crossovered = self.crossover_parents(parents, self.num_crossovers)
            logger.info(f'iter {i}: crossover yielded {len(crossovered)} new models')

            # sample some random samples to add to the parent mix
            # to mitigage local minima
            rand_mix = self._sample_random_to_mix()

            unseen_pop = crossovered + mutated + rand_mix
            # shuffle before we pick a smaller population for the next stage
            logger.info(f'iter {i}: total unseen population before restriction {len(unseen_pop)}')
            unseen_pop = self.select_next_population(unseen_pop)
            logger.info(f'iter {i}: total unseen population after restriction {len(unseen_pop)}')

            # update the set of architectures ever visited
            self.all_pop.extend(unseen_pop)
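The removed searcher above relies on a compute_crowding_distance helper (imported elsewhere in the package) to keep only the most spread-out mutations per parent; the helper itself is not part of this patch. The following is a minimal NSGA-II-style sketch of what such a crowding-distance computation typically looks like — the function name, shapes, and tie-handling here are assumptions for illustration, not code from this repository.

import numpy as np

def compute_crowding_distance(objs: np.ndarray) -> np.ndarray:
    # objs: (n_points, n_objectives); a larger distance means a more isolated point
    n, m = objs.shape
    dist = np.zeros(n)
    for j in range(m):
        order = np.argsort(objs[:, j])
        span = objs[order[-1], j] - objs[order[0], j]
        span = span if span > 0 else 1.0
        dist[order[0]] = dist[order[-1]] = np.inf   # always keep boundary points
        dist[order[1:-1]] += (objs[order[2:], j] - objs[order[:-2], j]) / span
    return dist

Sorting candidates by descending crowding distance and keeping the top mutations_per_parent, as the removed mutate_parents does, then favors mutations that cover under-explored regions of the secondary-objective space.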
@@ -10,6 +10,7 @@ from torch import Tensor, nn, autograd
 from torch.nn.modules.loss import _Loss
 from torch.optim.optimizer import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
+import torch.nn.functional as F
 
 from overrides import overrides
 
@@ -18,8 +19,8 @@ from archai.nas.arch_trainer import ArchTrainer
 from archai.common import utils, ml_utils
 from archai.nas.model import Model
 from archai.common.checkpoint import CheckPoint
-from archai.common.common import logger
-
+from archai.common.common import logger, get_conf
+from archai.algos.gumbelsoftmax.gs_op import GsOp
 
 class GsArchTrainer(ArchTrainer):
     def __init__(self, conf_train: Config, model: nn.Module, checkpoint: Optional[CheckPoint]) -> None:
@@ -10,6 +10,7 @@ from archai.nas.arch_trainer import TArchTrainer
 from archai.nas.finalizers import Finalizers
 from .gs_model_desc_builder import GsModelDescBuilder
 from .gs_arch_trainer import GsArchTrainer
+from .gs_finalizers import GsFinalizers
 
 class GsExperimentRunner(ExperimentRunner):
     @overrides
@@ -5,6 +5,7 @@ from typing import Iterable, Optional, Tuple, List, Iterator
 
 import torch
 from torch import nn
+from torch import Tensor
 import torch.nn.functional as F
 
 from overrides import overrides
@@ -41,8 +42,6 @@ class GsOp(Op):
         # assume last PRIMITIVE is 'none'
         assert GsOp.PRIMITIVES[-1] == 'none'
 
-        self._gs_num_sample = op_desc.params['gs_num_sample']
-
         self._ops = nn.ModuleList()
         for primitive in GsOp.PRIMITIVES:
             op = Op.create(
@@ -53,38 +52,36 @@ class GsOp(Op):
         # any previous child modules
         self._setup_arch_params(arch_params)
 
+    def set_op_sampled_weights(self, sampled_weights:Tensor):
+        ''' Sets the weight for each op '''
+        assert sampled_weights.shape[0] == len(GsOp.PRIMITIVES)
+        self._sampled_weights = sampled_weights
+
     @overrides
     def forward(self, x):
-        # soft sample from the categorical distribution
-        # via gumbel softmax distribution
-        # TODO: should we be normalizing the ensemble?
-        #sampled = torch.zeros(alphas.size(), requires_grad=True)
-
-        sample_storage = []
-        for _ in range(self._gs_num_sample):
-            sampled = F.gumbel_softmax(self._alphas[0], tau=1, hard=False, eps=1e-10, dim=-1)
-            sample_storage.append(sampled)
-
-        samples_summed = torch.sum(torch.stack(sample_storage, dim=0), dim=0)
-        return sum(w * op(x) for w, op in zip(samples_summed, self._ops))
+        assert self._sampled_weights is not None
+        return sum(w * op(x) for w, op in zip_eq(self._sampled_weights, self._ops))
 
     @overrides
-    def finalize(self) -> Tuple[OpDesc, Optional[float]]:
+    def finalize(self, sampled_weights) -> Tuple[OpDesc, Optional[float]]:
         # finalization where each edge gets to keep as many
-        # unique operations that are sampled
-        sample_storage = []
-        for i in range(self._gs_num_sample):
-            sampled = F.gumbel_softmax(self._alphas[0], tau=1, hard=True, eps=1e-10, dim=-1)
-            sample_storage.append(sampled)
-
-        samples_summed = torch.sum(torch.stack(sample_storage, dim=0), dim=0)
-        greater_than_0 = samples_summed > 0
+        # unique operations that are **sampled at the node level**
+        assert sampled_weights.shape[0] == len(GsOp.PRIMITIVES)
+        # we can't handle empty op
+        assert sampled_weights.bool().any()
 
+        greater_than_0 = sampled_weights > 0
         children = []
         children_ins = []
+        selected_alphas = []
 
         for i in range(greater_than_0.size()[0]):
            if greater_than_0[i]:
                 children.append(self._ops[i].finalize()[0])
+                selected_alphas.append(self._alphas[0][i].item())
                 # all the ops will operate on the single node input
                 children_ins.append(0)
@@ -104,7 +101,11 @@ class GsOp(Op):
             children_ins = children_ins
         )
 
-        return final_op_desc, None
+        max_alpha = 0.0
+        if selected_alphas:
+            max_alpha = max(selected_alphas)
+
+        return final_op_desc, max_alpha
 
     @overrides
     def can_drop_path(self) -> bool:
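The forward pass that this hunk removes from GsOp — draw several soft Gumbel-softmax samples over the op logits, sum them, and mix the candidate ops with the summed weights — is a generic pattern worth seeing in isolation. A self-contained sketch under the same formula (the free function and its arguments are illustrative, not this class's API):

import torch
import torch.nn.functional as F
from torch import nn

def gumbel_mix(alphas: torch.Tensor, ops: nn.ModuleList, x: torch.Tensor, num_samples: int = 4) -> torch.Tensor:
    # soft samples from the categorical distribution over ops, via Gumbel-softmax
    samples = [F.gumbel_softmax(alphas, tau=1.0, hard=False, dim=-1) for _ in range(num_samples)]
    weights = torch.stack(samples, dim=0).sum(dim=0)
    # weighted ensemble of all candidate ops applied to the same input
    return sum(w * op(x) for w, op in zip(weights, ops))

After this change the sampling happens once at the node level and the per-op weights are pushed in through set_op_sampled_weights, so forward only has to apply them.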
@@ -1,91 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Optional, Callable, Tuple, Type
import os
import numpy as np

import torch
from torch.utils.data import DataLoader
from torch import nn, Tensor
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler

from overrides import overrides, EnforceOverrides

from archai.common.metrics import Metrics
from archai.common.config import Config
from archai.common import common, utils
from archai.common.common import logger
from archai.nas.model import Model
from archai.nas.model_desc import ModelDesc
from archai.nas.arch_trainer import ArchTrainer
from archai.datasets import data
from archai.common.trainer import Trainer
from archai.nas.vis_model_desc import draw_model_desc
from archai.common.checkpoint import CheckPoint
from archai.common.ml_utils import set_optim_lr

from .naswotrain_metrics import NaswoTrainMetrics

TNaswotrainTrainer = Optional[Type['NaswotrainTrainer']]


class NaswotrainTrainer(ArchTrainer, EnforceOverrides):

    @overrides
    def fit(self, data_loaders:data.DataLoaders)->Metrics:
        logger.pushd(self._title)

        self._metrics = NaswoTrainMetrics(self._title, self._apex, logger_freq=self._logger_freq)

        # create optimizers and schedulers (we don't need it only to make to_amp call pass)
        self._multi_optim = self.create_multi_optim(len(data_loaders.train_dl))

        # before checkpoint restore, convert to amp
        self.model = self._apex.to_amp(self.model, self._multi_optim,
                                       batch_size=data_loaders.train_dl.batch_size)

        # score the model with one minibatch of data
        # as in the paper "Neural Architecture Search without Training", Mellor et al. 2020
        # modified from https://github.com/BayesWatch/nas-without-training/blob/master/search.py
        self.model.train()
        data_iterator = iter(data_loaders.train_dl)
        x, target = next(data_iterator)
        x, target = x.to(self.get_device()), target.to(self.get_device())

        jacobs = self._get_batch_jacobian(x)
        jacobs = jacobs.reshape(jacobs.size(0), -1).cpu().numpy()
        score = self._eval_score(jacobs)
        self._metrics.naswotraining_score = score
        logger.info(f'nas without training score: {score} using batch size: {data_loaders.train_dl.batch_size}')
        logger.info({'naswithouttraining':float(score)})
        logger.info({'naswithouttraining_batch_size':data_loaders.train_dl.batch_size})

        # make sure we don't keep references to the graph
        del self._multi_optim

        logger.popd()
        return self.get_metrics()


    def _get_batch_jacobian(self, x):
        ''' Modified from https://github.com/BayesWatch/nas-without-training/blob/master/search.py '''
        self.model.zero_grad()
        x.requires_grad_(True)
        logits = self.model(x)
        # Manual models only return logits,
        # whereas DARTS space models return logits, aux_logits
        if isinstance(logits, tuple):
            logits = logits[0]
        logits.backward(torch.ones_like(logits))
        jacob = x.grad.detach()
        return jacob


    def _eval_score(self, jacob):
        ''' Modified from https://github.com/BayesWatch/nas-without-training/blob/master/search.py '''
        corrs = np.corrcoef(jacob)
        v, _ = np.linalg.eig(corrs)
        k = 1e-5
        return -np.sum(np.log(v + k) + 1./(v + k))
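The removed trainer's scoring step, from "Neural Architecture Search without Training" (Mellor et al., 2020), fits in a few lines once detached from the ArchTrainer plumbing. A standalone sketch of the same computation, assuming a plain PyTorch model and one minibatch (the function name is illustrative; eigvalsh is used here since the correlation matrix is symmetric):

import numpy as np
import torch

def naswot_score(model: torch.nn.Module, x: torch.Tensor) -> float:
    # input-Jacobian for one minibatch via a single backward pass
    model.zero_grad()
    x.requires_grad_(True)
    logits = model(x)
    logits.backward(torch.ones_like(logits))
    jacob = x.grad.detach().reshape(x.size(0), -1).cpu().numpy()
    # correlation of per-sample Jacobians across the minibatch
    corrs = np.corrcoef(jacob)
    v = np.linalg.eigvalsh(corrs)
    k = 1e-5
    return float(-np.sum(np.log(v + k) + 1.0 / (v + k)))

Higher scores indicate minibatch Jacobians that are less correlated across samples, which the paper uses as a cheap proxy for trained accuracy.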
@@ -15,7 +15,7 @@ from .searcher_petridish import SearcherPetridish
 from .evaluater_petridish import EvaluaterPetridish
 from archai.common.config import Config
 from archai.common import utils
-from .petridish_cell_builder import PetridishCellBuilder
 
 class PetridishExperimentRunner(ExperimentRunner):
     @overrides
@@ -1,69 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import Optional, Callable, Type
import os

import torch
from torch.utils.data import DataLoader
from torch import nn, Tensor
from torch.optim.optimizer import Optimizer
from torch.optim.lr_scheduler import _LRScheduler

from overrides import overrides, EnforceOverrides

from archai.common.config import Config
from archai.common import common, utils
from archai.common.common import logger
from archai.nas.model import Model
from archai.nas.model_desc import ModelDesc
from archai.nas.arch_trainer import ArchTrainer
from archai.common.trainer import Trainer
from archai.nas.vis_model_desc import draw_model_desc
from archai.common.checkpoint import CheckPoint
from archai.common.ml_utils import set_optim_lr
from archai.datasets import data

TFreezeTrainer = Optional[Type['FreezeTrainer']]


class FreezeTrainer(ArchTrainer, EnforceOverrides):
    def __init__(self, conf_train: Config, model: nn.Module,
                 checkpoint:Optional[CheckPoint]) -> None:
        super().__init__(conf_train, model, checkpoint)


    @overrides
    def pre_fit(self, data_loaders:data.DataLoaders) -> None:
        super().pre_fit(data_loaders)

        # freeze everything other than the last layer
        if not self.conf_train['bypass_freeze']:
            # addup parameters which are not frozen
            num_frozen_params = 0
            for l in model_stats.layer_stats:
                for identifier in self.conf_train['identifiers_to_unfreeze']:
                    if identifier in l.name:
                        num_frozen_params += l.parameters
            ratio_unfrozen = num_frozen_params / model_stats.parameters
            logger.info(f'unfrozen parameters ratio {ratio_unfrozen}')

            self._freeze_but_last_layer()
        else:
            logger.info(f'Bypassing freezing!')


    def _freeze_but_last_layer(self) -> None:

        # Do it via parameters
        for param in self.model.parameters():
            param.requires_grad = False

        for name, param in self.model.named_parameters():
            for identifier in self.conf_train['identifiers_to_unfreeze']:
                if identifier in name:
                    param.requires_grad = True

        for name, param in self.model.named_parameters():
            if param.requires_grad:
                logger.info(f'{name} requires grad')
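Stripped of trainer plumbing, the freezing step in the removed FreezeTrainer is a plain requires_grad sweep: freeze every parameter, then re-enable gradients for parameters whose names contain one of the configured identifiers. A minimal standalone sketch of the same idea (the identifier list is a placeholder, not a value taken from this config):

import torch.nn as nn

def freeze_all_but(model: nn.Module, identifiers_to_unfreeze) -> None:
    # freeze everything first
    for param in model.parameters():
        param.requires_grad = False
    # then unfreeze parameters whose name matches any identifier, e.g. ['fc', 'logits_op']
    for name, param in model.named_parameters():
        if any(ident in name for ident in identifiers_to_unfreeze):
            param.requires_grad = True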
@@ -3,6 +3,7 @@
 
 from typing import Mapping, Optional, Union
 import copy
+import math as ma
 
 import torch
 from torch.utils.data import DataLoader
@@ -17,6 +18,7 @@ from archai.common.config import Config
 from archai.nas.arch_trainer import ArchTrainer
 from archai.common import utils, ml_utils
 from archai.nas.model import Model
+from archai.nas.model_desc import CellType
 from archai.common.checkpoint import CheckPoint
 from archai.common.common import logger
 from archai.datasets import data
@@ -26,14 +28,13 @@ from .xnas_op import XnasOp
 
 class XnasArchTrainer(ArchTrainer):
     def __init__(self, conf_train: Config, model: Model,
-                 checkpoint:Optional[CheckPoint]) -> None:
+                 checkpoint: Optional[CheckPoint]) -> None:
         super().__init__(conf_train, model, checkpoint)
 
         self._conf_w_lossfn = conf_train['lossfn']
-        self._conf_alpha_optim = conf_train['alpha_optimizer']
 
     @overrides
-    def create_optimizer(self, conf_optim:Config, params) -> Optimizer:
+    def create_optimizer(self, conf_optim: Config, params) -> Optimizer:
         # return optim that only operates on w, not alphas
         return ml_utils.create_optimizer(conf_optim,
                                          self.model.nonarch_params(recurse=True))
@@ -105,27 +106,43 @@ class XnasArchTrainer(ArchTrainer):
             self._valid_iter = iter(self._val_dl)
             x_val, y_val = next(self._valid_iter)
 
-        x_val, y_val = x_val.to(self.get_device()), y_val.to(self.get_device(), non_blocking=True)
+        x_val, y_val = x_val.to(self.get_device()), y_val.to(
+            self.get_device(), non_blocking=True)
 
         # update alphas
         self._xnas_optim.step(x, y, x_val, y_val)
 
     @overrides
-    def update_checkpoint(self, checkpoint:CheckPoint)->None:
+    def update_checkpoint(self, checkpoint: CheckPoint) -> None:
         super().update_checkpoint(checkpoint)
 
 
 class _XnasOptimizer:
-    def __init__(self, conf_alpha_optim:Config,
-                 model: Model, lossfn: _Loss) -> None:
-        self._alpha_lr = conf_alpha_optim['lr']
+    def __init__(self, ncell_lr: float, rcell_lr: float,
+                 ncell_effective_t: float, rcell_effective_t: float, train_batch: int,
+                 grad_clip: float, optim, apex, model: Model) -> None:
 
-        self._lossfn = lossfn
+        self._ncell_lr = ncell_lr
+        self._rcell_lr = rcell_lr
+        self._ncell_effective_t = ncell_effective_t
+        self._rcell_effective_t = rcell_effective_t
+        self._train_batch = train_batch
+
+        self._grad_clip = grad_clip
+        self._optim = optim
+        self._apex = apex
+
+        self._lossfn = nn.CrossEntropyLoss()
+
+        # to keep track of where we are in effective updates
+        self._t_rcell = 0
+        self._t_ncell = 0
+
         self._model = model # main model with respect to w and alpha
 
     @staticmethod
     def _get_loss(model, lossfn, x, y):
         logits, *_ = model(x) # might also return aux tower logits
         return lossfn(logits, y)
 
     def step(self, x_train: Tensor, y_train: Tensor, x_valid: Tensor, y_valid: Tensor) -> None:
@@ -1,9 +1,13 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-from archai.common.utils import AverageMeter
+import os
 from collections import defaultdict
 from typing import Iterable, Optional, Tuple, List
+import copy
+import math as ma
+from itertools import count
+import numpy as np
 
 import torch
 from torch import nn
@@ -15,6 +19,9 @@ from archai.nas.model_desc import OpDesc
 from archai.nas.operations import Op
 from archai.nas.arch_params import ArchParams
 from archai.common.utils import zip_eq
+from archai.common.common import get_conf
+from archai.common.common import get_expdir
+
 
 # TODO: reduction cell might have output reduced by 2^1=2X due to
 # stride 2 through input nodes however FactorizedReduce does only
@@ -42,7 +49,7 @@ class XnasOp(Op):
 
         # assume last PRIMITIVE is 'none'
         assert XnasOp.PRIMITIVES[-1] == 'none'
 
         self._ops = nn.ModuleList()
         for primitive in XnasOp.PRIMITIVES:
             op = Op.create(
@@ -51,28 +58,47 @@ class XnasOp(Op):
             self._ops.append(op)
 
         # for getting gradients to non-leaf node
-        self._is_first_call = True
-        self._avg_grad_meter = AverageMeter()
+        self._grad = None
 
         # we do this at the end so that we can capture all arch params registered by
         # any previous child modules
         self._setup_arch_params(arch_params)
 
-    def get_avg_grad(self)->torch.Tensor:
-        return self._avg_grad_meter.avg
-
-    def update_alphas(self, eta:float):
-        grad_flat = torch.flatten(self._avg_grad_meter.avg)
+    def update_alphas(self, eta:float, current_t:int, total_t:int, grad_clip:float):
+        grad_flat = torch.flatten(self._grad)
         rewards = torch.tensor([-torch.dot(grad_flat, torch.flatten(activ)) for activ in self._activs])
         exprewards = torch.exp(eta * rewards).cuda()
-        # TODO: Will this remain registered?
+        # NOTE: Will this remain registered?
         self._alphas[0] = torch.mul(self._alphas[0], exprewards)
-        # TODO: Implement the weak learner eviction
+
+        # weak learner eviction
+        conf = get_conf()
+        to_evict = conf['nas']['search']['xnas']['to_evict']
+        if to_evict:
+            theta = max(self._alphas[0]) * ma.exp(-2 * eta * grad_clip * (total_t - current_t))
+            assert len(self._ops) == self._alphas[0].shape[0]
+            to_keep_mask = self._alphas[0] >= theta
+            num_ops_kept = torch.sum(to_keep_mask).item()
+            assert num_ops_kept > 0
+            # zero out the weights which are evicted
+            self._alphas[0] = torch.mul(self._alphas[0], to_keep_mask)
+
+        # save some debugging info
+        expdir = get_expdir()
+        filename = os.path.join(expdir, str(id(self)) + '.txt')
+
+        # save debug info to file
+        alphas = [str(self._alphas[0][i].item()) for i in range(self._alphas[0].shape[0])]
+        with open(filename, 'a') as f:
+            f.write(str(alphas))
+            f.write('\n')
 
     def _save_grad(self):
         def hook(grad):
-            # TODO: Note that we have to reduce the minibatch to 1 finally
-            self._avg_grad_meter.update(grad, n=1)
+            self._grad = copy.deepcopy(grad)
         return hook
 
     @overrides
@@ -82,10 +108,12 @@ class XnasOp(Op):
         denom = sum(self._alphas[0])
         self.pt = torch.div(numer, denom)
 
-        # register gradient hook if first time
-        if self._is_first_call:
+        # register hook to save gradients
+        # NOTE: it has to be done every forward call
+        # otherwise the hook doesn't remain registered
+        # for subsequent loss.backward calls
+        if self.training:
             self.pt.register_hook(self._save_grad())
-            self._is_first_call = False
 
         return self.pt
 
@@ -105,9 +133,9 @@ class XnasOp(Op):
         # do we have shared arch params?
         if arch_params is None:
             # create our own arch params
-            # TODO: dey: why requires_grad = False?
-            new_p = nn.Parameter( # TODO: use better init than uniform random?
-                1.0e-3*torch.randn(len(XnasOp.PRIMITIVES)), requires_grad=False)
+            # the alphas are updated by exponentiated gradient descent
+            # and not by gradients from backprop. so we don't require grad.
+            new_p = nn.Parameter(torch.ones(len(XnasOp.PRIMITIVES)), requires_grad=False)
             self.create_arch_params([('alphas', new_p)])
         else:
             assert arch_params.has_kind('alphas')
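The update_alphas added above is an exponentiated-gradient (multiplicative-weights) step on the op weights, followed by the XNAS "weak learner" eviction: any op whose weight can no longer catch up to the current leader within the remaining effective steps is zeroed out. Stripped of the Op/Config plumbing, the same arithmetic looks roughly like this sketch (function and argument names are illustrative, not the class API):

import math
import torch

def xnas_alpha_step(alphas, grad, activs, eta, grad_clip, current_t, total_t, evict=True):
    # reward each op by the negative inner product of the saved output gradient
    # with that op's activation
    g = torch.flatten(grad)
    rewards = torch.tensor([-torch.dot(g, torch.flatten(a)) for a in activs])
    # multiplicative-weights (exponentiated gradient) update
    alphas = alphas * torch.exp(eta * rewards)
    if evict:
        # threshold below which an op provably cannot recover before the end of search
        theta = alphas.max() * math.exp(-2 * eta * grad_clip * (total_t - current_t))
        alphas = alphas * (alphas >= theta)
    return alphas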
@@ -209,7 +209,7 @@ def accuracy(output, target, topk=(1,)):
 
     res = []
     for k in topk:
-        correct_k = correct[:k].reshape(-1).float().sum(0)
+        correct_k = correct[:k].contiguous().view(-1).float().sum(0)
         res.append(correct_k.mul_(1.0 / batch_size))
 
     return res
@@ -69,11 +69,7 @@ class Trainer(EnforceOverrides):
 
         assert data_loaders.train_dl is not None
 
-        self._metrics = self.init_metrics()
-
-        # NOTE: critical that pre_fit is called before creating optimizers
-        # as otherwise FreezeTrainer does not work correctly
-        self.pre_fit(train_dl, val_dl)
+        self._metrics = Metrics(self._title, self._apex, logger_freq=self._logger_freq)
 
         # create optimizers and schedulers
         self._multi_optim = self.create_multi_optim(len(data_loaders.train_dl))
@@ -181,25 +177,6 @@ class Trainer(EnforceOverrides):
     ######################### hooks #########################
     def pre_fit(self, data_loaders:data.DataLoaders)->None:
         self._metrics.pre_run()
 
-        # compute model stats per minibatch of training data
-        data_iterator = iter(train_dl)
-        x, target = next(data_iterator)
-        x_shape = list(x.shape)
-        x_shape[0] = 1 # to prevent overflow errors with large batch size we will use a batch size of 1
-        model_stats = get_model_stats(self.model, input_tensor_shape=x_shape, clone_model=True)
-
-        # important to do to avoid overflow
-        mega_flops = float(model_stats.Flops)/1e6
-        mega_madd = float(model_stats.MAdd)/1e6
-
-        # log model stats
-        logger.info({'num_params': model_stats.parameters})
-        logger.info({'mega_flops_per_batch': mega_flops * float(train_dl.batch_size)})
-        logger.info({'mega_madd_per_batch': mega_madd * float(train_dl.batch_size)})
-        logger.info({'num_batches': len(train_dl)})
-        logger.info({'total_mega_flops_epoch': len(train_dl) * mega_flops * train_dl.batch_size})
-
     def post_fit(self, data_loaders:data.DataLoaders)->None:
         test_metrics = None
@@ -262,6 +239,9 @@ class Trainer(EnforceOverrides):
 
         self._start_epoch = last_epoch + 1
 
+    def epoch(self)->int:
+        return self._metrics.epochs()
+
     def update_checkpoint(self, checkpoint:CheckPoint)->None:
         # TODO: Don't need to pass checkpoint
         # save all necessory state
@@ -283,10 +263,11 @@ class Trainer(EnforceOverrides):
         logger.pushd(step)
         assert self.model.training # derived class might alter the mode
 
-        self.pre_step(x, y)
+        # TODO: please check that no algorithm is invalidated by swapping prestep with zero grad
 
         self._multi_optim.zero_grad()
 
+        self.pre_step(x, y)
+
         # divide batch in to chunks if needed so it fits in GPU RAM
         if self.batch_chunks > 1:
             x_chunks, y_chunks = torch.chunk(x, self.batch_chunks), torch.chunk(y, self.batch_chunks)
@@ -1,12 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.
 
-<<<<<<< HEAD
-from typing import Dict, Iterable, Type, MutableMapping, Mapping, Any, Optional, Tuple, List, Union, Sized
-=======
 import functools
 from typing import Dict, Iterable, Sized, Type, MutableMapping, Mapping, Any, Optional, Tuple, List, Union
->>>>>>> ac2ff86d (fix(docs): Fixes sphinx-tabs not being compatible with other packages.)
 import numpy as np
 import logging
 import csv
@@ -339,8 +339,8 @@ def _eval_tta(conf, augment, reporter):
 
     loaders = []
     for _ in range(augment['num_policy']):
-        tl, validloader, tl2 = get_dataloaders(augment['dataroot'], ds_name
-                                               , aug, cutout,
+        tl, validloader, tl2 = get_dataloaders(augment['dataroot'], ds_name,
+                                               aug, cutout,
                                                load_train=True, load_test=True,
                                                val_ratio=val_ratio, val_fold=val_fold, n_workers=n_workers)
         loaders.append(iter(validloader))
@@ -119,17 +119,15 @@ class EvolutionParetoSearch(Searcher):
             #     list(self._get_secondary_objectives_proxy(p).values()) for _, p in candidates_list
             # ])
 
-    @abstractmethod
-    def calc_secondary_objectives(self, population:List[ArchWithMetaData])->None:
-        # computes memory and latency of each model
-        # and updates the meta data
-        pass
+            # crowd_dist = compute_crowding_distance(secondary_objs_proxy)
 
-    @abstractmethod
-    def calc_task_accuracy(self, population:List[ArchWithMetaData])->None:
-        # computes task accuracy of each model
-        # and updates the meta data
-        pass
+            # # Deletes mutations that are not on the top k
+            # for idx in np.argsort(-crowd_dist, axis=None)[mutations_per_parent:]:
+            #     del candidates[candidates_list[idx][0]]
+
+            mutations.update(candidates)
+
+        return list(mutations.values())
 
     def crossover_parents(self, parents: List[ArchaiModel], num_crossovers: int = 1) -> List[ArchaiModel]:
         # Randomly samples k distinct pairs from `parents`
@@ -147,6 +145,7 @@ class EvolutionParetoSearch(Searcher):
                 children.append(child)
                 children_hashes.add(child.archid)
 
+        return children
 
     def sample_random_models(self, num_models: int) -> List[ArchaiModel]:
         return [self.search_space.random_sample() for _ in range(num_models)]
@@ -181,7 +180,6 @@ class EvolutionParetoSearch(Searcher):
 
         self.all_pop = unseen_pop
 
-        self.all_pop = unseen_pop
         for i in range(self.num_iters):
             self.iter_num = i + 1
 
@@ -249,9 +247,9 @@ class EvolutionParetoSearch(Searcher):
 
             # sample some random samples to add to the parent mix
             # to mitigage local minima
-            rand_mix = self._sample_random_to_mix()
+            rand_mix = self.sample_random_models(self.num_random_mix)
 
             unseen_pop = crossovered + mutated + rand_mix
 
             # shuffle before we pick a smaller population for the next stage
             self.logger.info(f'iter {i}: total unseen population before restriction {len(unseen_pop)}')
             unseen_pop = self.select_next_population(unseen_pop)
@@ -27,21 +27,6 @@ class PredictiveDNNEnsemble(Predictor):
         self.lr = lr
         self.num_tr_steps = num_tr_steps
 
-        # TODO: should have an architecture featurizer
-        # object here and the featurizer should tell
-        # us what is the feature size
-        # TODO: get from config
-        self.input_feat_len = num_features
-        self.num_layers = num_layers
-        self.width = width
-        self.sigmoid = sigmoid
-
-        # build the ensemble
-        self.ensemble = [FFEnsembleMember(input_feat_len=self.input_feat_len,
-                                          num_layers=self.num_layers,
-                                          width=self.width, sigmoid=self.sigmoid)
-                         for _ in range(self.num_ensemble_members)]
-
         self.is_fit = False
         self.device = 'cuda'
         self.X_meanvar = None
@@ -120,7 +105,8 @@ class PredictiveDNNEnsemble(Predictor):
 
 
 class FFEnsembleMember(nn.Module):
-    def __init__(self, input_feat_len:int=128, num_layers:int=10, width:int=20, sigmoid: bool = False):
+    def __init__(self, num_objectives: int = 1, input_feat_len: int = 128,
+                 num_layers: int = 10, width: int = 20):
         super(FFEnsembleMember, self).__init__()
 
         self.input_feat_len = input_feat_len
@@ -129,15 +115,7 @@ class FFEnsembleMember(nn.Module):
 
         self.linears = nn.ModuleList([nn.Linear(self.input_feat_len, width)])
         self.linears.extend([nn.Linear(width, width) for i in range(1, self.num_layers-1)])
-
-        output_layers = [
-            nn.Linear(width, 1)
-        ]
-
-        if sigmoid:
-            output_layers.append(nn.Sigmoid())
-
-        self.output = nn.Sequential(*output_layers)
+        self.output = nn.Linear(width, num_objectives)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         for layer in self.linears:
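The hunks above swap the optional sigmoid head for a plain linear head whose width is num_objectives; each ensemble member remains a small fully-connected network over architecture features. A minimal sketch of such a member under the new constructor signature — the ReLU activation in forward is an assumption, since the original forward body is not shown in this hunk:

import torch
from torch import nn

class FFMember(nn.Module):
    # one feed-forward member of the predictive ensemble
    def __init__(self, num_objectives: int = 1, input_feat_len: int = 128,
                 num_layers: int = 10, width: int = 20):
        super().__init__()
        self.linears = nn.ModuleList([nn.Linear(input_feat_len, width)])
        self.linears.extend(nn.Linear(width, width) for _ in range(num_layers - 2))
        self.output = nn.Linear(width, num_objectives)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for layer in self.linears:
            x = torch.relu(layer(x))
        return self.output(x)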
@@ -1,8 +1,8 @@
-from typing import List, Any
+from typing import Sequence, Any
 
 
 class DiscreteChoice():
-    def __init__(self, choices: List[Any]):
+    def __init__(self, choices: Sequence[Any]):
         self.choices = choices
 
     def __getitem__(self, idx):
@@ -79,11 +79,12 @@ class OpDesc:
         c, cs = self.children, state_dict['children']
         assert (c is None and cs is None) or \
                 (c is not None and cs is not None and len(c) == len(cs))
+
         # TODO: when c and cs are both none, zip throws an error that the
         # first argument should be iterable
         if (c is None and cs is None):
             return
-        for cx, csx in zip(c, cs):
+        for cx, csx in utils.zip_eq(c, cs):
             if cx is not None and csx is not None:
                 cx.load_state_dict(csx)
 
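utils.zip_eq, substituted for the builtin zip above, is a length-checked zip so that mismatched children/state lists fail loudly instead of being silently truncated. A sketch of what such a helper does (the real implementation lives in archai.common.utils and may differ in detail):

def zip_eq(a, b):
    # like zip(), but refuse to silently drop elements when lengths differ
    a, b = list(a), list(b)
    assert len(a) == len(b), f'zip_eq: length mismatch {len(a)} != {len(b)}'
    return zip(a, b)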
@@ -1,5 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Common-based methods and classes for the NLP package.
"""
@@ -1,188 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Huggingface's Open AI GPT-2.
"""

import types
from typing import Dict, Optional, Tuple

import torch
import torch.nn.functional as F
from transformers.models.gpt2.modeling_gpt2 import GPT2LMHeadModel

from archai.nlp.legacy_models.hf_gpt2.config_hf_gpt2 import HfGPT2Config, HfGPT2FlexConfig
from archai.nlp.legacy_models.hf_gpt2.hf_gpt2_utils.gpt2_lm_head_model_flex import GPT2LMHeadModelFlex
from archai.nlp.legacy_models.model_base import ArchaiModel


class HfGPT2(ArchaiModel):
    """Huggingface's Open AI GPT-2 standard architecture.

    """

    def __init__(self, **kwargs) -> None:
        """Initializes the class by creating compatible configuration and model objects.

        """

        super().__init__()

        self.config = HfGPT2Config(**kwargs)
        self.model = GPT2LMHeadModel(self.config)

        if self.config.tie_weight:
            self.model.tie_weights()

    def forward(self,
                input_ids: torch.Tensor,
                labels: Optional[torch.Tensor] = None,
                mems: Optional[torch.Tensor] = None,
                past_key_values: Optional[torch.Tensor] = None,
                output_loss: Optional[bool] = True,
                output_prediction_scores: Optional[bool] = False
                ) -> Tuple[torch.Tensor, ...]:
<<<<<<< HEAD
        """Performs forward pass over the model.

        Args:
            input_ids: Input tokens.
            labels: Input labels (same as tokens).
            mems: Memory tensor.
            past_key_values: Tensor with past key/values.
            output_loss: Whether loss should be outputted.
            output_prediction_scores: Whether prediction scores should be outputted.

        Returns:
            (Tuple[torch.Tensor, ...]): Outputs, such as loss, prediction scores,
                memories and past key/values.

<<<<<<< HEAD
        """
=======
    def reset_length(self, tgt_len: int, ext_len: int, mem_len: int) -> None:
        # There is no memory in GPT-2
        pass
=======
        assert mems is None, 'HfGPT2 does not support memory (mems).'

        # Labels are the same as input_ids because they will be shifted inside the model
        # Causal attention mask is also created inside the model
        outputs = self.model(input_ids=input_ids,
                             labels=input_ids,
                             attention_mask=torch.ones_like(input_ids),
                             past_key_values=past_key_values)

        if output_loss:
            return (outputs.loss, None, None, outputs.past_key_values)

        if output_prediction_scores:
            # GPT-2 only outputs the logits, so they need to be converted with log_softmax
            return (None, F.log_softmax(outputs.logits, dim=-1), None, outputs.past_key_values)
>>>>>>> 0a1d1a35 (chore(hf_gpt2): Re-structures hf_gpt2-related files.)

    def get_params(self) -> Dict[str, int]:
        params = {}

        params['embedding'] = self.get_params_from_layer(['Embedding'])
        params['attention'] = self.get_params_from_layer(['GPT2Attention'])
        params['ff'] = self.get_params_from_layer(['GPT2MLP'])
        params['layer_norm'] = self.get_params_from_layer(['LayerNorm'])

        params['non_embedding'] = params['attention'] + params['ff'] + params['layer_norm']
        params['total'] = params['non_embedding'] + params['embedding']
>>>>>>> 628e74a0 (fix(hf_gpt2): Fixes parameters calculation for HfGPT2 and HfGPT2Flex.)

<<<<<<< HEAD
        assert mems is None, 'GPT2 does not support memory (mems)'

        # Labels in Huggingface's GPT-2 are the same as inputs_ids and they will be shifted inside the model
        # Causal attention mask is created inside the model
        hf_out = self.model(input_ids=input_ids,
                            labels=input_ids,
                            attention_mask=torch.ones_like(input_ids))
=======
        return params


class HfGPT2Flex(ArchaiModel):
    """Huggingface's Open AI GPT-2 flex-based architecture.

    Flex-based architectures allow different hyperparameters settings for each layer.

    """

    def __init__(self, **kwargs) -> None:
        """Initializes the class by creating compatible configuration and model objects.

        """

        super().__init__()

        self.config = HfGPT2FlexConfig(**kwargs)
        assert all(self.config.n_head[0] == n_h for n_h in self.config.n_head), 'HfGPT2Flex does not support different `n_head`.'

        self.model = GPT2LMHeadModelFlex(self.config)

        if self.config.tie_weight:
            self.model.tie_weights()

    def forward(self,
                input_ids: torch.Tensor,
                labels: Optional[torch.Tensor] = None,
                mems: Optional[torch.Tensor] = None,
                past_key_values: Optional[torch.Tensor] = None,
                output_loss: Optional[bool] = True,
                output_prediction_scores: Optional[bool] = False
                ) -> Tuple[torch.Tensor, ...]:
        assert mems is None, 'HfGPT2Flex does not support memory (mems).'
>>>>>>> 0a1d1a35 (chore(hf_gpt2): Re-structures hf_gpt2-related files.)

        # Labels are the same as input_ids because they will be shifted inside the model
        # Causal attention mask is also created inside the model
        outputs = self.model(input_ids=input_ids,
                             labels=input_ids,
                             attention_mask=torch.ones_like(input_ids),
                             past_key_values=past_key_values)

<<<<<<< HEAD
    def reset_length(self, tgt_len: int, ext_len: int, mem_len: int) -> None:
        """Resets the length of the memory.

        Args:
            tgt_len: Length of target sample.
            ext_len: Length of extended memory.
            mem_len: Length of the memory.

        """

        # There is no memory in GPT-2
        pass
=======
        if output_loss:
            return (outputs.loss, None, None, outputs.past_key_values)

        if output_prediction_scores:
            # GPT-2 only outputs the logits, so they need to be converted with log_softmax
            return (None, F.log_softmax(outputs.logits, dim=-1), None, outputs.past_key_values)
>>>>>>> 0a1d1a35 (chore(hf_gpt2): Re-structures hf_gpt2-related files.)

    def get_params(self) -> Dict[str, int]:
        """Returns a dictionary of total parameters per implemented layer.

        Returns:
            (Dict[str, int]): Number of total parameters.

        """

        params = {}

        params['embedding'] = self.get_params_from_layer(['Embedding'])
        params['attention'] = self.get_params_from_layer(['GPT2Attention'])
        params['ff'] = self.get_params_from_layer(['GPT2MLPFlex'])
        params['layer_norm'] = self.get_params_from_layer(['LayerNorm'])

        params['non_embedding'] = params['attention'] + params['ff'] + params['layer_norm']
        params['total'] = params['non_embedding'] + params['embedding']

        return params
@@ -1,86 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""Hugginface's Transformer-XL.
"""

from typing import Dict, Optional, Tuple

import torch
from transformers import TransfoXLLMHeadModel

from archai.nlp.legacy_models.hf_transfo_xl.config_hf_transfo_xl import HfTransfoXLConfig
from archai.nlp.legacy_models.model_base import ArchaiModel


class HfTransfoXL(ArchaiModel):
    """Huggingface's Transformer-XL standard architecture.

    """

    def __init__(self, **kwargs) -> None:
        """Initializes the class by creating compatible configuration and model objects.

        """

        super().__init__()

        self.config = HfTransfoXLConfig(**kwargs)
        self.model = TransfoXLLMHeadModel(self.config)

        if self.config.tie_weight:
            self.model.tie_weights()

    def forward(self,
                input_ids: torch.Tensor,
                labels: Optional[torch.Tensor] = None,
                mems: Optional[torch.Tensor] = None,
                past_key_values: Optional[torch.Tensor] = None,
                output_loss: Optional[bool] = True,
                output_prediction_scores: Optional[bool] = False
                ) -> Tuple[torch.Tensor, ...]:
        # Labels are the same as input_ids because they will be shifted inside the model
        if output_loss:
            outputs = self.model(input_ids=input_ids,
                                 labels=input_ids,
                                 mems=mems)

            return (outputs.losses, None, outputs.mems, None)

        if output_prediction_scores:
            outputs = self.model(input_ids=input_ids,
                                 mems=mems)

            return (None, outputs.logits, outputs.mems, None)

    def get_params(self) -> Dict[str, int]:
        """Returns a dictionary of total parameters per implemented layer.

        Returns:
            (Dict[str, int]): Number of total parameters.

        """

        params = {}

        params['embedding'] = self.get_params_from_layer(['AdaptiveEmbedding'])
        params['softmax'] = self.get_params_from_layer(['ProjectedAdaptiveLogSoftmax'])
        params['attention'] = self.get_params_from_layer(['RelPartialLearnableMultiHeadAttn'])
        params['ff'] = self.get_params_from_layer(['PositionwiseFF'])

        params['non_embedding'] = params['softmax'] + params['attention'] + params['ff']
        params['total'] = params['non_embedding'] + params['embedding']

        return params

    def reset_length(self, tgt_len: int, ext_len: int, mem_len: int) -> None:
        if tgt_len < 1:
            raise ValueError(f'tgt_len: {tgt_len} should be >= 1.')
        if ext_len < 0:
            raise ValueError(f'ext_len: {ext_len} should be >= 0.')
        if mem_len < 0:
            raise ValueError(f'mem_len: {mem_len} should be >= 0.')

        self.model.config.tgt_len = tgt_len
        self.model.config.mem_len = mem_len
        self.model.config.ext_len = ext_len
@@ -1,242 +0,0 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""NVIDIA's Memory Transformer configurations.
"""

<<<<<<< HEAD
from typing import Any, Dict
<<<<<<< HEAD
<<<<<<< HEAD

=======
>>>>>>> b64935ed (chore(nlp): Adds configuration classes to every available model.)
=======

>>>>>>> 929e81d1 (chore(nlp): Removing the need of model configuration defaults on NAS.)
from archai.nlp.models.config_base import Config


class MemTransformerLMConfig(Config):
<<<<<<< HEAD
    @property
    def default(self) -> Dict[str, Any]:
        return {
            'd_head': -1,
            'n_token': 267736,
            'dropout': 0.1,
            'dropatt': 0.0,
            'd_embed': -1,
=======
    """Provides a configuration for MemTransformerLM.

    """

    def __init__(self, **kwargs) -> None:
        """Initializes the configuration.

        """

        super().__init__(**kwargs)

    @property
    def default(self) -> Dict[str, Any]:
        """Defines the default configuration used by the class.

        """

        return {
            'd_head': None,
            'n_token': 267736,
            'dropout': 0.1,
            'dropatt': 0.0,
            'd_embed': None,
>>>>>>> b64935ed (chore(nlp): Adds configuration classes to every available model.)
            'div_val': 4,
            'pre_lnorm': False,
            'tgt_len': 192,
            'ext_len': 0,
            'mem_len': 192,
            'same_length': False,
            'attn_type': 0,
            'clamp_len': -1,
            'sample_softmax': -1,
            'cutoffs': [19997, 39997, 199997],
            'tie_projs': [False, True, True, True],
            'tie_weight': True,
            'dtype': None,
            'primer_conv': False,
            'primer_square': False,
            'use_cache': False
        }
<<<<<<< HEAD
<<<<<<< HEAD

    @property
    def search(self) -> Dict[str, Any]:
        return {
            'n_layer': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
            'd_model': [128, 256, 512, 768, 1024],
            'd_inner': list(range(512, 2049, 50)) + list(range(2048, 3072, 200)),
            'n_head': [2, 4, 8]
        }
=======
>>>>>>> b64935ed (chore(nlp): Adds configuration classes to every available model.)
=======

    @property
    def search(self) -> Dict[str, Any]:
        """Defines the default configuration used when searching with the class.

        """

        return {
            'n_layer': {
                'per_layer': False,
                'value': [3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
            },
            'd_model': {
                'per_layer': False,
                'value': list(range(128, 1024, 64))
            },
            'd_inner': {
                'per_layer': True,
                'value': list(range(128, 4096, 64))
            },
            'n_head': {
                'per_layer': True,
                'value': [2, 4, 8]
            }
        }
>>>>>>> 3a5c71c5 (chore(models): Adds default search configuration to models' config.)
=======
from typing import List, Optional

from archai.nlp.legacy_models.config_base import Config, SearchConfig, SearchConfigParameter


class MemTransformerLMConfig(Config):
    """NVIDIA's Memory Transformer default configuration.

    """

    def __init__(self,
                 n_token: Optional[int] = 267736,
                 tgt_len: Optional[int] = 192,
                 d_model: Optional[int] = 512,
                 d_inner: Optional[int] = 2048,
                 d_head: Optional[int] = 0,
                 d_embed: Optional[int] = 0,
                 n_layer: Optional[int] = 16,
                 n_head: Optional[int] = 8,
                 dropout: Optional[float] = 0.1,
                 dropatt: Optional[float] = 0.0,
|
|
||||||
div_val: Optional[int] = 4,
|
|
||||||
pre_lnorm: Optional[bool] = False,
|
|
||||||
cutoffs: Optional[List[int]] = [19997, 39997, 199997],
|
|
||||||
ext_len: Optional[int] = 0,
|
|
||||||
mem_len: Optional[int] = 192,
|
|
||||||
same_length: Optional[bool] = False,
|
|
||||||
attn_type: Optional[int] = 0,
|
|
||||||
clamp_len: Optional[int] = -1,
|
|
||||||
sample_softmax: Optional[int] = -1,
|
|
||||||
adaptive: Optional[bool] = True,
|
|
||||||
weight_init_type: Optional[str] = 'normal',
|
|
||||||
weight_init_range: Optional[float] = 0.01,
|
|
||||||
weight_init_std: Optional[float] = 0.02,
|
|
||||||
proj_init_std: Optional[float] = 0.01,
|
|
||||||
tie_weight: Optional[bool] = True,
|
|
||||||
tie_projs: Optional[List[bool]] = [False, True, True, True],
|
|
||||||
primer_conv: Optional[bool] = False,
|
|
||||||
primer_square: Optional[bool] = False,
|
|
||||||
use_cache: Optional[bool] = False,
|
|
||||||
**kwargs) -> None:
|
|
||||||
"""Initializes the class by overriding default arguments.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
n_token: Size of the vocabulary (number of tokens).
|
|
||||||
tgt_len: Maximum length of sequences (positional embeddings).
|
|
||||||
d_model: Dimensionality of the model.
|
|
||||||
d_inner: Dimensionality of inner feed-forward layers.
|
|
||||||
d_head: Dimensionality of attention heads (`0` for using `d_model` // `n_head`)
|
|
||||||
d_embed: Dimensionality of embedding layer (`0` for using same as `d_model`)
|
|
||||||
n_layer: Number of layers.
|
|
||||||
n_head: Number of attention heads.
|
|
||||||
dropout: Dropout probability.
|
|
||||||
dropatt: Attention dropout probability.
|
|
||||||
div_val: Adaptive embedding/softmax dividend.
|
|
||||||
pre_lnorm: Whether layer normalization should be applied to the input instead of the output.
|
|
||||||
cutoffs: Cutoffs values for adaptive embedding/softmax.
|
|
||||||
ext_len: Maximum length of extended context.
|
|
||||||
mem_len: Maximum length of the memory.
|
|
||||||
same_length: Whether every incoming sample should use the same attention length.
|
|
||||||
attn_type: Type of attention mechanism (`0` for default attention).
|
|
||||||
clamp_len: Uses the same positional embeddings after clamp_len (`0` for no clamp).
|
|
||||||
sample_softmax: Number of samples in the sampled softmax (`-1` for disabling).
|
|
||||||
adaptive: Whether to use adaptive softmax.
|
|
||||||
weight_init_type: Type of weight initialization (`normal` for default).
|
|
||||||
weight_init_range: Range to initialize the weights.
|
|
||||||
weight_init_std: Standard deviation to initialize the weights.
|
|
||||||
proj_init_std: Standard deviation to initialize the projections.
|
|
||||||
tie_weight: Whether embedding and softmax weights should be tied.
|
|
||||||
tie_projs: Whether embedding/softmax projections should be tied.
|
|
||||||
primer_conv: Whether 1D convolution primitive should be employed.
|
|
||||||
primer_square: Whether squared ReLU primitive should be employed.
|
|
||||||
use_cache: Whether `past_key_values` should be stored and used.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
self.n_token = n_token
|
|
||||||
self.tgt_len = tgt_len
|
|
||||||
self.d_model = d_model
|
|
||||||
self.d_inner = d_inner
|
|
||||||
self.d_head = d_head if d_head > 0 else d_model // n_head
|
|
||||||
self.d_embed = d_embed if d_embed > 0 else d_model
|
|
||||||
self.n_layer = n_layer
|
|
||||||
self.n_head = n_head
|
|
||||||
self.dropout = dropout
|
|
||||||
self.dropatt = dropatt
|
|
||||||
self.div_val = div_val
|
|
||||||
self.pre_lnorm = pre_lnorm
|
|
||||||
self.cutoffs = cutoffs
|
|
||||||
self.ext_len = ext_len
|
|
||||||
self.mem_len = mem_len
|
|
||||||
self.same_length = same_length
|
|
||||||
self.attn_type = attn_type
|
|
||||||
self.clamp_len = clamp_len
|
|
||||||
self.sample_softmax = sample_softmax
|
|
||||||
self.adaptive = adaptive
|
|
||||||
self.weight_init_type = weight_init_type
|
|
||||||
self.weight_init_range = weight_init_range
|
|
||||||
self.weight_init_std = weight_init_std
|
|
||||||
self.proj_init_std = proj_init_std
|
|
||||||
self.tie_weight = tie_weight
|
|
||||||
self.tie_projs = tie_projs
|
|
||||||
self.primer_conv = primer_conv
|
|
||||||
self.primer_square = primer_square
|
|
||||||
self.use_cache = use_cache
|
|
||||||
|
|
||||||
super().__init__(**kwargs)
|
|
||||||
|
|
||||||
|
|
||||||
class MemTransformerLMSearchConfig(SearchConfig):
|
|
||||||
"""NVIDIA's Memory Transformer search configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self) -> None:
|
|
||||||
"""Initializes the class by setting default parameters that are used during search.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Default MemTransformerLM search options: n_layer, d_model, d_inner and n_head
|
|
||||||
n_layer = SearchConfigParameter(per_layer=False, value=[3, 4, 5, 6, 7, 8, 9, 10])
|
|
||||||
d_model = SearchConfigParameter(per_layer=False, value=list(range(128, 1024, 64)))
|
|
||||||
d_inner = SearchConfigParameter(per_layer=True, value=list(range(128, 4096, 64)))
|
|
||||||
n_head = SearchConfigParameter(per_layer=True, value=[2, 4, 8])
|
|
||||||
|
|
||||||
super().__init__(n_layer=n_layer,
|
|
||||||
d_model=d_model,
|
|
||||||
d_inner=d_inner,
|
|
||||||
n_head=n_head)
|
|
||||||
>>>>>>> 13f92a50 (chore(mem_transformer): Re-structures mem_transformer-related files.)
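A brief, hypothetical usage sketch of the two configuration classes above (the hyperparameter values are illustrative; attribute access follows the constructor shown here).

# d_head and d_embed default to 0, so they fall back to d_model // n_head and d_model.
config = MemTransformerLMConfig(n_layer=8, d_model=256, d_inner=1024, n_head=4)
print(config.d_head)   # 64, derived from d_model // n_head

# The search configuration carries the per-parameter search ranges defined above.
search_config = MemTransformerLMSearchConfig()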
|
|
The file diff is not shown because of its large size.
|
@@ -1,356 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""NVIDIA's Memory Transformer for ONNX.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple
|
|
||||||
|
|
||||||
from onnx import (GraphProto, ModelProto, NodeProto, TensorProto,
|
|
||||||
ValueInfoProto, helper)
|
|
||||||
from onnxruntime.transformers.fusion_attention import (AttentionMask,
|
|
||||||
FusionAttention)
|
|
||||||
from onnxruntime.transformers.fusion_layernorm import FusionLayerNormalization
|
|
||||||
from onnxruntime.transformers.fusion_reshape import FusionReshape
|
|
||||||
from onnxruntime.transformers.fusion_shape import FusionShape
|
|
||||||
from onnxruntime.transformers.fusion_skiplayernorm import (
|
|
||||||
FusionBiasSkipLayerNormalization, FusionSkipLayerNormalization)
|
|
||||||
from onnxruntime.transformers.fusion_utils import FusionUtils
|
|
||||||
from onnxruntime.transformers.onnx_model import OnnxModel
|
|
||||||
|
|
||||||
from archai.nlp.compression.onnx.onnx_utils.fusion_options import FusionOptions
|
|
||||||
from archai.nlp.legacy_models.config_base import OnnxConfigWithPast
|
|
||||||
|
|
||||||
|
|
||||||
class MemTransformerLMOnnxConfig(OnnxConfigWithPast):
|
|
||||||
"""NVIDIA's Memory Transformer ONNX-based configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, model_config: Dict[str, Any]) -> None:
|
|
||||||
"""Initializes the class by setting missing keys on incoming
|
|
||||||
model's configuration.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_config: Configuration of the model that will be exported.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Checks the type of attention to define the `past_key_values`
|
|
||||||
if model_config['attn_type'] == 0:
|
|
||||||
# `k`, `v` and relative embeddings
|
|
||||||
past_key_values = 3
|
|
||||||
else:
|
|
||||||
# `k` and `v`
|
|
||||||
past_key_values = 2
|
|
||||||
|
|
||||||
super().__init__(model_config,
|
|
||||||
model_type='transfo-xl',
|
|
||||||
past_key_values=past_key_values)
|
|
||||||
|
|
||||||
|
|
||||||
class MemTransformerLMOnnxModel(OnnxModel):
|
|
||||||
"""MemTransformerLM that enables addtiional ONNX optimizations.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, model: ModelProto) -> None:
|
|
||||||
"""Overrides initialization method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model: ONNX-based model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
super().__init__(model)
|
|
||||||
|
|
||||||
self.attention_mask = AttentionMask(self)
|
|
||||||
self.utils = FusionUtils(self)
|
|
||||||
|
|
||||||
def change_graph_input_type(self,
|
|
||||||
graph: GraphProto,
|
|
||||||
graph_input: ValueInfoProto,
|
|
||||||
new_type: Optional[int] = TensorProto.INT32
|
|
||||||
) -> Tuple[NodeProto, List[NodeProto]]:
|
|
||||||
"""Changes the input type of the graph and add Cast nodes if necessary.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
graph: Graph instance.
|
|
||||||
graph_input: Graph inputs.
|
|
||||||
new_type: New data type.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(Tuple[NodeProto, List[NodeProto]]): Cast node to be added and
|
|
||||||
list of Cast nodes to be removed.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
assert isinstance(graph, GraphProto)
|
|
||||||
assert isinstance(graph_input, ValueInfoProto)
|
|
||||||
assert self.find_graph_input(graph_input.name)
|
|
||||||
|
|
||||||
if graph_input.type.tensor_type.elem_type == int(new_type):
|
|
||||||
return None, []
|
|
||||||
|
|
||||||
new_cast_node = None
|
|
||||||
nodes_to_remove = []
|
|
||||||
|
|
||||||
input_name_to_nodes = self.input_name_to_nodes()
|
|
||||||
if graph_input.name in input_name_to_nodes:
|
|
||||||
nodes = input_name_to_nodes[graph_input.name]
|
|
||||||
|
|
||||||
nodes_not_cast = [node for node in nodes if node.op_type != 'Cast']
|
|
||||||
if nodes_not_cast:
|
|
||||||
node_name = self.create_node_name('Cast')
|
|
||||||
output_name = node_name + '_' + graph_input.name
|
|
||||||
new_value_info = graph.value_info.add()
|
|
||||||
new_value_info.CopyFrom(graph_input)
|
|
||||||
new_value_info.name = output_name
|
|
||||||
new_cast_node = helper.make_node('Cast', [graph_input.name], [output_name],
|
|
||||||
to=int(graph_input.type.tensor_type.elem_type),
|
|
||||||
name=node_name)
|
|
||||||
graph.node.extend([new_cast_node])
|
|
||||||
|
|
||||||
for node in nodes_not_cast:
|
|
||||||
OnnxModel.replace_node_input(node, graph_input.name, output_name)
|
|
||||||
|
|
||||||
nodes_cast = [node for node in nodes if node.op_type == 'Cast']
|
|
||||||
for node in nodes_cast:
|
|
||||||
if OnnxModel.get_node_attribute(node, 'to') == int(new_type):
|
|
||||||
self.replace_input_of_all_nodes(node.output[0], graph_input.name)
|
|
||||||
if not self.find_graph_output(node.output[0]):
|
|
||||||
nodes_to_remove.append(node)
|
|
||||||
if nodes_to_remove:
|
|
||||||
self.remove_nodes(nodes_to_remove)
|
|
||||||
|
|
||||||
graph_input.type.tensor_type.elem_type = int(new_type)
|
|
||||||
|
|
||||||
return new_cast_node, nodes_to_remove
|
|
||||||
|
|
||||||
def change_graph_inputs_to_int32(self) -> None:
|
|
||||||
"""Changes the inputs to int32.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
graph = self.graph()
|
|
||||||
|
|
||||||
add_cast_count = 0
|
|
||||||
remove_cast_count = 0
|
|
||||||
|
|
||||||
for graph_input in graph.input:
|
|
||||||
new_node, removed_nodes = self.change_graph_input_type(graph,
|
|
||||||
graph_input,
|
|
||||||
TensorProto.INT32)
|
|
||||||
if new_node:
|
|
||||||
add_cast_count += 1
|
|
||||||
remove_cast_count += len(removed_nodes)
|
|
||||||
|
|
||||||
def fuse_layer_norm(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into a LayerNormalization layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionLayerNormalization(self)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def fuse_skip_layer_norm(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into a SkipLayerNormalization layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionSkipLayerNormalization(self)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def fuse_add_bias_skip_layer_norm(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into a BiasSkipLayerNormalization layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionBiasSkipLayerNormalization(self)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def fuse_attention(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into an Attention layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionAttention(self, 0, 0, self.attention_mask)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def fuse_reshape(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into a Reshape layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionReshape(self)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def fuse_shape(self) -> None:
|
|
||||||
"""Fuses the appropriate nodes into a Shape layer.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
fusion = FusionShape(self)
|
|
||||||
fusion.apply()
|
|
||||||
|
|
||||||
def use_dynamic_axes(self,
|
|
||||||
dynamic_batch_dim: Optional[str] = 'batch_size',
|
|
||||||
dynamic_seq_len: Optional[str] = 'seq_len') -> None:
|
|
||||||
"""Updates inputs and outputs shapes to use dynamic axes.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
dynamic_batch_dim: Name of batch size dimension.
|
|
||||||
dynamic_seq_len: Name of sequence length dimension.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
graph_inputs = self.get_graph_inputs_from_fused_nodes(casted=True) \
|
|
||||||
+ self.get_graph_inputs_from_fused_nodes(casted=False)
|
|
||||||
|
|
||||||
for inp in self.model.graph.input:
|
|
||||||
if inp.name in graph_inputs:
|
|
||||||
dim_proto = inp.type.tensor_type.shape.dim[0]
|
|
||||||
dim_proto.dim_param = dynamic_batch_dim
|
|
||||||
|
|
||||||
if dynamic_seq_len is not None:
|
|
||||||
dim_proto = inp.type.tensor_type.shape.dim[1]
|
|
||||||
dim_proto.dim_param = dynamic_seq_len
|
|
||||||
|
|
||||||
for out in self.model.graph.output:
|
|
||||||
dim_proto = out.type.tensor_type.shape.dim[0]
|
|
||||||
dim_proto.dim_param = dynamic_batch_dim
|
|
||||||
|
|
||||||
def adjust_reshape_and_expand(self) -> None:
|
|
||||||
"""Cleans up unncessary reshape nodes.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
nodes_to_remove = []
|
|
||||||
|
|
||||||
for node in self.nodes():
|
|
||||||
if node.op_type == 'Reshape':
|
|
||||||
reshape_shape = self.get_constant_value(node.input[1])
|
|
||||||
|
|
||||||
if reshape_shape is not None and reshape_shape.size == 0:
|
|
||||||
nodes_to_remove.extend([node])
|
|
||||||
self.replace_input_of_all_nodes(node.output[0], node.input[0])
|
|
||||||
continue
|
|
||||||
|
|
||||||
reshape_path = self.match_parent_path(node,
|
|
||||||
['Expand', 'Expand', 'Reshape', 'Slice'],
|
|
||||||
[0, 0, 0, 0],
|
|
||||||
self.output_name_to_node())
|
|
||||||
|
|
||||||
if reshape_path is not None:
|
|
||||||
expand_node = reshape_path[-3]
|
|
||||||
expand_shape_value = self.get_constant_value(expand_node.input[1])
|
|
||||||
|
|
||||||
reshape_before_expand = reshape_path[-2]
|
|
||||||
shape_value = self.get_constant_value(reshape_before_expand.input[1])
|
|
||||||
|
|
||||||
slice_node = reshape_path[-1]
|
|
||||||
|
|
||||||
if (expand_shape_value is not None and len(expand_shape_value) == 2
        and shape_value is not None and len(shape_value) == 1
        and expand_shape_value[1] == shape_value[0]):
|
|
||||||
node.input[0] = slice_node.output[0]
|
|
||||||
|
|
||||||
if nodes_to_remove:
|
|
||||||
self.remove_nodes(nodes_to_remove)
|
|
||||||
|
|
||||||
def clean_graph(self) -> None:
|
|
||||||
"""Cleans the graph after fusing nodes.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
output_name_to_node = self.output_name_to_node()
|
|
||||||
nodes_to_remove = []
|
|
||||||
|
|
||||||
for node in self.nodes():
|
|
||||||
op_input_id = {'EmbedLayerNormalization': 1, 'ReduceSum': 0, 'Attention': 3}
|
|
||||||
|
|
||||||
if node.op_type in op_input_id:
|
|
||||||
i = op_input_id[node.op_type]
|
|
||||||
parent_nodes = self.match_parent_path(node,
|
|
||||||
['Cast', 'ConstantOfShape', 'Concat', 'Unsqueeze', 'Gather', 'Shape'],
|
|
||||||
[i, 0, 0, 0, 0, 0],
|
|
||||||
output_name_to_node)
|
|
||||||
|
|
||||||
if parent_nodes is not None:
|
|
||||||
cast, constantOfShape, concat, unsqueeze, gather, shape = parent_nodes
|
|
||||||
|
|
||||||
if shape.input[0] == self.graph().input[0].name:
|
|
||||||
constantOfShape.input[0] = shape.output[0]
|
|
||||||
output_name_to_node = self.output_name_to_node()
|
|
||||||
|
|
||||||
if node.op_type == 'Attention':
|
|
||||||
parent_nodes = self.match_parent_path(node,
|
|
||||||
['ReduceSum', 'Cast', 'ConstantOfShape', 'Shape'],
|
|
||||||
[3, 0, 0, 0],
|
|
||||||
output_name_to_node)
|
|
||||||
|
|
||||||
if parent_nodes is not None:
|
|
||||||
if parent_nodes[-1].input[0] == self.graph().input[0].name:
|
|
||||||
attention_node = helper.make_node('Attention',
|
|
||||||
inputs=node.input[0:len(node.input) - 1],
|
|
||||||
outputs=node.output,
|
|
||||||
name=node.name + '_remove_mask')
|
|
||||||
attention_node.domain = 'com.microsoft'
|
|
||||||
attention_node.attribute.extend([helper.make_attribute('num_heads', self.num_heads)])
|
|
||||||
|
|
||||||
self.add_node(attention_node, self.get_graph_by_node(attention_node).name)
|
|
||||||
nodes_to_remove.append(node)
|
|
||||||
|
|
||||||
self.remove_nodes(nodes_to_remove)
|
|
||||||
|
|
||||||
def optimize(self,
|
|
||||||
options: Optional[FusionOptions] = None,
|
|
||||||
add_dynamic_axes: Optional[bool] = False) -> None:
|
|
||||||
"""Performs the additional transformer-based optimization.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
options: Options holding which operators should be fused.
|
|
||||||
add_dynamic_axes: Whether dynamic axes should be added.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into LayerNormalization
|
|
||||||
if (options is None) or options.enable_layer_norm:
|
|
||||||
self.fuse_layer_norm()
|
|
||||||
|
|
||||||
# Pre-processing step
|
|
||||||
self.adjust_reshape_and_expand()
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into Reshape
|
|
||||||
self.fuse_reshape()
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into SkipLayerNormalization
|
|
||||||
if (options is None) or options.enable_skip_layer_norm:
|
|
||||||
self.fuse_skip_layer_norm()
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into Attention
|
|
||||||
# if (options is None) or options.enable_attention:
|
|
||||||
# if options is not None:
|
|
||||||
# self.attention_mask.set_mask_format(options.attention_mask_format)
|
|
||||||
# self.fuse_attention()
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into Shape
|
|
||||||
self.fuse_shape()
|
|
||||||
|
|
||||||
# Removes useless Reshape nodes that linger in the graph
|
|
||||||
self.utils.remove_useless_reshape_nodes()
|
|
||||||
|
|
||||||
# Post-processing step
|
|
||||||
self.clean_graph()
|
|
||||||
self.prune_graph()
|
|
||||||
|
|
||||||
# Fuses appropriate nodes into BiasSkipLayerNormalization
|
|
||||||
if (options is None) or options.enable_bias_skip_layer_norm:
|
|
||||||
self.fuse_add_bias_skip_layer_norm()
|
|
||||||
|
|
||||||
# Removes unused constants that linger in the graph
|
|
||||||
self.remove_unused_constant()
|
|
||||||
|
|
||||||
# Whether dynamic axes should be used
|
|
||||||
if add_dynamic_axes:
|
|
||||||
self.use_dynamic_axes()
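A hypothetical usage sketch of the ONNX optimization wrapper above (the file names are illustrative; the constructor takes an ONNX ModelProto, as shown in __init__, and save_model_to_file comes from the OnnxModel base class used later in optimize_onnx).

from onnx import load_model

onnx_model = MemTransformerLMOnnxModel(load_model('mem_transformer.onnx'))

# Runs the fusion passes and enables dynamic batch/sequence axes.
onnx_model.optimize(options=None, add_dynamic_axes=True)
onnx_model.save_model_to_file('mem_transformer_opt.onnx')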
|
|
|
@@ -1,89 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""Availability dictionaries of implemented Transformer-based classes.
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Huggingface's CodeGen
|
|
||||||
from archai.nlp.legacy_models.hf_codegen.config_hf_codegen import HfCodeGenConfig, HfCodeGenSearchConfig
|
|
||||||
from archai.nlp.legacy_models.hf_codegen.model_hf_codegen import HfCodeGen
|
|
||||||
|
|
||||||
# Huggingface's Open AI GPT-2
|
|
||||||
from archai.nlp.legacy_models.hf_gpt2.config_hf_gpt2 import (HfGPT2Config, HfGPT2SearchConfig,
|
|
||||||
HfGPT2FlexConfig, HfGPT2FlexSearchConfig)
|
|
||||||
from archai.nlp.legacy_models.hf_gpt2.model_hf_gpt2 import HfGPT2, HfGPT2Flex
|
|
||||||
from archai.nlp.legacy_models.hf_gpt2.onnx_hf_gpt2 import HfGPT2OnnxConfig, HfGPT2OnnxModel
|
|
||||||
|
|
||||||
# Huggingface's Open Pre-Trained Transformer
|
|
||||||
from archai.nlp.legacy_models.hf_opt.config_hf_opt import HfOPTConfig, HfOPTSearchConfig
|
|
||||||
from archai.nlp.legacy_models.hf_opt.model_hf_opt import HfOPT
|
|
||||||
|
|
||||||
# Huggingface's Transformer-XL
|
|
||||||
from archai.nlp.legacy_models.hf_transfo_xl.config_hf_transfo_xl import (HfTransfoXLConfig,
|
|
||||||
HfTransfoXLSearchConfig)
|
|
||||||
from archai.nlp.legacy_models.hf_transfo_xl.model_hf_transfo_xl import HfTransfoXL
|
|
||||||
from archai.nlp.legacy_models.hf_transfo_xl.onnx_hf_transfo_xl import (HfTransfoXLOnnxConfig,
|
|
||||||
HfTransfoXLOnnxModel)
|
|
||||||
|
|
||||||
# NVIDIA's Memory Transformer
|
|
||||||
from archai.nlp.legacy_models.mem_transformer.config_mem_transformer import (MemTransformerLMConfig,
|
|
||||||
MemTransformerLMSearchConfig)
|
|
||||||
from archai.nlp.legacy_models.mem_transformer.model_mem_transformer import MemTransformerLM
|
|
||||||
from archai.nlp.legacy_models.mem_transformer.onnx_mem_transformer import (MemTransformerLMOnnxConfig,
|
|
||||||
MemTransformerLMOnnxModel)
|
|
||||||
|
|
||||||
# Analytical parameters formulae
|
|
||||||
from archai.nlp.legacy_models.model_utils.analytical_params_formulae import (get_params_hf_codegen_formula,
|
|
||||||
get_params_hf_gpt2_formula,
|
|
||||||
get_params_hf_gpt2_flex_formula,
|
|
||||||
get_params_hf_opt_formula,
|
|
||||||
get_params_hf_transfo_xl_formula,
|
|
||||||
get_params_mem_transformer_formula)
|
|
||||||
|
|
||||||
MODELS = {
|
|
||||||
'hf_codegen': HfCodeGen,
|
|
||||||
'hf_gpt2': HfGPT2,
|
|
||||||
'hf_gpt2_flex': HfGPT2Flex,
|
|
||||||
'hf_opt': HfOPT,
|
|
||||||
'hf_transfo_xl': HfTransfoXL,
|
|
||||||
'mem_transformer': MemTransformerLM
|
|
||||||
}
|
|
||||||
|
|
||||||
MODELS_CONFIGS = {
|
|
||||||
'hf_codegen': HfCodeGenConfig,
|
|
||||||
'hf_gpt2': HfGPT2Config,
|
|
||||||
'hf_gpt2_flex': HfGPT2FlexConfig,
|
|
||||||
'hf_opt': HfOPTConfig,
|
|
||||||
'hf_transfo_xl': HfTransfoXLConfig,
|
|
||||||
'mem_transformer': MemTransformerLMConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
MODELS_SEARCH_CONFIGS = {
|
|
||||||
'hf_codegen': HfCodeGenSearchConfig,
|
|
||||||
'hf_gpt2': HfGPT2SearchConfig,
|
|
||||||
'hf_gpt2_flex': HfGPT2FlexSearchConfig,
|
|
||||||
'hf_opt': HfOPTSearchConfig,
|
|
||||||
'hf_transfo_xl': HfTransfoXLSearchConfig,
|
|
||||||
'mem_transformer': MemTransformerLMSearchConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
MODELS_PARAMS_FORMULAE = {
|
|
||||||
'hf_codegen': get_params_hf_codegen_formula,
|
|
||||||
'hf_gpt2': get_params_hf_gpt2_formula,
|
|
||||||
'hf_gpt2_flex': get_params_hf_gpt2_flex_formula,
|
|
||||||
'hf_opt': get_params_hf_opt_formula,
|
|
||||||
'hf_transfo_xl': get_params_hf_transfo_xl_formula,
|
|
||||||
'mem_transformer': get_params_mem_transformer_formula
|
|
||||||
}
|
|
||||||
|
|
||||||
ONNX_MODELS = {
|
|
||||||
'hf_gpt2': HfGPT2OnnxModel,
|
|
||||||
'hf_transfo_xl': HfTransfoXLOnnxModel,
|
|
||||||
'mem_transformer': MemTransformerLMOnnxModel
|
|
||||||
}
|
|
||||||
|
|
||||||
ONNX_MODELS_CONFIGS = {
|
|
||||||
'hf_gpt2': HfGPT2OnnxConfig,
|
|
||||||
'hf_transfo_xl': HfTransfoXLOnnxConfig,
|
|
||||||
'mem_transformer': MemTransformerLMOnnxConfig
|
|
||||||
}
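A short sketch of how the availability dictionaries above are meant to be consumed (the model type string is one of the keys defined here).

model_cls = MODELS['mem_transformer']                       # MemTransformerLM
config_cls = MODELS_CONFIGS['mem_transformer']              # MemTransformerLMConfig
params_formula = MODELS_PARAMS_FORMULAE['mem_transformer']  # analytical parameter count
onnx_model_cls = ONNX_MODELS['mem_transformer']             # MemTransformerLMOnnxModel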
|
|
|
@@ -1,188 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""Functions that allows easy-loading of models and their configurations.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Any, Callable, Dict, Optional, Tuple
|
|
||||||
|
|
||||||
import torch
|
|
||||||
|
|
||||||
from onnxruntime.transformers.onnx_model import OnnxModel
|
|
||||||
|
|
||||||
from archai.nlp.legacy_models.config_base import Config, OnnxConfig, SearchConfig
|
|
||||||
from archai.nlp.legacy_models.model_base import ArchaiModel
|
|
||||||
from archai.nlp.legacy_models.model_dict import (MODELS, MODELS_CONFIGS,
|
|
||||||
MODELS_SEARCH_CONFIGS, MODELS_PARAMS_FORMULAE,
|
|
||||||
ONNX_MODELS, ONNX_MODELS_CONFIGS)
|
|
||||||
|
|
||||||
|
|
||||||
def load_model_formula(model_type: str) -> Callable:
|
|
||||||
"""Loads an available analytical parameters formula.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(Callable): Function that analytically calculates parameters.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in MODELS_PARAMS_FORMULAE.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
return MODELS_PARAMS_FORMULAE[model_type]
|
|
||||||
|
|
||||||
# Path to the `models` package
|
|
||||||
PACKAGE_PATH = 'archai.nlp.models'
|
|
||||||
|
|
||||||
def load_model_from_config(model_type: str, model_config: Dict[str, Any]) -> ArchaiModel:
|
|
||||||
"""Loads an available model from a configuration dictionary.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
model_config: Configuration of the model that will be created.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(ArchaiModel): An instance of the created model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in MODELS.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
return MODELS[model_type](**model_config)
|
|
||||||
|
|
||||||
|
|
||||||
def load_model_from_checkpoint(model_type: str,
|
|
||||||
checkpoint_path: str,
|
|
||||||
replace_model_config: Optional[Dict[str, Any]] = None,
|
|
||||||
on_cpu: Optional[bool] = False,
|
|
||||||
for_export: Optional[bool] = False
|
|
||||||
) -> Tuple[ArchaiModel, Dict[str, Any], Dict[str, Any]]:
|
|
||||||
"""Loads an available model from a pre-trained checkpoint.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
checkpoint_path: Path to the pre-trained checkpoint.
|
|
||||||
replace_model_config: Model's configuration replacement dictionary.
|
|
||||||
on_cpu: Whether model should be loaded to CPU.
|
|
||||||
for_export: Whether model should be ready for ONNX exporting.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(Tuple[ArchaiModel, Dict[str, Any], Dict[str, Any]]): Model, configuration
|
|
||||||
and checkpoint dictionaries.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
device = f'cuda:{torch.cuda.current_device()}' if not on_cpu and torch.cuda.is_available() else torch.device('cpu')
|
|
||||||
|
|
||||||
checkpoint = torch.load(checkpoint_path, map_location=device)
|
|
||||||
model_config = checkpoint['model_config']
|
|
||||||
|
|
||||||
# Replaces keys that were provided in the `replace_model_config` dictionary
if replace_model_config is not None:
    for k, v in replace_model_config.items():
        model_config[k] = v

# Checks whether model is supposed to be exported
if for_export:
    model_config['use_cache'] = True

# Loads the model
model = load_model_from_config(model_type, model_config)
model.load_state_dict(checkpoint['model_state'])
model.to(device)

return model, model_config, checkpoint
|
|
||||||
|
|
||||||
def load_config(model_type: str) -> Config:
|
|
||||||
"""Loads an available default configuration class.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(Config): Configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in MODELS_CONFIGS.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
return MODELS_CONFIGS[model_type]()
|
|
||||||
|
|
||||||
|
|
||||||
def load_search_config(model_type: str) -> SearchConfig:
|
|
||||||
"""Loads an available search configuration class.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(SearchConfig): Search configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in MODELS_SEARCH_CONFIGS.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
return MODELS_SEARCH_CONFIGS[model_type]()
|
|
||||||
|
|
||||||
|
|
||||||
def load_onnx_model(model_type: str, *model_args) -> OnnxModel:
|
|
||||||
"""Loads an available ONNX-based model (used during export optimization).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(OnnxModel): ONNX-based optimization model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in ONNX_MODELS.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
# Assumed reconstruction of the garbled tail of this function, following the
# pattern of the other loaders above: instantiates the ONNX optimization model class.
return ONNX_MODELS[model_type](*model_args)
|
|
||||||
|
|
||||||
def load_onnx_config(model_type: str, model_config: Dict[str, Any]) -> OnnxConfig:
|
|
||||||
"""Loads an available ONNX-based configuration (used during export).
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of the model.
|
|
||||||
model_config: Model's configuration used to supply missing attributes.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(OnnxConfig): ONNX-based configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if model_type not in ONNX_MODELS_CONFIGS.keys():
|
|
||||||
raise Exception(f'model_type: {model_type} not supported yet.')
|
|
||||||
|
|
||||||
# Assumed reconstruction of the garbled tail of this function, following the
# pattern of the other loaders above: instantiates the ONNX export configuration.
return ONNX_MODELS_CONFIGS[model_type](model_config)
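A hypothetical usage sketch of the loader helpers above (the configuration keys and the checkpoint path are illustrative only).

# Builds a model straight from a configuration dictionary.
model = load_model_from_config('mem_transformer', {'n_layer': 4, 'd_model': 256})

# Default and search configurations for the same model type.
config = load_config('mem_transformer')
search_config = load_search_config('mem_transformer')

# Restores a model from a checkpoint on CPU (path is illustrative).
# model, model_config, checkpoint = load_model_from_checkpoint('mem_transformer', 'checkpoint.pt', on_cpu=True)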
|
|
|
@@ -1,132 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""Primer-EZ primitives.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch.nn as nn
|
|
||||||
import torch.nn.functional as F
|
|
||||||
|
|
||||||
|
|
||||||
class DWiseConvPrimerEZ(nn.Module):
|
|
||||||
"""Implements the depth-wise convolution according to https://arxiv.org/abs/2109.08668.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
d_model: int,
|
|
||||||
kernel_size: Optional[int] = 3) -> None:
|
|
||||||
"""Overrides the initialization method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
d_model: Dimension of the model.
|
|
||||||
kernel_size: Size of the convolution kernel.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
super(DWiseConvPrimerEZ, self).__init__()
|
|
||||||
|
|
||||||
self.kernel_size = kernel_size
|
|
||||||
|
|
||||||
# Depthwise convolution: groups == in_channels
|
|
||||||
self.dconv = nn.Conv1d(d_model*3, d_model*3, kernel_size=kernel_size, groups=d_model*3)
|
|
||||||
|
|
||||||
def forward(self, inp: torch.Tensor) -> torch.Tensor:
|
|
||||||
"""Performs forward pass over the class. Note that the
|
|
||||||
input should have shape [length, batch, features].
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inp: Input tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(torch.Tensor) Output tensor.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
# LxBxF -> BxFxL
|
|
||||||
w_heads = inp.permute((1, 2, 0))
|
|
||||||
|
|
||||||
# Pad kernel_size-1 to the left of the length so we have causal convolution (can't look forward)
|
|
||||||
w_heads = F.pad(w_heads, (self.kernel_size-1, 0))
|
|
||||||
w_heads = self.dconv(w_heads)
|
|
||||||
|
|
||||||
# Permute back: BxFxL -> LxBxF
|
|
||||||
w_heads = w_heads.permute((2, 0, 1))
|
|
||||||
|
|
||||||
return w_heads
|
|
||||||
|
|
||||||
|
|
||||||
class PositionwiseFFPrimerEZ(nn.Module):
|
|
||||||
"""Implements the squared ReLU according to https://arxiv.org/abs/2109.08668.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self,
|
|
||||||
d_model: int,
|
|
||||||
d_inner: int,
|
|
||||||
dropout: float,
|
|
||||||
pre_lnorm: Optional[bool] = False) -> None:
|
|
||||||
"""Overrides the initialization method.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
d_model: Dimension of the model.
|
|
||||||
d_inner: Inner dimension of the model.
|
|
||||||
dropout: Dropout ratio.
|
|
||||||
pre_lnorm: Whether to apply layer normalization before the sublayer instead of after it.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
super(PositionwiseFFPrimerEZ, self).__init__()
|
|
||||||
|
|
||||||
self.d_model = d_model
|
|
||||||
self.d_inner = d_inner
|
|
||||||
self.dropout = dropout
|
|
||||||
|
|
||||||
self.CoreNet1 = nn.Sequential(nn.Linear(d_model, d_inner), nn.ReLU(inplace=True))
|
|
||||||
self.CoreNet2 = nn.Sequential(nn.Dropout(dropout),
|
|
||||||
nn.Linear(d_inner, d_model),
|
|
||||||
nn.Dropout(dropout))
|
|
||||||
|
|
||||||
self.layer_norm = nn.LayerNorm(d_model)
|
|
||||||
|
|
||||||
self.pre_lnorm = pre_lnorm
|
|
||||||
|
|
||||||
def forward(self, inp: torch.Tensor) -> torch.Tensor:
|
|
||||||
"""Performs forward pass over the class.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
inp: Input tensor.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(torch.Tensor) Output tensor.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
if self.pre_lnorm:
|
|
||||||
inp = self.layer_norm(inp)
|
|
||||||
|
|
||||||
core_out = self.CoreNet2(self.CoreNet1(inp) ** 2)
|
|
||||||
|
|
||||||
# Residual connection
|
|
||||||
output = core_out + inp
|
|
||||||
|
|
||||||
if not self.pre_lnorm:
|
|
||||||
output = self.layer_norm(output)
|
|
||||||
|
|
||||||
return output
|
|
||||||
|
|
||||||
|
|
||||||
def forward_hf_gpt2_mlp_primer_ez(self, hidden_states: torch.Tensor) -> torch.Tensor:
|
|
||||||
"""Implements the squared ReLU for Huggingface's Open AI GPT-2 according to https://arxiv.org/abs/2109.08668.
|
|
||||||
|
|
||||||
"""
|
|
||||||
hidden_states = self.c_fc(hidden_states)
|
|
||||||
hidden_states = self.act(hidden_states) ** 2
|
|
||||||
hidden_states = self.c_proj(hidden_states)
|
|
||||||
hidden_states = self.dropout(hidden_states)
|
|
||||||
|
|
||||||
return hidden_states
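A hypothetical usage sketch of the Primer-EZ primitives above (tensor shapes are illustrative; DWiseConvPrimerEZ expects [length, batch, features] input, and its feature dimension must be d_model * 3 to match the Conv1d defined above).

ff = PositionwiseFFPrimerEZ(d_model=64, d_inner=256, dropout=0.1)
x = torch.rand(32, 2, 64)                  # L x B x F
y = ff(x)                                  # same shape, squared-ReLU feed-forward + residual

conv = DWiseConvPrimerEZ(d_model=64, kernel_size=3)
w_heads = conv(torch.rand(32, 2, 64 * 3))  # causal depth-wise convolution, length preserved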
|
|
|
@@ -1,137 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""Handles every ONNX-related export methods.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import json
|
|
||||||
from itertools import chain
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import torch
|
|
||||||
from onnx import helper, load_model, numpy_helper, save
|
|
||||||
|
|
||||||
from archai.nlp.legacy_models.model_loader import load_onnx_config
|
|
||||||
from archai.nlp.compression.onnx.onnx_utils.operators import (tril_onnx,
|
|
||||||
triu_onnx)
|
|
||||||
|
|
||||||
|
|
||||||
def weight_sharing(onnx_model_path: str, model_type: str) -> None:
|
|
||||||
"""Shares weights between embedding and softmax layers.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
onnx_model_path: Path to the ONNX model that will have weights shared.
|
|
||||||
model_type: Type of model to share the weights.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Finds nodes in the graph based on their input name
|
|
||||||
def _find_nodes_by_input(nodes, input_name):
|
|
||||||
return [name for name in nodes.keys() if input_name in nodes[name].input]
|
|
||||||
|
|
||||||
# Finds weights in the graph based on their shape
|
|
||||||
def _find_weights_by_shape(weights, shape):
|
|
||||||
return [name for name in weights.keys() if numpy_helper.to_array(weights[name]).shape == shape]
|
|
||||||
|
|
||||||
# Loads the ONNX model
|
|
||||||
model = load_model(onnx_model_path)
|
|
||||||
|
|
||||||
# Gathers weights and nodes from the loaded model
|
|
||||||
weights = {w.name:w for w in model.graph.initializer}
|
|
||||||
nodes = {n.name:n for n in model.graph.node}
|
|
||||||
|
|
||||||
if model_type in ['hf_gpt2', 'hf_gpt2_flex']:
|
|
||||||
n_emb_weight = 1
|
|
||||||
n_cutoffs = 0
|
|
||||||
elif model_type == 'mem_transformer':
|
|
||||||
n_emb_weight = len(list(filter(lambda x: 'word_emb.emb_layers' in x, weights.keys())))
|
|
||||||
n_cutoffs = n_emb_weight - 1
|
|
||||||
else:
|
|
||||||
raise ValueError(f'Model {model_type} not supported for weight sharing.')
|
|
||||||
|
|
||||||
for i in range(n_emb_weight):
|
|
||||||
# Grabs the embedding weights pointer and removes from the graph
|
|
||||||
emb_weight_name = f'word_emb.emb_layers.{i}.weight'
|
|
||||||
if model_type == 'hf_gpt2':
|
|
||||||
emb_weight_name = 'transformer.wte.weight'
|
|
||||||
|
|
||||||
emb_weight = numpy_helper.to_array(weights[emb_weight_name])
|
|
||||||
model.graph.initializer.remove(weights[emb_weight_name])
|
|
||||||
|
|
||||||
# Replaces the duplicated embedding weights by the softmax ones
|
|
||||||
softmax_shape = (emb_weight.shape[1], emb_weight.shape[0])
|
|
||||||
if i == 0:
|
|
||||||
softmax_shape = (emb_weight.shape[1], emb_weight.shape[0] + n_cutoffs)
|
|
||||||
softmax_weight = _find_weights_by_shape(weights, softmax_shape)[0]
|
|
||||||
emb_gather_name = _find_nodes_by_input(nodes, emb_weight_name)[0]
|
|
||||||
nodes[emb_gather_name].attribute.append(helper.make_attribute('axis', 1))
|
|
||||||
nodes[emb_gather_name].input[0] = softmax_weight
|
|
||||||
|
|
||||||
# Adds a "Transpose" node to invert the new embedding weights
|
|
||||||
permute_dim = [1, 2, 0]
|
|
||||||
if n_cutoffs != 0:
|
|
||||||
permute_dim = [1, 0, 2]
|
|
||||||
emb_gather_output = nodes[emb_gather_name].output[0]
|
|
||||||
transpose_node_output = f'transposed_out_{i}'
|
|
||||||
transpose_node = helper.make_node('Transpose', [emb_gather_output], [transpose_node_output], perm=permute_dim)
|
|
||||||
model.graph.node.append(transpose_node)
|
|
||||||
|
|
||||||
# Links the previous embedding output with the "Transpose" node
|
|
||||||
emb_gather = _find_nodes_by_input(nodes, emb_gather_output)[0]
|
|
||||||
nodes[emb_gather].input[0] = transpose_node_output
|
|
||||||
|
|
||||||
# Saves the ONNX model
|
|
||||||
save(model, onnx_model_path)
|
|
||||||
|
|
||||||
|
|
||||||
def export_onnx_from_torch(model: torch.nn.Module,
|
|
||||||
model_config: dict,
|
|
||||||
model_type: str,
|
|
||||||
onnx_model_path: str,
|
|
||||||
share_weights: Optional[bool] = True,
|
|
||||||
do_constant_folding: Optional[bool] = True,
|
|
||||||
opset_version: Optional[int] = 11) -> None:
|
|
||||||
"""Exports a PyTorch-based model to ONNX.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model: Input model.
|
|
||||||
model_config: Model configuration.
|
|
||||||
model_type: Type of model to be exported.
|
|
||||||
onnx_model_path: Path to the output ONNX model file.
|
|
||||||
share_weights: Whether embedding/softmax weights should be shared.
|
|
||||||
do_constant_folding: Whether to apply constant folding.
|
|
||||||
opset_version: Version of the operators set.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Gathers the proper ONNX configuration instance
|
|
||||||
onnx_config = load_onnx_config(model_type, model_config)
|
|
||||||
|
|
||||||
# Creates the dynamic axes based on inputs and outputs
|
|
||||||
dynamic_axes = {name: axes for name, axes in chain(onnx_config.inputs.items(), onnx_config.outputs.items())}
|
|
||||||
|
|
||||||
# Applies a caveat to use unsupported triu/tril by PyTorch
|
|
||||||
torch.triu = triu_onnx
|
|
||||||
torch.tril = tril_onnx
|
|
||||||
|
|
||||||
# Exports model to ONNX
|
|
||||||
torch.onnx.export(model,
|
|
||||||
(onnx_config.mockups,),
|
|
||||||
onnx_model_path,
|
|
||||||
input_names=list(onnx_config.inputs.keys()),
|
|
||||||
output_names=list(onnx_config.outputs.keys()),
|
|
||||||
dynamic_axes=dynamic_axes,
|
|
||||||
do_constant_folding=do_constant_folding,
|
|
||||||
opset_version=opset_version)
|
|
||||||
|
|
||||||
# Exports configuration to JSON
|
|
||||||
config_path = Path(onnx_model_path).parent / 'config.json'
|
|
||||||
with open(config_path, 'w') as f:
|
|
||||||
json.dump(onnx_config.config.to_dict(), f)
|
|
||||||
|
|
||||||
# Applies weight sharing
|
|
||||||
if share_weights:
|
|
||||||
weight_sharing(onnx_model_path, model_type)
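A hypothetical end-to-end sketch of the export helper above (model and model_config would come from the model_loader helpers; the output path is illustrative).

export_onnx_from_torch(model,
                       model_config,
                       'hf_gpt2',
                       'model.onnx',
                       share_weights=True,
                       opset_version=11)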
|
|
|
@@ -1,144 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""ONNX-loading utilities that enable exports.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import copy
|
|
||||||
import types
|
|
||||||
from os import environ
|
|
||||||
from typing import Any, Dict, Sized, Tuple
|
|
||||||
|
|
||||||
from onnxruntime import (GraphOptimizationLevel, InferenceSession,
|
|
||||||
SessionOptions)
|
|
||||||
from onnxruntime.transformers import quantize_helper
|
|
||||||
|
|
||||||
from archai.nlp.legacy_models.model_loader import load_model_from_checkpoint, load_model_from_config
|
|
||||||
from archai.nlp.compression.onnx.onnx_utils.forward import (crit_forward_mem_transformer_onnx,
|
|
||||||
forward_hf_gpt2_onnx,
|
|
||||||
forward_mem_transformer_onnx)
|
|
||||||
from archai.nlp.legacy_models.model_base import ArchaiModel
|
|
||||||
|
|
||||||
# ONNX-loading constants
|
|
||||||
OMP_NUM_THREADS = 1
|
|
||||||
OMP_WAIT_POLICY = 'ACTIVE'
|
|
||||||
|
|
||||||
# Constants available in onnxruntime
|
|
||||||
# that enables performance optimization
|
|
||||||
environ['OMP_NUM_THREADS'] = str(OMP_NUM_THREADS)
|
|
||||||
environ['OMP_WAIT_POLICY'] = OMP_WAIT_POLICY
|
|
||||||
|
|
||||||
|
|
||||||
def load_from_onnx(onnx_model_path: str) -> InferenceSession:
|
|
||||||
"""Loads an ONNX-based model from file.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
onnx_model_path: Path to the ONNX model file.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(InferenceSession): ONNX inference session.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Defines the ONNX loading options
|
|
||||||
options = SessionOptions()
|
|
||||||
options.intra_op_num_threads = OMP_NUM_THREADS
|
|
||||||
options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
|
|
||||||
|
|
||||||
# Creates an inference session
|
|
||||||
session = InferenceSession(onnx_model_path, options)
|
|
||||||
session.disable_fallback()
|
|
||||||
|
|
||||||
return session
|
|
||||||
|
|
||||||
|
|
||||||
def _prepare_export(model: ArchaiModel,
|
|
||||||
model_config: Dict[str, Any],
|
|
||||||
model_type: str) -> ArchaiModel:
|
|
||||||
"""Prepares a PyTorch model with export-ready.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model: PyTorch model.
|
|
||||||
model_config: Model configuration.
|
|
||||||
model_type: Type of model.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(ArchaiModel): Export-ready PyTorch model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Overrides forward functions if MemTransformerLM
|
|
||||||
if model_type == 'mem_transformer':
|
|
||||||
model.forward = types.MethodType(forward_mem_transformer_onnx, model)
|
|
||||||
model.crit.forward = types.MethodType(crit_forward_mem_transformer_onnx, model.crit)
|
|
||||||
|
|
||||||
# Overrides forward functions if HfGPT2
|
|
||||||
if model_type == 'hf_gpt2':
|
|
||||||
model = model.model
|
|
||||||
model.forward = types.MethodType(forward_hf_gpt2_onnx, model)
|
|
||||||
|
|
||||||
for layer in model.transformer.h:
|
|
||||||
quantize_helper.conv1d_to_linear(layer.mlp)
|
|
||||||
|
|
||||||
if isinstance(model_config['d_head'], Sized):
|
|
||||||
model_config['d_head'] = model_config['d_head'][0]
|
|
||||||
if isinstance(model_config['n_head'], Sized):
|
|
||||||
model_config['n_head'] = model_config['n_head'][0]
|
|
||||||
if model_config['d_head'] < 0:
|
|
||||||
model_config['d_head'] = model_config['d_model'] // model_config['n_head']
|
|
||||||
|
|
||||||
# Puts the model in evaluation mode to disable dropout
|
|
||||||
model.eval()
|
|
||||||
|
|
||||||
return model, model_config
|
|
||||||
|
|
||||||
|
|
||||||
def load_from_config_for_export(model_type: str,
|
|
||||||
model_config: Dict[str, Any]) -> Tuple[ArchaiModel, Dict[str, Any]]:
|
|
||||||
"""Loads a PyTorch-based model from configuration with export-ready.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of model to be loaded.
|
|
||||||
model_config: Model configuration.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(ArchaiModel, Dict[str, Any]): Export-ready PyTorch model and its configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Copies model's configuration to prevent changing the original one
|
|
||||||
export_model_config = copy.deepcopy(model_config)
|
|
||||||
export_model_config['use_cache'] = True
|
|
||||||
|
|
||||||
# Loads the model from configuration
|
|
||||||
model = load_model_from_config(model_type, export_model_config)
|
|
||||||
|
|
||||||
# Prepares the model for export
|
|
||||||
model, export_model_config = _prepare_export(model, export_model_config, model_type)
|
|
||||||
|
|
||||||
return model, export_model_config
|
|
||||||
|
|
||||||
|
|
||||||
def load_from_torch_for_export(model_type: str,
|
|
||||||
torch_model_path: str) -> Tuple[ArchaiModel, Dict[str, Any]]:
|
|
||||||
"""Loads a PyTorch-based model from checkpoint with export-ready.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of model to be loaded.
|
|
||||||
torch_model_path: Path to the PyTorch model/checkpoint file.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(ArchaiModel, Dict[str, Any]): Export-ready PyTorch model and its configuration.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
# Loads the model
|
|
||||||
model, model_config, _ = load_model_from_checkpoint(model_type,
|
|
||||||
torch_model_path,
|
|
||||||
on_cpu=True,
|
|
||||||
for_export=True)
|
|
||||||
|
|
||||||
# Prepares the model for export
|
|
||||||
model, model_config = _prepare_export(model, model_config, model_type)
|
|
||||||
|
|
||||||
return model, model_config
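A hypothetical usage sketch combining the export loaders above (the checkpoint and ONNX paths are illustrative).

# Prepares a checkpointed model for export and then loads the exported graph with onnxruntime.
model, model_config = load_from_torch_for_export('mem_transformer', 'checkpoint.pt')
session = load_from_onnx('model.onnx')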
|
|
|
@@ -1,95 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
"""ONNX-related optimization helpers and utilities.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from onnx import load_model
|
|
||||||
from onnxruntime.transformers.optimizer import optimize_by_onnxruntime
|
|
||||||
|
|
||||||
from archai.common.utils import create_file_name_identifier
|
|
||||||
from archai.nlp.legacy_models.model_loader import load_onnx_model
|
|
||||||
from archai.nlp.compression.onnx.onnx_utils.fusion_options import FusionOptions
|
|
||||||
|
|
||||||
|
|
||||||
def optimize_onnx(model_type: str,
|
|
||||||
onnx_model_path: str,
|
|
||||||
num_heads: Optional[int] = 8,
|
|
||||||
use_gpu: Optional[bool] = False,
|
|
||||||
opt_level: Optional[int] = 0,
|
|
||||||
only_ort: Optional[bool] = False,
|
|
||||||
float16: Optional[bool] = False,
|
|
||||||
input_int32: Optional[bool] = False) -> Path:
|
|
||||||
"""Optimizes an ONNX model.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
model_type: Type of model to be optimized.
|
|
||||||
onnx_model_path: Path to the ONNX model to be optimized.
|
|
||||||
num_heads: Number of attention heads.
|
|
||||||
use_gpu: Whether to use GPU during optimization.
|
|
||||||
opt_level: Level of optimization.
|
|
||||||
only_ort: Whether to only apply ORT optimization.
|
|
||||||
float16: Whether to use graph with float16.
|
|
||||||
input_int32: Whether to use inputs with int32.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
(Path): Path to the optimized ONNX model.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
assert opt_level in [0, 1, 2, 99]
|
|
||||||
ort_model_path = None
|
|
||||||
|
|
||||||
# Applies standard ORT-based optimization
|
|
||||||
if opt_level > 0:
|
|
||||||
disabled_optimizers = []
|
|
||||||
|
|
||||||
if opt_level > 1:
|
|
||||||
# Disables some optimizers that might influence shape inference/attention fusion.
|
|
||||||
if not only_ort:
|
|
||||||
disabled_optimizers = ['MatMulScaleFusion', 'MatMulAddFusion',
                       'SimplifiedLayerNormFusion', 'GemmActivationFusion',
                       'BiasSoftmaxFusion']
|
|
||||||
|
|
||||||
# Performs the standard ORT optimization
|
|
||||||
ort_model_path = create_file_name_identifier(Path(onnx_model_path), '_ort')
|
|
||||||
optimize_by_onnxruntime(onnx_model_path,
|
|
||||||
use_gpu=use_gpu,
|
|
||||||
optimized_model_path=str(ort_model_path),
|
|
||||||
opt_level=opt_level,
|
|
||||||
disabled_optimizers=disabled_optimizers)
|
|
||||||
|
|
||||||
# Applies additional transformer-based optimization
|
|
||||||
if not only_ort:
|
|
||||||
# Loads the ORT-optimized model, optimizer and fusion options
|
|
||||||
ort_model = load_model(ort_model_path or onnx_model_path)
|
|
||||||
ort_model_path = create_file_name_identifier(Path(onnx_model_path), '_opt')
|
|
||||||
|
|
||||||
# Puts the arguments for the optimizer
|
|
||||||
optimizer_args = (ort_model, )
|
|
||||||
if model_type == 'hf_gpt2':
|
|
||||||
# Adds `hidden_size` as zero just for retro-compatibility
|
|
||||||
optimizer_args += (num_heads, 0)
|
|
||||||
|
|
||||||
optimizer = load_onnx_model(model_type, *optimizer_args)
|
|
||||||
options = FusionOptions(model_type)
|
|
||||||
|
|
||||||
# Optimizes the model
|
|
||||||
optimizer.optimize(options)
|
|
||||||
|
|
||||||
# Applies float16 to the model
|
|
||||||
if float16:
|
|
||||||
ort_model_path = create_file_name_identifier(Path(onnx_model_path), '_opt_fp16')
|
|
||||||
optimizer.convert_float_to_float16(keep_io_types=True)
|
|
||||||
|
|
||||||
# Applies int32 to the model inputs
|
|
||||||
if input_int32:
|
|
||||||
optimizer.change_graph_inputs_to_int32()
|
|
||||||
|
|
||||||
# Saves the model to file
|
|
||||||
optimizer.save_model_to_file(str(ort_model_path))
|
|
||||||
|
|
||||||
return ort_model_path
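A hypothetical usage sketch of the optimization entry point above (the input path is illustrative; the returned path carries an '_ort', '_opt' or '_opt_fp16' suffix depending on the options, as produced by create_file_name_identifier).

opt_path = optimize_onnx('hf_gpt2', 'model.onnx', num_heads=8, opt_level=1, float16=False)
print(opt_path)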
|
|
1349  archai/nlp/train.py
The file diff is not shown because of its large size.
|
@@ -1,145 +0,0 @@
|
||||||
import random
|
|
||||||
from typing import List
|
|
||||||
from numpy import dtype
|
|
||||||
from overrides.overrides import overrides
|
|
||||||
|
|
||||||
import torch
|
|
||||||
import torch_geometric
|
|
||||||
|
|
||||||
from archai.nas.arch_meta import ArchWithMetaData
|
|
||||||
from archai.nas.discrete_search_space import EncodableDiscreteSearchSpace
|
|
||||||
from archai.algos.natsbench.natsbench_utils import create_natsbench_tss_api, model_from_natsbench_tss
|
|
||||||
from archai.algos.natsbench.lib.models.cell_searchs import CellStructure
|
|
||||||
|
|
||||||
class DiscreteSearchSpaceNatsbenchTSS(EncodableDiscreteSearchSpace):
|
|
||||||
def __init__(self, datasetname:str, natsbench_location:str):
|
|
||||||
super().__init__()
|
|
||||||
self.datasetname = datasetname
|
|
||||||
self.natsbench_location = natsbench_location
|
|
||||||
|
|
||||||
# Natsbench TSS ops bag
|
|
||||||
self.OPS = ['avg_pool_3x3', 'nor_conv_1x1', 'nor_conv_3x3', 'none', 'skip_connect']
|
|
||||||
|
|
||||||
# create the natsbench api
|
|
||||||
self.api = create_natsbench_tss_api(self.natsbench_location)
|
|
||||||
|
|
||||||
@overrides
|
|
||||||
def random_sample(self)->ArchWithMetaData:
|
|
||||||
''' Uniformly samples a random architecture '''
|
|
||||||
curr_archid = random.sample(range(len(self.api)), k=1)[0]
|
|
||||||
|
|
||||||
# create the model for the sampled architecture id
|
|
||||||
model = model_from_natsbench_tss(curr_archid, self.datasetname, self.api)
|
|
||||||
|
|
||||||
meta_data = {
|
|
||||||
'datasetname': self.datasetname,
|
|
||||||
'archid': curr_archid
|
|
||||||
}
|
|
||||||
arch_meta = ArchWithMetaData(model, meta_data)
|
|
||||||
return arch_meta
|
|
||||||
|
|
||||||
|
|
||||||
def get_training_accuracy_at_n_epoch(self,
|
|
||||||
archid:int,
|
|
||||||
datasetname:str,
|
|
||||||
epoch:int)->float:
|
|
||||||
data = self.api.query_by_index(archid, dataname=datasetname, hp='200')
|
|
||||||
train_top1s = []
|
|
||||||
for _, v in data.items():
|
|
||||||
train_top1s.append(v.train_acc1es[epoch])
|
|
||||||
|
|
||||||
avg_train_top1s = sum(train_top1s)/len(train_top1s)
|
|
||||||
return avg_train_top1s
|
|
||||||
|
|
||||||
|
|
||||||
@overrides
|
|
||||||
def get_neighbors(self, arch: ArchWithMetaData) -> List[ArchWithMetaData]:
|
|
||||||
''' Reused from https://github.com/naszilla/naszilla/blob/master/naszilla/nas_bench_201/cell_201.py '''
|
|
||||||
# first get the string representation of the current architecture
|
|
||||||
archid = arch.metadata['archid']
|
|
||||||
string_rep = self.api.get_net_config(archid, self.datasetname)['arch_str']
|
|
||||||
nbhd_strs = []
|
|
||||||
ops = self._get_op_list(string_rep)
|
|
||||||
for i in range(len(ops)):
|
|
||||||
available = [op for op in self.OPS if op != ops[i]]
|
|
||||||
for op in available:
|
|
||||||
new_ops = ops.copy()
|
|
||||||
new_ops[i] = op
|
|
||||||
new_arch_str = self._get_string_from_ops(new_ops)
|
|
||||||
nbhd_strs.append(new_arch_str)
|
|
||||||
|
|
||||||
# convert the arch strings to architecture ids
|
|
||||||
nbhd_archs = []
|
|
||||||
for arch_str in nbhd_strs:
|
|
||||||
this_archid = self.api.archstr2index[arch_str]
|
|
||||||
model = model_from_natsbench_tss(this_archid, self.datasetname, self.api)
|
|
||||||
meta_data = {
|
|
||||||
'datasetname': self.datasetname,
|
|
||||||
'archid': this_archid
|
|
||||||
}
|
|
||||||
arch_meta = ArchWithMetaData(model, meta_data)
|
|
||||||
nbhd_archs.append(arch_meta)
|
|
||||||
return nbhd_archs
|
|
||||||
|
|
||||||
|
|
||||||
def _get_op_list(self, string:str)->List[str]:
|
|
||||||
''' Reused from https://github.com/naszilla/naszilla/blob/master/naszilla/nas_bench_201/cell_201.py '''
|
|
||||||
# given a string, get the list of operations
|
|
||||||
tokens = string.split('|')
|
|
||||||
ops = [t.split('~')[0] for i,t in enumerate(tokens) if i not in [0,2,5,9]]
|
|
||||||
return ops
|
|
||||||
|
|
||||||
|
|
||||||
def _get_string_from_ops(self, ops):
|
|
||||||
''' Reused from https://github.com/naszilla/naszilla/blob/master/naszilla/nas_bench_201/cell_201.py '''
|
|
||||||
# given a list of operations, get the string
|
|
||||||
strings = ['|']
|
|
||||||
nodes = [0, 0, 1, 0, 1, 2]
|
|
||||||
for i, op in enumerate(ops):
|
|
||||||
strings.append(op+'~{}|'.format(nodes[i]))
|
|
||||||
if i < len(nodes) - 1 and nodes[i+1] == 0:
|
|
||||||
strings.append('+|')
|
|
||||||
return ''.join(strings)
|
|
||||||
|
|
||||||
@overrides
|
|
||||||
def get_arch_repr(self, arch: ArchWithMetaData) -> torch_geometric.data.Data:
|
|
||||||
string_rep = self.api.get_net_config(
|
|
||||||
arch.metadata['archid'], self.datasetname
|
|
||||||
)['arch_str']
|
|
||||||
|
|
||||||
return self.get_arch_repr_from_archstr(string_rep)
|
|
||||||
|
|
||||||
def get_arch_repr_from_archstr(self, string_rep: str) -> torch_geometric.data.Data:
|
|
||||||
model_arch = list(CellStructure.str2fullstructure(string_rep).nodes)
|
|
||||||
model_arch.insert(0, (('input', None),))
|
|
||||||
onehot = lambda x: [int(op == x) for op in self.OPS + ['input', 'output']]
|
|
||||||
|
|
||||||
# Node features and edges
|
|
||||||
node_features, edges = [], []
|
|
||||||
node_names = {}
|
|
||||||
|
|
||||||
for out_level, out_level_nodes in enumerate(model_arch):
|
|
||||||
node_names[out_level] = []
|
|
||||||
|
|
||||||
for op, in_level in out_level_nodes:
|
|
||||||
out_node = len(node_features)
|
|
||||||
|
|
||||||
if in_level is not None:
|
|
||||||
edges += [
|
|
||||||
[in_node, out_node] for in_node in node_names[in_level]
|
|
||||||
]
|
|
||||||
|
|
||||||
node_names[out_level].append(out_node)
|
|
||||||
node_features.append(onehot(op))
|
|
||||||
|
|
||||||
# Adds output node info
|
|
||||||
edges += [
|
|
||||||
[in_node, len(node_features)] for in_node in node_names[out_level]
|
|
||||||
]
|
|
||||||
node_features.append(onehot('output'))
|
|
||||||
|
|
||||||
# Returns torch_geometric.data.Data object
|
|
||||||
return torch_geometric.data.Data(
|
|
||||||
x=torch.tensor(node_features, dtype=torch.float),
|
|
||||||
edge_index=torch.tensor(edges, dtype=torch.long).T
|
|
||||||
)
|
|
|
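For orientation, a brief hypothetical sketch of how the (now removed) search space above is used; the dataset name and NATS-Bench location are placeholder assumptions:

```python
# Hypothetical usage sketch; 'cifar10' and the NATS-Bench path are placeholders.
space = DiscreteSearchSpaceNatsbenchTSS(
    datasetname='cifar10',
    natsbench_location='~/dataroot/natsbench/NATS-tss-v1_0-3ffb9-simple')

arch = space.random_sample()            # ArchWithMetaData carrying 'archid' in its metadata
neighbors = space.get_neighbors(arch)   # all single-op edits of the sampled cell
graph = space.get_arch_repr(arch)       # torch_geometric.data.Data encoding of the cell

print(arch.metadata['archid'], len(neighbors), graph.num_nodes)
```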
@ -1,113 +0,0 @@
import sys
import math
from typing import List, Dict, Tuple, Optional
from functools import lru_cache
from collections import defaultdict

from torch_geometric.data import Data as GraphData


def get_graph_ngrams(graph: GraphData, n: int = 3,
                     node_features: Optional[List[int]] = None,
                     output_node_only: bool = False) -> List[Tuple[Tuple]]:
    """Lists all node n-grams from a torch_geometric graph

    Args:
        graph (torch_geometric.data.Data): Torch geometric graph
        node_features (List[int]): List of node attributes that should be considered.
            If None, all node attributes are considered.
        n (int, optional): n-gram length.
        output_node_only (bool, optional): If all of the listed node n-grams should end in
            the output node. Defaults to False.

    Returns:
        List[Tuple[Tuple]]: List of node features n-grams (tuples of tuples)
    """
    # Converts the edge list to a node dict
    edges = graph.edge_index.T.numpy().tolist()
    node_features = graph.x.numpy()[:, node_features] if node_features else graph.x.numpy()

    graph_dict = {
        node: {
            'inputs': [],
            'features': node_features[node]
        } for node in range(graph.num_nodes)
    }

    for in_node, out_node in edges:
        graph_dict[out_node]['inputs'].append(in_node)

    @lru_cache(maxsize=20_000)
    def _ngrams_ending_in(node_id: int, n: int):
        node = graph_dict[node_id]
        features = [tuple(node['features'].tolist())]

        if n == 1 or (output_node_only and node_id == 0):
            return [features]

        if node['inputs'] is None and not output_node_only:
            return [None]

        return [
            path + features
            for p_node in node['inputs']
            for path in _ngrams_ending_in(p_node, n-1)
            if path
        ]

    if output_node_only:
        return [tuple(p) for p in _ngrams_ending_in(len(node_features) - 1, n)]

    return [
        tuple(path)
        for terminal_node in graph_dict
        for path in _ngrams_ending_in(terminal_node, n)
        if path
    ]


def get_graph_paths(graph: GraphData, node_features: Optional[List[int]] = None) -> List[Tuple[Tuple]]:
    """Lists all paths from an architecture graph.

    Args:
        graph (torch_geometric.data.Data): Torch geometric graph
        node_features (List[int]): List of node attributes that should be considered.
            If None, all node attributes are considered.

    Returns:
        List[Tuple[Tuple]]: List of node features n-grams (tuples of tuples)
    """
    return get_graph_ngrams(
        graph, n=sys.maxsize, node_features=node_features,
        output_node_only=True
    )


def graph_ngram_cossim(graph1: Dict, graph2: Dict, node_vars: List[str],
                       n: int, output_node_only: bool = False):
    # Keyword arguments are used so that `node_vars` is not silently passed as `n`.
    x, y = [get_graph_ngrams(g, n=n, node_features=node_vars,
                             output_node_only=output_node_only) for g in [graph1, graph2]]
    x, y = set(x), set(y)
    norm_x, norm_y = math.sqrt(len(x)), math.sqrt(len(y))

    return (
        len(x.intersection(y)) / (norm_x * norm_y)
    )


def graph_path_cossim(graph1: Dict, graph2: Dict, node_vars: List[str]):
    return graph_ngram_cossim(graph1, graph2, node_vars, sys.maxsize, True)


def graph_ngram_jaccard(graph1: Dict, graph2: Dict, node_vars: List[str],
                        n: int, output_node_only: bool = False):
    # Keyword arguments are used so that `node_vars` is not silently passed as `n`.
    x, y = [get_graph_ngrams(g, n=n, node_features=node_vars,
                             output_node_only=output_node_only) for g in [graph1, graph2]]
    x, y = set(x), set(y)

    return (
        len(x.intersection(y)) / (len(x) + len(y) - len(x.intersection(y)))
    )


def graph_path_jaccard(graph1: Dict, graph2: Dict, node_vars: List[str]):
    return graph_ngram_jaccard(graph1, graph2, node_vars, sys.maxsize, True)
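A hypothetical sketch (not in the original patch) of using the helpers above to compare two architectures; it reuses the `space` object from the earlier sketch:

```python
# Hypothetical sketch: n-gram and path based similarity between two architecture graphs.
g1 = space.get_arch_repr(space.random_sample())
g2 = space.get_arch_repr(space.random_sample())

sim_ngram = graph_ngram_jaccard(g1, g2, node_vars=None, n=3)   # Jaccard over 3-grams
sim_path = graph_path_cossim(g1, g2, node_vars=None)           # cosine over full paths

print(f'jaccard(3-gram)={sim_ngram:.3f}  cosine(paths)={sim_path:.3f}')
```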
@ -1,47 +0,0 @@
__include__: 'darts.yaml' # just use darts defaults

common:
  checkpoint:
    freq: -1 # essentially disable checkpointin to speedup download

nas:
  search:
    model_desc:
      num_edges_to_sample: 2 # number of edges each node will take input from

  eval:
    model_filename: False # disable model saving to speedup download
    nasbench101:
      arch_index: 0
    model_desc:
      num_edges_to_sample: 2
    loader:
      train_batch: 256
      aug: '' # in natsbench paper they use random flip and crop which is part of standard transforms
    trainer:
      plotsdir: ''
      apex:
        _copy: '/common/apex'
      aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
      drop_path_prob: 0.0 # probability that given edge will be dropped
      grad_clip: 5.0 # grads above this value is clipped
      l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
      title: 'eval_train'
      epochs: 108
      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
      lossfn:
        type: 'CrossEntropyLoss'
      optimizer:
        type: 'sgd'
        lr: 0.2 # init learning rate
        decay: 1.0e-4 # pytorch default is 0.0
        momentum: 0.9 # pytorch default is 0.0
        nesterov: False # pytorch default is False
        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
      lr_schedule:
        type: 'cosine'
        min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
          multiplier: 1
          epochs: 0 # 0 disables warmup
@ -1,47 +0,0 @@
__include__: 'darts.yaml' # just use darts defaults

common:
  checkpoint:
    freq: -1 # essentially disable checkpointin to speedup download

nas:
  search:
    model_desc:
      num_edges_to_sample: 2 # number of edges each node will take input from

  eval:
    model_filename: False # disable model saving to speedup download
    nasbench101:
      arch_index: 0
    model_desc:
      num_edges_to_sample: 2
    loader:
      train_batch: 256
      aug: '' # in natsbench paper they use random flip and crop which is part of standard transforms
    trainer:
      plotsdir: ''
      apex:
        _copy: '/common/apex'
      aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
      drop_path_prob: 0.0 # probability that given edge will be dropped
      grad_clip: 5.0 # grads above this value is clipped
      l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
      title: 'eval_train'
      epochs: 108
      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
      lossfn:
        type: 'CrossEntropyLoss'
      optimizer:
        type: 'sgd'
        lr: 0.2 # init learning rate
        decay: 1.0e-4 # pytorch default is 0.0
        momentum: 0.9 # pytorch default is 0.0
        nesterov: False # pytorch default is False
        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
      lr_schedule:
        type: 'cosine'
        min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
          multiplier: 1
          epochs: 0 # 0 disables warmup
@ -26,10 +26,6 @@ nas:
    cell:
      n_nodes: 5 # number of nodes in a cell if template desc is not provided
      cell_post_op: 'proj_channels'
-   loader:
-     train_batch: 64
-   trainer:
-     epochs: 600
  petridish:
    cell_count_scale: 1.0 # for eval first multiply number of cells used in search by this factor, limit to n_cells
    trainer:
@ -1,28 +0,0 @@
__include__: 'darts.yaml' # just use darts defaults

#primitives: ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect', 'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5']
primitives: ['max_pool_3x3', 'avg_pool_3x3', 'skip_connect']

nas:
  search:
    model_desc:
      num_edges_to_sample: 2 # number of edges each node will take input from

  eval:
    model_desc:
      num_edges_to_sample: 2
    trainer:
      plotsdir: ''
      epochs: 600
      aux_weight: 0.0
      drop_path_prob: 0.0
      proxynas:
        val_top1_acc_threshold: 0.85 # after some accuracy we will shift into training only the last layer
        freeze_epochs: 200
        freeze_lr: 0.001
        freeze_decay: 0.0
        freeze_momentum: 0.0
        train_regular: True
        identifiers_to_unfreeze: ['logits_op._op']
        aux_weight: 0.0 # disable auxiliary loss part during finetuning
@ -1,62 +0,0 @@
__include__: 'darts.yaml' # just use darts defaults

nas:
  search:
    model_desc:
      num_edges_to_sample: 2 # number of edges each node will take input from

  eval:
    dartsspace:
      arch_index: 66
    model_desc:
      num_edges_to_sample: 2
      n_cells: 8
    loader:
      val_ratio: 0.0
      train_batch: 96
      freeze_loader:
        train_batch: 96 # batch size for freeze training
    trainer:
      use_val: False
      plotsdir: ''
      epochs: 20
      top1_acc_threshold: 0.60 # after some accuracy we will shift into training only the last 'n' layers
      train_regular: True # if False the full regular training of the architecture will be bypassed
      train_fear: False

    freeze_trainer:
      plotsdir: ''
      bypass_freeze: False
      identifiers_to_unfreeze: ['logits_op._op', 'cells.7', 'cells.6'] # last few layer names in DARTS space are 'logits_op._op': Linear, 'cells.19': prefix for all cell 19 parameters
      apex:
        _copy: '/common/apex'
      aux_weight: 0.0 # very important that this is 0.0 for freeze training
      drop_path_prob: 0.0 # very important that this is 0.0 for freeze training
      grad_clip: 5.0 # grads above this value is clipped
      l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
      title: 'eval_train'
      epochs: 10
      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
      lossfn:
        type: 'CrossEntropyLoss'
      optimizer:
        type: 'sgd'
        lr: 0.025 # init learning rate
        decay: 3.0e-4 # pytorch default is 0.0
        momentum: 0.9 # pytorch default is 0.0
        nesterov: False # pytorch default is False
        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
      lr_schedule:
        type: 'cosine'
        min_lr: 0.001 # min learning rate to be set in eta_min param of scheduler
        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
          multiplier: 1
          epochs: 0 # 0 disables warmup
      validation:
        title: 'eval_test'
        batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
        logger_freq: 0
        freq: 1 # perform validation only every N epochs
        lossfn:
          type: 'CrossEntropyLoss'
@ -1,88 +0,0 @@
__include__: 'darts.yaml' # just use darts defaults

nas:
  search:
    model_desc:
      num_edges_to_sample: 2 # number of edges each node will take input from

  eval:
    natsbench:
      arch_index: 288
      natsbench_tss_fast: 'NATS-tss-v1_0-3ffb9-simple' # folder name in dataroot/natsbench that contains the tss fast mode folder
    model_desc:
      num_edges_to_sample: 2
    loader:
      val_ratio: 0.2
      train_batch: 256
      aug: '' # in natsbench paper they use random flip and crop, which are part of the regular transforms
      naswotrain:
        train_batch: 256 # batch size for computing trainingless score
      freeze_loader:
        train_batch: 512 # batch size for freeze training. 2048 works reliably on V100 with cell13 onwards unfrozen
    trainer:
      plotsdir: ''
      use_val: False
      top1_acc_threshold: 0.2 # after some accuracy we will shift into training only the last 'n' layers
      apex:
        _copy: '/common/apex'
      aux_weight: '_copy: /nas/eval/model_desc/aux_weight'
      drop_path_prob: 0.2 # probability that given edge will be dropped
      grad_clip: 5.0 # grads above this value is clipped
      l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
      title: 'eval_train'
      epochs: 200
      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
      lossfn:
        type: 'CrossEntropyLoss'
      optimizer:
        type: 'sgd'
        lr: 0.1 # init learning rate
        decay: 5.0e-4 # pytorch default is 0.0
        momentum: 0.9 # pytorch default is 0.0
        nesterov: True # pytorch default is False
        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
      lr_schedule:
        type: 'cosine'
        min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
          multiplier: 1
          epochs: 0 # 0 disables warmup

    freeze_trainer:
      plotsdir: ''
      bypass_freeze: True # if true will not freeze anything. identifiers_to_unfreeze has no effect.
      identifiers_to_unfreeze: ['classifier', 'lastact', 'cells.16', 'cells.15', 'cells.14', 'cells.13'] # last few layer names in natsbench: lastact, lastact.0, lastact.1: BN-Relu, global_pooling: global avg. pooling (doesn't get exposed as a named param though), classifier: linear layer
      apex:
        _copy: '/common/apex'
      aux_weight: 0.0 # very important that this is 0.0 for freeze training
      drop_path_prob: 0.0 # very important that this is 0.0 for freeze training
      grad_clip: 5.0 # grads above this value is clipped
      l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
      logger_freq: 1000 # after every N updates dump loss and other metrics in logger
      title: 'eval_train'
      epochs: 5
      batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
      lossfn:
        type: 'CrossEntropyLoss'
      optimizer:
        type: 'sgd'
        lr: 0.1 # init learning rate
        decay: 5.0e-4 # pytorch default is 0.0
        momentum: 0.9 # pytorch default is 0.0
        nesterov: True # pytorch default is False
        decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
      lr_schedule:
        type: 'cosine'
        min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
        warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
          multiplier: 1
          epochs: 0 # 0 disables warmup
      validation:
        title: 'eval_test'
        batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
        logger_freq: 0
        freq: 1 # perform validation only every N epochs
        lossfn:
          type: 'CrossEntropyLoss'
@ -1,162 +0,0 @@
|
||||||
__include__: "../datasets/cifar10.yaml" # default dataset settings are for cifar
|
|
||||||
|
|
||||||
common:
|
|
||||||
experiment_name: 'throwaway' # you should supply from command line
|
|
||||||
experiment_desc: 'throwaway'
|
|
||||||
logdir: '~/logdir'
|
|
||||||
log_prefix: 'log' # prefix for log files that will becreated (log.log and log.yaml), no log files if ''
|
|
||||||
log_level: 20 # logging.INFO
|
|
||||||
backup_existing_log_file: False # should we overwrite existing log file without making a copy?
|
|
||||||
yaml_log: True # if True, structured logs as yaml are also generated
|
|
||||||
seed: 2.0
|
|
||||||
tb_enable: False # if True then TensorBoard logging is enabled (may impact perf)
|
|
||||||
tb_dir: '$expdir/tb' # path where tensorboard logs would be stored
|
|
||||||
checkpoint:
|
|
||||||
filename: '$expdir/checkpoint.pth'
|
|
||||||
freq: 10
|
|
||||||
|
|
||||||
# reddis address of Ray cluster. Use None for single node run
|
|
||||||
# otherwise it should something like host:6379. Make sure to run on head node:
|
|
||||||
# "ray start --head --redis-port=6379"
|
|
||||||
redis: null
|
|
||||||
apex: # this is overriden in search and eval individually
|
|
||||||
enabled: False # global switch to disable everything apex
|
|
||||||
distributed_enabled: True # enable/disable distributed mode
|
|
||||||
mixed_prec_enabled: True # switch to disable amp mixed precision
|
|
||||||
gpus: '' # use GPU IDs specified here (comma separated), if '' then use all GPUs
|
|
||||||
opt_level: 'O2' # optimization level for mixed precision
|
|
||||||
bn_fp32: True # keep BN in fp32
|
|
||||||
loss_scale: "dynamic" # loss scaling mode for mixed prec, must be string reprenting floar ot "dynamic"
|
|
||||||
sync_bn: False # should be replace BNs with sync BNs for distributed model
|
|
||||||
scale_lr: True # enable/disable distributed mode
|
|
||||||
min_world_size: 0 # allows to confirm we are indeed in distributed setting
|
|
||||||
detect_anomaly: False # if True, PyTorch code will run 6X slower
|
|
||||||
seed: '_copy: /common/seed'
|
|
||||||
ray:
|
|
||||||
enabled: False # initialize ray. Note: ray cannot be used if apex distributed is enabled
|
|
||||||
local_mode: False # if True then ray runs in serial mode
|
|
||||||
|
|
||||||
smoke_test: False
|
|
||||||
only_eval: False
|
|
||||||
resume: True
|
|
||||||
|
|
||||||
dataset: {} # default dataset settings comes from __include__ on the top
|
|
||||||
|
|
||||||
nas:
|
|
||||||
search:
|
|
||||||
max_num_models: 300
|
|
||||||
ratio_fastest_duration: 2.0
|
|
||||||
finalizer: 'default' # options are 'random' or 'default'
|
|
||||||
data_parallel: False
|
|
||||||
checkpoint:
|
|
||||||
_copy: '/common/checkpoint'
|
|
||||||
resume: '_copy: /common/resume'
|
|
||||||
full_desc_filename: '$expdir/full_model_desc.yaml' # arch before it was finalized
|
|
||||||
final_desc_filename: '$expdir/final_model_desc.yaml' # final arch is saved in this file
|
|
||||||
metrics_dir: '$expdir/models/{reductions}/{cells}/{nodes}/{search_iter}' # where metrics and model stats would be saved from each pareto iteration
|
|
||||||
model_desc:
|
|
||||||
n_reductions: 2 # number of reductions to be applied
|
|
||||||
n_cells: 8 # number of cells
|
|
||||||
num_edges_to_sample: 2 # number of incoming edges per node to be randomly sampled
|
|
||||||
dataset:
|
|
||||||
_copy: '/dataset'
|
|
||||||
max_final_edges: 2 # max edge that can be in final arch per node
|
|
||||||
model_post_op: 'pool_adaptive_avg2d'
|
|
||||||
params: {}
|
|
||||||
aux_weight: 0.4 # weight for loss from auxiliary towers in test time arch
|
|
||||||
aux_tower_stride: 3 # stride that aux tower should use, 3 is good for 32x32 images, 2 for imagenet
|
|
||||||
model_stems:
|
|
||||||
ops: ['stem_conv3x3', 'stem_conv3x3']
|
|
||||||
stem_multiplier: 3 # output channels multiplier for the stem
|
|
||||||
init_node_ch: 36 # num of input/output channels for nodes in 1st cell. NOTE: we match that in eval since this is discrete search.
|
|
||||||
cell:
|
|
||||||
n_nodes: 4 # number of nodes in a cell
|
|
||||||
cell_post_op: 'concate_channels'
|
|
||||||
loader:
|
|
||||||
apex:
|
|
||||||
_copy: '../../trainer_full/apex'
|
|
||||||
aug: '' # additional augmentations to use
|
|
||||||
cutout: 16 # cutout length, use cutout augmentation when > 0
|
|
||||||
load_train: True # load train split of dataset
|
|
||||||
train_batch: 96
|
|
||||||
freeze_loader:
|
|
||||||
train_batch: 96
|
|
||||||
train_workers: 4 # if null then gpu_count*4
|
|
||||||
test_workers: '_copy: ../train_workers' # if null then 4
|
|
||||||
load_test: True # load test split of dataset
|
|
||||||
test_batch: 1024
|
|
||||||
val_ratio: 0.0 #split portion for test set, 0 to 1
|
|
||||||
val_fold: 0 #Fold number to use (0 to 4)
|
|
||||||
cv_num: 5 # total number of folds available
|
|
||||||
dataset:
|
|
||||||
_copy: '/dataset'
|
|
||||||
freeze_trainer:
|
|
||||||
bypass_freeze: True
|
|
||||||
identifiers_to_unfreeze: ['logits_op._op', 'cells.7', 'cells.6'] # last few layer names in DARTS space are 'logits_op._op': Linear, 'cells.19': prefix for all cell 19 parameters
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: '_copy: /nas/search/model_desc/aux_weight'
|
|
||||||
drop_path_prob: 0.2 # probability that given edge will be dropped
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'arch_train'
|
|
||||||
epochs: 10
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
# additional vals for the derived class
|
|
||||||
plotsdir: '' #empty string means no plots, other wise plots are generated for each epoch in this dir
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.025 # init learning rate
|
|
||||||
decay: 3.0e-4
|
|
||||||
momentum: 0.9 # pytorch default is 0
|
|
||||||
nesterov: False
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.001 # min learning rate, this will be used in eta_min param of scheduler
|
|
||||||
warmup: null
|
|
||||||
validation:
|
|
||||||
title: 'search_val'
|
|
||||||
logger_freq: 0
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
trainer_full:
|
|
||||||
top1_acc_threshold: 0.1
|
|
||||||
use_val: False
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: '_copy: /nas/search/model_desc/aux_weight'
|
|
||||||
drop_path_prob: 0.2 # probability that given edge will be dropped
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'arch_train'
|
|
||||||
epochs: 100
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
# additional vals for the derived class
|
|
||||||
plotsdir: '' #empty string means no plots, other wise plots are generated for each epoch in this dir
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.025 # init learning rate
|
|
||||||
decay: 3.0e-4
|
|
||||||
momentum: 0.9 # pytorch default is 0
|
|
||||||
nesterov: False
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.001 # min learning rate, this will be used in eta_min param of scheduler
|
|
||||||
warmup: null
|
|
||||||
validation:
|
|
||||||
title: 'search_val'
|
|
||||||
logger_freq: 0
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
|
@ -1,148 +0,0 @@
|
||||||
__include__: "../datasets/cifar10.yaml" # default dataset settings are for cifar
|
|
||||||
|
|
||||||
common:
|
|
||||||
experiment_name: 'throwaway' # you should supply from command line
|
|
||||||
experiment_desc: 'throwaway'
|
|
||||||
logdir: '~/logdir'
|
|
||||||
log_prefix: 'log' # prefix for log files that will becreated (log.log and log.yaml), no log files if ''
|
|
||||||
log_level: 20 # logging.INFO
|
|
||||||
backup_existing_log_file: False # should we overwrite existing log file without making a copy?
|
|
||||||
yaml_log: True # if True, structured logs as yaml are also generated
|
|
||||||
seed: 2.0
|
|
||||||
tb_enable: False # if True then TensorBoard logging is enabled (may impact perf)
|
|
||||||
tb_dir: '$expdir/tb' # path where tensorboard logs would be stored
|
|
||||||
checkpoint:
|
|
||||||
filename: '$expdir/checkpoint.pth'
|
|
||||||
freq: 10
|
|
||||||
|
|
||||||
# reddis address of Ray cluster. Use None for single node run
|
|
||||||
# otherwise it should something like host:6379. Make sure to run on head node:
|
|
||||||
# "ray start --head --redis-port=6379"
|
|
||||||
redis: null
|
|
||||||
apex: # this is overriden in search and eval individually
|
|
||||||
enabled: False # global switch to disable everything apex
|
|
||||||
distributed_enabled: True # enable/disable distributed mode
|
|
||||||
mixed_prec_enabled: True # switch to disable amp mixed precision
|
|
||||||
gpus: '' # use GPU IDs specified here (comma separated), if '' then use all GPUs
|
|
||||||
opt_level: 'O2' # optimization level for mixed precision
|
|
||||||
bn_fp32: True # keep BN in fp32
|
|
||||||
loss_scale: "dynamic" # loss scaling mode for mixed prec, must be string reprenting floar ot "dynamic"
|
|
||||||
sync_bn: False # should be replace BNs with sync BNs for distributed model
|
|
||||||
scale_lr: True # enable/disable distributed mode
|
|
||||||
min_world_size: 0 # allows to confirm we are indeed in distributed setting
|
|
||||||
detect_anomaly: False # if True, PyTorch code will run 6X slower
|
|
||||||
seed: '_copy: /common/seed'
|
|
||||||
ray:
|
|
||||||
enabled: False # initialize ray. Note: ray cannot be used if apex distributed is enabled
|
|
||||||
local_mode: False # if True then ray runs in serial mode
|
|
||||||
|
|
||||||
smoke_test: False
|
|
||||||
only_eval: False
|
|
||||||
resume: True
|
|
||||||
|
|
||||||
dataset: {} # default dataset settings comes from __include__ on the top
|
|
||||||
|
|
||||||
nas:
|
|
||||||
search:
|
|
||||||
max_num_models: 2
|
|
||||||
ratio_fastest_duration: 1.2
|
|
||||||
natsbench:
|
|
||||||
natsbench_tss_fast: 'NATS-tss-v1_0-3ffb9-simple' # folder name in dataroot/natsbench that contains the tss fast mode folder
|
|
||||||
finalizer: 'default' # options are 'random' or 'default'
|
|
||||||
data_parallel: False
|
|
||||||
checkpoint:
|
|
||||||
_copy: '/common/checkpoint'
|
|
||||||
resume: '_copy: /common/resume'
|
|
||||||
full_desc_filename: '$expdir/full_model_desc.yaml' # arch before it was finalized
|
|
||||||
final_desc_filename: '$expdir/final_model_desc.yaml' # final arch is saved in this file
|
|
||||||
loader:
|
|
||||||
apex:
|
|
||||||
_copy: '../../trainer/apex'
|
|
||||||
aug: '' # additional augmentations to use
|
|
||||||
cutout: 0 # cutout length, use cutout augmentation when > 0
|
|
||||||
load_train: True # load train split of dataset
|
|
||||||
train_batch: 256
|
|
||||||
freeze_loader:
|
|
||||||
train_batch: 1024 # batch size for freeze training.
|
|
||||||
train_workers: 4 # if null then gpu_count*4
|
|
||||||
test_workers: '_copy: ../train_workers' # if null then 4
|
|
||||||
load_test: False # load test split of dataset
|
|
||||||
test_batch: 1024
|
|
||||||
val_ratio: 0.0 #split portion for test set, 0 to 1
|
|
||||||
val_fold: 0 #Fold number to use (0 to 4)
|
|
||||||
cv_num: 5 # total number of folds available
|
|
||||||
dataset:
|
|
||||||
_copy: '/dataset'
|
|
||||||
trainer:
|
|
||||||
use_val: False
|
|
||||||
top1_acc_threshold: 0.1 # after some accuracy we will shift into training only the last 'n' layers
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0
|
|
||||||
drop_path_prob: 0.2 # probability that given edge will be dropped
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'arch_train'
|
|
||||||
epochs: 200
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
# additional vals for the derived class
|
|
||||||
plotsdir: '' #empty string means no plots, other wise plots are generated for each epoch in this dir
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4
|
|
||||||
momentum: 0.9 # pytorch default is 0
|
|
||||||
nesterov: True
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate, this will be used in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'search_val'
|
|
||||||
logger_freq: 0
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
|
|
||||||
freeze_trainer:
|
|
||||||
plotsdir: ''
|
|
||||||
identifiers_to_unfreeze: ['classifier', 'lastact', 'cells.16', 'cells.15', 'cells.14', 'cells.13'] # last few layer names in natsbench: lastact, lastact.0, lastact.1: BN-Relu, global_pooling: global avg. pooling (doesn't get exposed as a named param though), classifier: linear layer
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
drop_path_prob: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'eval_train'
|
|
||||||
epochs: 10
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4 # pytorch default is 0.0
|
|
||||||
momentum: 0.9 # pytorch default is 0.0
|
|
||||||
nesterov: True # pytorch default is False
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'eval_test'
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
logger_freq: 0
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
|
@ -1,183 +0,0 @@
|
||||||
__include__: "../datasets/cifar10.yaml" # default dataset settings are for cifar
|
|
||||||
|
|
||||||
common:
|
|
||||||
experiment_name: 'throwaway' # you should supply from command line
|
|
||||||
experiment_desc: 'throwaway'
|
|
||||||
logdir: '~/logdir'
|
|
||||||
log_prefix: 'log' # prefix for log files that will becreated (log.log and log.yaml), no log files if ''
|
|
||||||
log_level: 20 # logging.INFO
|
|
||||||
backup_existing_log_file: False # should we overwrite existing log file without making a copy?
|
|
||||||
yaml_log: True # if True, structured logs as yaml are also generated
|
|
||||||
seed: 2.0
|
|
||||||
tb_enable: False # if True then TensorBoard logging is enabled (may impact perf)
|
|
||||||
tb_dir: '$expdir/tb' # path where tensorboard logs would be stored
|
|
||||||
checkpoint:
|
|
||||||
filename: '$expdir/checkpoint.pth'
|
|
||||||
freq: 10
|
|
||||||
|
|
||||||
# reddis address of Ray cluster. Use None for single node run
|
|
||||||
# otherwise it should something like host:6379. Make sure to run on head node:
|
|
||||||
# "ray start --head --redis-port=6379"
|
|
||||||
redis: null
|
|
||||||
apex: # this is overriden in search and eval individually
|
|
||||||
enabled: False # global switch to disable everything apex
|
|
||||||
distributed_enabled: True # enable/disable distributed mode
|
|
||||||
mixed_prec_enabled: True # switch to disable amp mixed precision
|
|
||||||
gpus: '' # use GPU IDs specified here (comma separated), if '' then use all GPUs
|
|
||||||
opt_level: 'O2' # optimization level for mixed precision
|
|
||||||
bn_fp32: True # keep BN in fp32
|
|
||||||
loss_scale: "dynamic" # loss scaling mode for mixed prec, must be string reprenting floar ot "dynamic"
|
|
||||||
sync_bn: False # should be replace BNs with sync BNs for distributed model
|
|
||||||
scale_lr: True # enable/disable distributed mode
|
|
||||||
min_world_size: 0 # allows to confirm we are indeed in distributed setting
|
|
||||||
detect_anomaly: False # if True, PyTorch code will run 6X slower
|
|
||||||
seed: '_copy: /common/seed'
|
|
||||||
ray:
|
|
||||||
enabled: False # initialize ray. Note: ray cannot be used if apex distributed is enabled
|
|
||||||
local_mode: False # if True then ray runs in serial mode
|
|
||||||
|
|
||||||
smoke_test: False
|
|
||||||
only_eval: False
|
|
||||||
resume: True
|
|
||||||
|
|
||||||
dataset: {} # default dataset settings comes from __include__ on the top
|
|
||||||
|
|
||||||
nas:
|
|
||||||
search:
|
|
||||||
max_num_models: 3
|
|
||||||
ratio_fastest_duration: 1.2
|
|
||||||
natsbench:
|
|
||||||
natsbench_tss_fast: 'NATS-tss-v1_0-3ffb9-simple' # folder name in dataroot/natsbench that contains the tss fast mode folder
|
|
||||||
finalizer: 'default' # options are 'random' or 'default'
|
|
||||||
data_parallel: False
|
|
||||||
checkpoint:
|
|
||||||
_copy: '/common/checkpoint'
|
|
||||||
resume: '_copy: /common/resume'
|
|
||||||
full_desc_filename: '$expdir/full_model_desc.yaml' # arch before it was finalized
|
|
||||||
final_desc_filename: '$expdir/final_model_desc.yaml' # final arch is saved in this file
|
|
||||||
loader:
|
|
||||||
apex:
|
|
||||||
_copy: '../../trainer/apex'
|
|
||||||
aug: '' # additional augmentations to use
|
|
||||||
cutout: 0 # cutout length, use cutout augmentation when > 0
|
|
||||||
load_train: True # load train split of dataset
|
|
||||||
train_batch: 256
|
|
||||||
freeze_loader:
|
|
||||||
train_batch: 1024 # batch size for freeze training.
|
|
||||||
train_workers: 4 # if null then gpu_count*4
|
|
||||||
test_workers: '_copy: ../train_workers' # if null then 4
|
|
||||||
load_test: False # load test split of dataset
|
|
||||||
test_batch: 1024
|
|
||||||
val_ratio: 0.0 #split portion for test set, 0 to 1
|
|
||||||
val_fold: 0 #Fold number to use (0 to 4)
|
|
||||||
cv_num: 5 # total number of folds available
|
|
||||||
dataset:
|
|
||||||
_copy: '/dataset'
|
|
||||||
trainer:
|
|
||||||
use_val: False
|
|
||||||
top1_acc_threshold: 0.3 # after some accuracy we will shift into training only the last 'n' layers
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0
|
|
||||||
drop_path_prob: 0.2 # probability that given edge will be dropped
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'arch_train'
|
|
||||||
epochs: 200
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
# additional vals for the derived class
|
|
||||||
plotsdir: '' #empty string means no plots, other wise plots are generated for each epoch in this dir
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4
|
|
||||||
momentum: 0.9 # pytorch default is 0
|
|
||||||
nesterov: True
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate, this will be used in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'search_val'
|
|
||||||
logger_freq: 0
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
|
|
||||||
freeze_trainer:
|
|
||||||
plotsdir: ''
|
|
||||||
identifiers_to_unfreeze: ['classifier', 'lastact', 'cells.16', 'cells.15', 'cells.14', 'cells.13'] # last few layer names in natsbench: lastact, lastact.0, lastact.1: BN-Relu, global_pooling: global avg. pooling (doesn't get exposed as a named param though), classifier: linear layer
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
drop_path_prob: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'eval_train'
|
|
||||||
epochs: 5
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4 # pytorch default is 0.0
|
|
||||||
momentum: 0.9 # pytorch default is 0.0
|
|
||||||
nesterov: True # pytorch default is False
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'eval_test'
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
logger_freq: 0
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
|
|
||||||
post_trainer:
|
|
||||||
plotsdir: ''
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
drop_path_prob: 0.0 # very important that this is 0.0 for freeze training
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'eval_train'
|
|
||||||
epochs: 3
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4 # pytorch default is 0.0
|
|
||||||
momentum: 0.9 # pytorch default is 0.0
|
|
||||||
nesterov: True # pytorch default is False
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate to be set in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'eval_test'
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
logger_freq: 0
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
|
@ -1,111 +0,0 @@
|
||||||
__include__: "../datasets/cifar10.yaml" # default dataset settings are for cifar
|
|
||||||
|
|
||||||
common:
|
|
||||||
experiment_name: 'throwaway' # you should supply from command line
|
|
||||||
experiment_desc: 'throwaway'
|
|
||||||
logdir: '~/logdir'
|
|
||||||
log_prefix: 'log' # prefix for log files that will becreated (log.log and log.yaml), no log files if ''
|
|
||||||
log_level: 20 # logging.INFO
|
|
||||||
backup_existing_log_file: False # should we overwrite existing log file without making a copy?
|
|
||||||
yaml_log: True # if True, structured logs as yaml are also generated
|
|
||||||
seed: 2.0
|
|
||||||
tb_enable: False # if True then TensorBoard logging is enabled (may impact perf)
|
|
||||||
tb_dir: '$expdir/tb' # path where tensorboard logs would be stored
|
|
||||||
checkpoint:
|
|
||||||
filename: '$expdir/checkpoint.pth'
|
|
||||||
freq: 10
|
|
||||||
|
|
||||||
# reddis address of Ray cluster. Use None for single node run
|
|
||||||
# otherwise it should something like host:6379. Make sure to run on head node:
|
|
||||||
# "ray start --head --redis-port=6379"
|
|
||||||
redis: null
|
|
||||||
apex: # this is overriden in search and eval individually
|
|
||||||
enabled: False # global switch to disable everything apex
|
|
||||||
distributed_enabled: True # enable/disable distributed mode
|
|
||||||
mixed_prec_enabled: True # switch to disable amp mixed precision
|
|
||||||
gpus: '' # use GPU IDs specified here (comma separated), if '' then use all GPUs
|
|
||||||
opt_level: 'O2' # optimization level for mixed precision
|
|
||||||
bn_fp32: True # keep BN in fp32
|
|
||||||
loss_scale: "dynamic" # loss scaling mode for mixed prec, must be string reprenting floar ot "dynamic"
|
|
||||||
sync_bn: False # should be replace BNs with sync BNs for distributed model
|
|
||||||
scale_lr: True # enable/disable distributed mode
|
|
||||||
min_world_size: 0 # allows to confirm we are indeed in distributed setting
|
|
||||||
detect_anomaly: False # if True, PyTorch code will run 6X slower
|
|
||||||
seed: '_copy: /common/seed'
|
|
||||||
ray:
|
|
||||||
enabled: False # initialize ray. Note: ray cannot be used if apex distributed is enabled
|
|
||||||
local_mode: False # if True then ray runs in serial mode
|
|
||||||
|
|
||||||
smoke_test: False
|
|
||||||
only_eval: False
|
|
||||||
resume: True
|
|
||||||
|
|
||||||
dataset: {} # default dataset settings comes from __include__ on the top
|
|
||||||
|
|
||||||
nas:
|
|
||||||
search:
|
|
||||||
max_num_models: 2
|
|
||||||
natsbench:
|
|
||||||
natsbench_tss_fast: 'NATS-tss-v1_0-3ffb9-simple' # folder name in dataroot/natsbench that contains the tss fast mode folder
|
|
||||||
finalizer: 'default' # options are 'random' or 'default'
|
|
||||||
data_parallel: False
|
|
||||||
checkpoint:
|
|
||||||
_copy: '/common/checkpoint'
|
|
||||||
resume: '_copy: /common/resume'
|
|
||||||
full_desc_filename: '$expdir/full_model_desc.yaml' # arch before it was finalized
|
|
||||||
final_desc_filename: '$expdir/final_model_desc.yaml' # final arch is saved in this file
|
|
||||||
loader:
|
|
||||||
apex:
|
|
||||||
_copy: '../../trainer/apex'
|
|
||||||
aug: '' # additional augmentations to use
|
|
||||||
cutout: 0 # cutout length, use cutout augmentation when > 0
|
|
||||||
load_train: True # load train split of dataset
|
|
||||||
train_batch: 1024
|
|
||||||
train_workers: 4 # if null then gpu_count*4
|
|
||||||
test_workers: '_copy: ../train_workers' # if null then 4
|
|
||||||
load_test: False # load test split of dataset
|
|
||||||
test_batch: 1024
|
|
||||||
val_ratio: 0.0 #split portion for test set, 0 to 1
|
|
||||||
val_fold: 0 #Fold number to use (0 to 4)
|
|
||||||
cv_num: 5 # total number of folds available
|
|
||||||
dataset:
|
|
||||||
_copy: '/dataset'
|
|
||||||
trainer:
|
|
||||||
use_val: False
|
|
||||||
top1_acc_threshold: 0.1 # after some accuracy we will shift into training only the last 'n' layers
|
|
||||||
apex:
|
|
||||||
_copy: '/common/apex'
|
|
||||||
aux_weight: 0.0
|
|
||||||
drop_path_prob: 0.2 # probability that given edge will be dropped
|
|
||||||
grad_clip: 5.0 # grads above this value is clipped
|
|
||||||
logger_freq: 1000 # after every N updates dump loss and other metrics in logger
|
|
||||||
title: 'arch_train'
|
|
||||||
epochs: 16
|
|
||||||
batch_chunks: 1 # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
# additional vals for the derived class
|
|
||||||
plotsdir: '' #empty string means no plots, other wise plots are generated for each epoch in this dir
|
|
||||||
l1_alphas: 0.0 # weight to be applied to sum(abs(alphas)) to loss term
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
optimizer:
|
|
||||||
type: 'sgd'
|
|
||||||
lr: 0.1 # init learning rate
|
|
||||||
decay: 5.0e-4
|
|
||||||
momentum: 0.9 # pytorch default is 0
|
|
||||||
nesterov: True
|
|
||||||
decay_bn: .NaN # if NaN then same as decay otherwise apply different decay to BN layers
|
|
||||||
lr_schedule:
|
|
||||||
type: 'cosine'
|
|
||||||
min_lr: 0.000 # min learning rate, this will be used in eta_min param of scheduler
|
|
||||||
warmup: # increases LR for 0 to current in specified epochs and then hands over to main scheduler
|
|
||||||
multiplier: 1
|
|
||||||
epochs: 0 # 0 disables warmup
|
|
||||||
validation:
|
|
||||||
title: 'search_val'
|
|
||||||
logger_freq: 0
|
|
||||||
batch_chunks: '_copy: ../../batch_chunks' # split batch into these many chunks and accumulate gradients so we can support GPUs with lower RAM
|
|
||||||
freq: 1 # perform validation only every N epochs
|
|
||||||
lossfn:
|
|
||||||
type: 'CrossEntropyLoss'
|
|
||||||
|
|
||||||
|
|
|
@ -1,348 +0,0 @@
# Reproducing Experimental Results in [FEAR: Ranking Architectures by Feature Extraction Capabilities](anonymous)

Since `shortreg` and `FEAR` with different hyperparameters are not
contained in Natsbench, the experiments require partially training
1000 architectures sampled from the Natsbench topology search space.
Consequently this requires significant compute. To support ease of reproduction,
we will also make public the associated log files upon publication.

## Install [Archai](https://github.com/microsoft/archai/tree/master/archai)
We utilize the open-source MIT licensed Archai NAS framework for the
experiments in this work. Please follow the
[installation instructions](https://github.com/microsoft/archai/blob/master/docs/install.md)
provided by the authors of the framework to install the latest version.

## Download datasets
Make a directory named `~/dataroot`

## Reproducing figures 3, 6, 7, 8

In the paper, figures 3, 6, 7 and 8 plot average duration
per architecture vs. Spearman's correlation and
average duration per architecture vs. common ratio over the top x% of the
1000 architectures sampled from the Natsbench topological search space on
CIFAR10, CIFAR100 and ImageNet16-120. We also show the various zero-cost
measures from Abdelfattah et al. (22) in green.

There are three sets of experiments to be run; their corresponding
logs are processed and passed to a script that plots the figures.

* `shortreg`:

  The command line that will run `shortreg` (regular training of a neural network with shortened epochs):

  ```
  python scripts/main.py \
  --full \
  --algos natsbench_regular_eval \
  --common.seed 36 \
  --nas.eval.loader.train_batch <batch_size> \
  --nas.eval.trainer.epochs <num_epochs> \
  --nas.eval.natsbench.arch_index <arch_id> \
  --exp-prefix <exp_name> \
  --datasets <datasets>
  ```

  `--nas.eval.loader.train_batch <batch_size>` is the batch size to vary. For example, on
  CIFAR100 we vary the batch size over 256, 512, 1024, 2048.

  `--nas.eval.trainer.epochs <num_epochs>` is the number of epochs of training to vary.
  For example, on CIFAR100 we vary the number of training epochs over 10, 20, 30.

  `<arch_id>` is an architecture id in the list of 1000 uniform random architectures
  sampled from Natsbench. For the exact list of architectures see the list in `main_proxynas_nb_wrapper.py`,
  which also shows a simple way to distribute these 1000 architectures across machines
  (a small illustrative sketch is given at the end of this item).

  `<exp_name>` is an appropriately chosen experiment name.

  `<datasets>` is one of CIFAR10, CIFAR100, ImageNet16-120.

  Each of the combinations above produces a folder with name `<exp_name>` containing
  corresponding log files. Each log must be analyzed by the analysis script:

  ```
  python scripts/reports/fear_analysis/analysis_regular_natsbench_space.py \
  --results-dir /path/to/exp_name \
  --out-dir /path/to/processed/results
  ```

  where `/path/to/processed/results` will be a folder created by the
  script to save processed relevant data needed later on for creating
  plots over the 1000 architectures.
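  For illustration only (this helper is not part of the repository), one way to split the 1000
  architecture ids across machines and emit the per-architecture command lines could look like:

  ```python
  # Hypothetical sketch: shard architecture ids across machines and print commands.
  # `arch_ids` is a placeholder; the real list lives in main_proxynas_nb_wrapper.py.
  arch_ids = list(range(1000))
  num_machines = 8
  machine_rank = 0  # set per machine

  for arch_id in arch_ids[machine_rank::num_machines]:
      print('python scripts/main.py --full --algos natsbench_regular_eval '
            f'--common.seed 36 --nas.eval.natsbench.arch_index {arch_id} '
            f'--exp-prefix shortreg_arch{arch_id} --datasets cifar100')
  ```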
|
|
||||||
* `FEAR`

The command line that will run `FEAR` to evaluate each architecture:

```
python scripts/main.py \
--full \
--algos proxynas_natsbench_space \
--common.seed 36 \
--nas.eval.loader.freeze_loader.train_batch <freeze_batch_size> \
--nas.eval.freeze_trainer.epochs <freeze_num_epochs> \
--nas.eval.natsbench.arch_index <arch_id> \
--nas.eval.trainer.top1_acc_threshold <top1_acc_threshold> \
--exp-prefix <exp_name> \
--datasets <datasets>
```

`<freeze_batch_size>` is the batch size used for the second stage,
where most of the architecture is frozen and only the last few
layers are trained for a few more epochs.

`<top1_acc_threshold>` is the training accuracy threshold
up to which the entire network is trained before entering the
second phase. This is dataset dependent and found by a shallow
pipeline: CIFAR10: 0.6, CIFAR100: 0.3, ImageNet16-120: 0.2.

`<freeze_num_epochs>` is the number of epochs to train the network
in the second phase when most of the network is frozen.

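For intuition, here is a minimal, self-contained sketch of the two-phase idea behind `FEAR` on a toy model with random data. It is not the actual Archai trainer; the tiny model, the random dataset and all hyperparameters are placeholders chosen only so that the sketch runs.

```
"""Conceptual sketch of FEAR's two-phase evaluation on a toy model.

This is NOT the actual Archai trainer: the tiny model, the random data and the
hyperparameters below are placeholders chosen only to make the sketch runnable.
"""
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset


def top1(model, loader):
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in loader:
            correct += (model(x).argmax(dim=1) == y).sum().item()
            total += y.numel()
    return correct / total


def train_one_epoch(model, loader, lr=0.1):
    opt = torch.optim.SGD([p for p in model.parameters() if p.requires_grad], lr=lr)
    loss_fn = nn.CrossEntropyLoss()
    model.train()
    for x, y in loader:
        opt.zero_grad()
        loss_fn(model(x), y).backward()
        opt.step()


# Toy stand-ins for a Natsbench architecture and a dataset.
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 64), nn.ReLU(), nn.Linear(64, 10))
data = TensorDataset(torch.randn(512, 3, 32, 32), torch.randint(0, 10, (512,)))
loader = DataLoader(data, batch_size=256)

# Phase 1: train the whole network until the training accuracy threshold is reached
# (capped here so the sketch always terminates on random data).
top1_acc_threshold, max_phase1_epochs = 0.3, 20
epoch = 0
while top1(model, loader) < top1_acc_threshold and epoch < max_phase1_epochs:
    train_one_epoch(model, loader)
    epoch += 1

# Phase 2: freeze everything except the last layer and train for a few more epochs.
for p in model.parameters():
    p.requires_grad = False
for p in model[-1].parameters():
    p.requires_grad = True
for _ in range(5):  # corresponds to <freeze_num_epochs>
    train_one_epoch(model, loader)

# The training accuracy reached after phase 2 is used to rank this architecture.
print("FEAR score (train top-1):", top1(model, loader))
```

The figures in the paper then relate the average wall-clock duration of this procedure per architecture to how well the resulting scores rank the architectures.
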
Each of the combinations above produces a folder with name `<exp_name>` containing
the corresponding log files. Each log must be analyzed by the analysis script:

```
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py \
--results-dir /path/to/exp_name \
--out-dir /path/to/processed/results
```

where `/path/to/processed/results` will be a folder created by the
script to save the processed data needed later on for creating
plots over the 1000 architectures.

* `zero cost` measures

The command line that will compute `zero cost` scores for each architecture:

```
python scripts/main.py \
--full \
--algos zerocost_natsbench_space \
--nas.eval.natsbench.arch_index <arch_id> \
--datasets <dataset>
```

Each of the combinations above produces a folder with name `<exp_name>` containing
the corresponding log files. Each log must be analyzed by the analysis script:

```
python scripts/reports/fear_analysis/analysis_natsbench_zerocost.py \
--results-dir /path/to/exp_name \
--out-dir /path/to/processed/results
```

where `/path/to/processed/results` will be a folder created by the
script to save the processed data needed later on for creating
plots over the 1000 architectures.

* Collating all methods into a single plot:

Now that `shortreg`, `FEAR` and the `zero-cost` measures have all been run and
processed on the same set of 1000 architectures, one can use:

```
python scripts/reports/fear_plots/cross_exp_plots.py \
--dataset <dataset_name> \
--conf-location scripts/reports/fear_plots/cross_exp_conf.yaml
```

where `<dataset_name>` can take the values `natsbench_cifar10`,
`natsbench_cifar100` or `natsbench_imagenet16-120`.

`cross_exp_conf.yaml` has to be edited to input the exact names
of the experiments used, but should be pretty self-explanatory.

Note that Table 2 in the paper is produced by manually inspecting
figures 3, 6, 7, 8 produced by the procedure above.

## Reproducing Table 3

Table 3 is produced by running `FEAR` and the `zero-cost` measures
on a dataset we term Synthetic CIFAR10, so the first step is to
reproduce this dataset. Note that since this dataset is produced
by a random process, we will make the exact instance used in the
paper available upon acceptance for the community to run further
experiments on. Since this dataset is not part of any benchmark,
we first fully train the 1000 architectures from Natsbench on this dataset
to produce *groundtruth* test accuracies. We keep the same
training hyperparameters as used in Natsbench and train each architecture for
200 epochs.

* Reproducing Synthetic CIFAR10.

Edit `out_dir` in `scripts/datasets/synthetic_gen/gen_synthetic_dataset.py`
to reflect a path on disk you want to save the dataset in.

Then simply run `python scripts/datasets/synthetic_gen/gen_synthetic_dataset.py`
to generate the dataset.

* Fully training 1000 architectures on Synthetic CIFAR10.

```
python scripts/main.py \
--full \
--algos natsbench_regular_eval \
--common.seed 36 \
--nas.eval.loader.train_batch 256 \
--nas.eval.trainer.epochs 200 \
--nas.eval.natsbench.arch_index <arch_id> \
--exp-prefix <exp_name> \
--datasets synthetic_cifar10
```

followed by an analysis script on the log files generated
by the full training:

```
python scripts/reports/fear_analysis/analysis_natsbench_nonstandard_generate_benchmark.py \
--results-dir /path/to/logs/from/full/training \
--out-dir /path/to/folder/for/saving/benchmark
```

This will generate a file named `archid_test_accuracy_synthetic_cifar10.yaml`
which contains, for every architecture id in the set of 1000 used, the test
accuracy it obtained on this synthetic dataset. This file is then passed
to downstream analysis scripts as detailed below.

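For reference, the sketch below shows how such a file could be loaded downstream. The exact layout of the generated YAML is an assumption here (a flat `archid: test_accuracy` mapping); consult the generated file for the actual structure.

```
# Minimal sketch of reading an archid -> test accuracy mapping from the
# generated YAML. The exact key/value layout of the real file may differ;
# the structure assumed here is for illustration only.
import yaml

with open("archid_test_accuracy_synthetic_cifar10.yaml") as f:
    reg_evals = yaml.safe_load(f)   # assumed: {arch_id: test_accuracy, ...}

# Example: rank architecture ids by their groundtruth test accuracy.
ranked = sorted(reg_evals.items(), key=lambda kv: kv[1], reverse=True)
print("best architecture id:", ranked[0][0], "test accuracy:", ranked[0][1])
```
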
* `zero-cost` on Synthetic CIFAR10.

Same as running zero-cost measures on any other dataset:

```
python scripts/main.py \
--full \
--algos zerocost_natsbench_space \
--datasets synthetic_cifar10
```

* `FEAR` on Synthetic CIFAR10.

Same as running `FEAR` on any other dataset:

```
python scripts/main.py \
--full \
--algos proxynas_natsbench_space \
--common.seed 36 \
--nas.eval.loader.freeze_loader.train_batch 1024 \
--nas.eval.freeze_trainer.epochs 5 \
--nas.eval.natsbench.arch_index <arch_id> \
--nas.eval.trainer.top1_acc_threshold 0.15 \
--exp-prefix <exp_name> \
--datasets synthetic_cifar10
```

Each of the combinations above produces a folder with name `<exp_name>` containing
the corresponding log files. Each log must be analyzed by the analysis script:

```
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py \
--results-dir /path/to/exp_name \
--out-dir /path/to/processed/results \
--reg-evals-file /path/to/archid_test_accuracy_synthetic_cifar10.yaml
```

where `/path/to/processed/results` will be a folder created by the
script to save the processed data needed later on for creating
plots over the 1000 architectures. Note the use of `archid_test_accuracy_synthetic_cifar10.yaml`
since this dataset is not part of the Natsbench benchmark.

* Collating all methods into a single plot:
Now that the ranking methods and the full training have been run, the
plots comparing all the methods can be generated using the same
process and scripts as for benchmark datasets like
CIFAR10 and CIFAR100 detailed above.

## Reproducing Figure 4

Run

```
python scripts/main.py \
--full \
--algos zerocost_natsbench_epochs_space \
--nas.eval.natsbench.arch_index <arch_id> \
--datasets cifar10
```

to produce the logs of running the `zero-cost` measures after every epoch of training for 200 epochs
on each of the 1000 architectures.

Then point the analysis script to the folder of results for analysis:

```
python scripts/reports/fear_analysis/analysis_natsbench_zerocost_epochs.py \
--results-dir /path/to/results \
--out-dir /path/to/save/dir
```

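For reference, the sketch below illustrates the kind of computation behind Figure 4: tracking how well a zero-cost measure computed at a given epoch correlates with groundtruth accuracy, as a function of that epoch. It uses random placeholder data and is not the analysis script itself.

```
# Rough sketch of the computation behind Figure 4 (not the analysis script itself):
# track the Spearman correlation between a zero-cost measure and groundtruth
# accuracy as a function of the epoch at which the measure is computed.
# All data below is random placeholder data.
import numpy as np
from scipy.stats import spearmanr

rng = np.random.default_rng(0)
num_archs, num_epochs = 1000, 200
groundtruth_acc = rng.random(num_archs)                   # placeholder groundtruth accuracies
scores_per_epoch = rng.random((num_epochs, num_archs))    # placeholder zero-cost scores per epoch

corr_by_epoch = [spearmanr(scores_per_epoch[e], groundtruth_acc).correlation
                 for e in range(num_epochs)]
print("Spearman correlation at epoch 1 and epoch 200:", corr_by_epoch[0], corr_by_epoch[-1])
```
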
## Reproducing Random Search Results

* Random Search with FEAR

```
python scripts/main.py \
--full \
--algos random_natsbench_tss_far \
--datasets <dataset_name> \
--nas.search.trainer.top1_acc_threshold <dataset_specific_threshold> \
--nas.search.max_num_models 500 \
--nas.search.ratio_fastest_duration 4 \
--common.seed <seed> \
--no-eval
```

Analysis:

```
python scripts/reports/fear_analysis/analysis_random_search_natsbench_tss_far.py \
--results-dir /path/to/results \
--out-dir /path/to/save/dir
```

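For intuition, the sketch below shows a heavily simplified version of random search with FEAR-style early termination, where a candidate is abandoned once its evaluation exceeds `ratio_fastest_duration` times the fastest evaluation time observed so far. It is a conceptual illustration, not the `random_natsbench_tss_far` implementation; the sampling and evaluation functions are random stand-ins.

```
# Conceptual sketch of random search with FEAR-style early termination
# (NOT the actual random_natsbench_tss_far algorithm). The helpers below are
# random stand-ins for architecture sampling and FEAR evaluation.
import random


def sample_architecture():
    # Placeholder: in reality an architecture is sampled from the Natsbench search space.
    return random.randrange(1000)


def fear_evaluate_with_time(arch, time_budget=None):
    # Placeholder: pretend the FEAR evaluation of `arch` takes a random amount of
    # time and yields a random score; a candidate slower than the budget is abandoned.
    elapsed = random.uniform(1.0, 10.0)
    if time_budget is not None and elapsed > time_budget:
        return None, elapsed
    return random.random(), elapsed


def random_search_fear(max_num_models=500, ratio_fastest_duration=4.0):
    fastest = None                      # fastest successful evaluation time so far
    best_arch, best_score = None, float("-inf")
    for _ in range(max_num_models):
        arch = sample_architecture()
        budget = None if fastest is None else ratio_fastest_duration * fastest
        score, elapsed = fear_evaluate_with_time(arch, time_budget=budget)
        if score is None:
            continue                    # terminated early: slower than the allowed ratio
        fastest = elapsed if fastest is None else min(fastest, elapsed)
        if score > best_score:
            best_arch, best_score = arch, score
    return best_arch, best_score


print(random_search_fear())
```

Here `--nas.search.max_num_models 500` corresponds to `max_num_models` and `--nas.search.ratio_fastest_duration 4` to `ratio_fastest_duration` in the sketch.
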
* Random Search with `shortreg`

```
python scripts/main.py \
--full \
--algos random_natsbench_tss_reg \
--datasets <dataset_name> \
--nas.search.max_num_models 500 \
--common.seed <seed> \
--no-eval
```

Analysis:

```
python scripts/reports/fear_analysis/analysis_random_search_natsbench_tss_reg.py \
--results-dir /path/to/results \
--out-dir /path/to/save/dir
```

@@ -1,450 +0,0 @@
|
||||||
@REM cifar10
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte5_ct256_ftt0.5 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte10_ct256_ftt0.5 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte5_ct256_ftt0.4 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte10_ct256_ftt0.4 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte5_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte10_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr1.5_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr1.5_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr1.5_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr1.5_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr0.1_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr0.1_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb2048_ftlr1.5_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr1.5_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr1.5_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr1.5_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr1.5_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr0.1_fte5_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr0.1_fte10_ct256_ftt0.6_c9 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb256_ftlr0.1_fte15_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte5_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr1.5_fte10_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte5_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_fb512_ftlr0.1_fte10_ct256_ftt0.6_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_classifier --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c16 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c15 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c14 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c12 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c11 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c10 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c9 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.55_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.50_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.45_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte5_ct256_ftt0.40_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte15_ct256_ftt0.6_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte20_ct256_ftt0.6_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte25_ct256_ftt0.6_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_fb1024_ftlr0.1_fte30_ct256_ftt0.6_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e01 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e02 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e04 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e06 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e08 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e01 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e02 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e04 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e06 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e08 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b512_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e01 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e02 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e04 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e06 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e08 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e01_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e02_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e04_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e06_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e08_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e10_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e20_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b1024_e30_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e01_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e02_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e04_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e06_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e08_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e10_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e20_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_reg_b256_e30_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM cifar100
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb1024_ftlr0.1_fte5_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb1024_ftlr0.1_fte10_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte10_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb256_ftlr0.1_fte5_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb256_ftlr0.1_fte10_ct256_ftt0.3 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb1024_ftlr0.1_fte5_ct256_ftt0.3_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb1024_ftlr0.1_fte10_ct256_ftt0.3_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte10_ct256_ftt0.3_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_classifier --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c16 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c15 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c14 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c12 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c11 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c10 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_c9 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.25_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.20_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.15_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.10_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte10_ct256_ftt0.3_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte15_ct256_ftt0.3_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte20_ct256_ftt0.3_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte25_ct256_ftt0.3_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_c100_fb512_ftlr0.1_fte30_ct256_ftt0.3_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b256_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b256_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b256_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b512_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b512_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b512_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b2048_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b2048_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b2048_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e10_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e20_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_c100_reg_b1024_e30_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM ImageNet16-120
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.5_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.5_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.5_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.5_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.5_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.5_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.5_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.5_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr1.0_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr1.0_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr1.0_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr1.0_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr1.0_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr1.0_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr1.0_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr1.0_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.5_fte5_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.5_fte10_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.5_fte5_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.5_fte10_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr1.0_fte5_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr1.0_fte10_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr1.0_fte5_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr1.0_fte10_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.1_val --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte30_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.5_fte5_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.5_fte10_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.5_fte5_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.5_fte10_ct256_ftt0.2_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c14 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c15 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c16 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_lastact --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.1_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.1_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.1_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.1_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.1 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b1024_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b1024_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b1024_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i6_reg_b2048_e10 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i6_reg_b2048_e20 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i6_reg_b2048_e30 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e01_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e02_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e04_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e06_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e08_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e12 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e14 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e16 --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b256_e18 --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e22_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e24_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e26_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e28_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e10_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e20_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb_i16_reg_b512_e30_scu --out-dir D:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_classifier --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c16 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c12 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c11 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c10 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c9 --out-dir F:\\archai_experiment_reports
|
|
||||||
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.25_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.15_c13 --out-dir F:\\archai_experiment_reports
|
|
||||||
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.10_c13 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte15_ct256_ftt0.2_c13 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte20_ct256_ftt0.2_c13 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte25_ct256_ftt0.2_c13 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_freeze_natsbench_space_new.py --results-dir F:\\archaiphilly\\phillytools\\ft_i6_fb512_ftlr0.1_fte30_ct256_ftt0.2_c13 --out-dir F:\\archai_experiment_reports
@REM Flower 102
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb2048_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb2048_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb2048_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb2048_ftlr0.1_fte30_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb1024_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb1024_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb1024_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb1024_ftlr0.1_fte30_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb512_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb512_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb512_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb256_ftlr0.1_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb256_ftlr0.1_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_f102_fb256_ftlr0.1_fte15_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_f102_b256_reg200\\archid_testacc.yaml
@REM Synthetic Cifar 10
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_reg_b256_e200_sc10\\arch_id_test_accuracy_synthetic_cifar10.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c14 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_reg_b256_e200_sc10\\arch_id_test_accuracy_synthetic_cifar10.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c15 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_reg_b256_e200_sc10\\arch_id_test_accuracy_synthetic_cifar10.yaml
@REM python scripts/reports/analysis_freeze_natsbench_space_new.py --results-dir D:\\archaiphilly\\phillytools\\ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c16 --out-dir D:\\archai_experiment_reports --reg-evals-file D:\\archai_experiment_reports\\nb_reg_b256_e200_sc10\\arch_id_test_accuracy_synthetic_cifar10.yaml
@REM Nasbench 101
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb256_ftlr0.2_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb256_ftlr0.2_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb1024_ftlr0.2_fte5_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb1024_ftlr0.2_fte10_ct256_ftt0.2 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb2048_ftlr0.2_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb2048_ftlr0.2_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb2048_ftlr1.0_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\ft_nb101_fb2048_ftlr1.0_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb2048_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb2048_ftlr0.01_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb2048_ftlr0.025_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb3072_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb3072_ftlr0.01_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s1_fb3072_ftlr0.025_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb2048_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb2048_ftlr0.01_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb2048_ftlr0.025_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb3072_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb3072_ftlr0.01_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb3072_ftlr0.025_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.001_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.01_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.01_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.025_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_se_fb256_ftlr0.025_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s123_fb256_ftlr0.001_fte5_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
python scripts/reports/analysis_freeze_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s123_fb256_ftlr0.001_fte10_ct256_ftt0.6 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.001_fte5_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.001_fte10_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.01_fte5_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.01_fte10_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.025_fte5_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/temp_analysis.py --results-dir D:\\archaiphilly\\phillytools\\fa_nb1_s3_fb256_ftlr0.025_fte10_nocond --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e01 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e02 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e04 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e06 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e08 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e10 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e20 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e30 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e108 --out-dir D:\\archai_experiment_reports
@REM python scripts/reports/analysis_regular_natsbench_space.py --results-dir D:\\archaiphilly\\phillytools\\nb101_reg_b256_e108_rms --out-dir D:\\archai_experiment_reports
@REM Local Search
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_ftt0.6_max300_ratio2.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_ftt0.6_max300_ratio4.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_ftt0.6_max300_ratio8.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_max300_b1024_e02_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_max300_b1024_e20_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_max300_b1024_e30_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_max300_b1024_e40_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_max300_b1024_e50_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar10
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_c100_ftt0.3_max300_ratio2.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_c100_ftt0.3_max300_ratio4.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_c100_ftt0.3_max300_ratio8.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_c100_max300_b1024_e02_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_c100_max300_b1024_e20_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_c100_max300_b1024_e30_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_c100_max300_b1024_e40_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_c100_max300_b1024_e50_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset cifar100
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_i16_ftt0.2_max300_ratio2.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_i16_ftt0.2_max300_ratio4.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_far_i16_ftt0.2_max300_ratio8.0_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_i16_max300_b1024_e02_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_i16_max300_b1024_e20_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_i16_max300_b1024_e30_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_i16_max300_b1024_e40_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM python scripts/reports/fear_analysis/analysis_local_search_natsbench_tss.py --results-dir F:\\archaiphilly\\phillytools\\ls_reg_i16_max300_b1024_e50_fixedseeds --out-dir F:\\archai_experiment_reports --natsbench_loc C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9-simple --dataset ImageNet16-120
@REM Random Search on DARTS
python scripts/reports/fear_analysis/analysis_random_search_darts_reg.py --results-dir F:\\archaiphilly\\phillytools\\rs_darts_reg_e5 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_random_search_darts_reg.py --results-dir F:\\archaiphilly\\phillytools\\rs_darts_reg_e10 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_random_search_darts_reg.py --results-dir F:\\archaiphilly\\phillytools\\rs_darts_reg_e15 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_random_search_darts_far.py --results-dir F:\\archaiphilly\\phillytools\\rs_darts_far_ftt0.6_fte10_ratio_1.2 --out-dir F:\\archai_experiment_reports
python scripts/reports/fear_analysis/analysis_random_search_darts_far.py --results-dir F:\\archaiphilly\\phillytools\\rs_darts_far_ftt0.6_fte10_ratio_4.0 --out-dir F:\\archai_experiment_reports
@@ -90,10 +90,8 @@ def main():
    for dataset_key in ['dataset', 'dataset_search', 'dataset_eval']:
        if dataset_key in conf:
            print(f'dataset_key: {dataset_key}')
-           conf_data = conf[dataset_key]
-           print('conf_data:')
-           print(conf_data)
-           untar_dataset(pt_data_dir, conf_data, args.dataroot)
+           conf_dataset = conf[dataset_key]
+           untar_dataset(dataset_key, pt_data_dir, conf_dataset, args.dataroot)


if __name__ == '__main__':
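For readability, a minimal sketch of what the patched loop above does once applied (names are exactly those visible in the hunk; conf, pt_data_dir, args.dataroot and the untar_dataset helper are assumed to be defined earlier in the same script):

    # walk every dataset section that may be present in the experiment config
    for dataset_key in ['dataset', 'dataset_search', 'dataset_eval']:
        if dataset_key in conf:
            print(f'dataset_key: {dataset_key}')
            conf_dataset = conf[dataset_key]
            # the key is now passed along so the helper knows which config section it is extracting
            untar_dataset(dataset_key, pt_data_dir, conf_dataset, args.dataroot)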
File differences are hidden because one or more lines are too long.
@@ -1,40 +0,0 @@
# this is installed via pip
from nats_bench import create

from archai.algos.natsbench.lib.models import get_cell_based_tiny_net


def main():
    # Create the API instance for the topology search space in NATS
    api = create('/home/dedey/dataroot/natsbench/NATS-tss-v1_0-3ffb9-simple', 'tss', fast_mode=True, verbose=True)

    # slow mode (NOTE: uses up 30GB RAM)
    # api = create('C:\\Users\\dedey\\dataroot\\natsbench\\NATS-tss-v1_0-3ffb9.pickle.pbz2', 'tss', fast_mode=False, verbose=True)

    # Query the loss / accuracy / time for the n-th candidate architecture on CIFAR-10
    # info is a dict, where you can easily figure out the meaning by key
    info = api.get_more_info(100, 'cifar10')

    # Query the flops, params, latency. info is a dict.
    cost_info = api.get_cost_info(12, 'cifar10')

    # Show information of an architecture index
    # api.show(100)

    # Query by index to get all runs individually (see paper appendix)
    data = api.query_by_index(284, dataname='cifar10', hp='200')
    data[777].train_acc1es[199]

    info = api.get_more_info(1528, 'cifar10', hp=200, is_random=False)

    # Create the instance of the 12-th candidate for CIFAR-10
    config = api.get_net_config(12, 'cifar10')
    # network is a nn.Module subclass. the last few modules have names
    # lastact, lastact.0, lastact.1, global_pooling, classifier
    # which we can freeze train as usual
    network = get_cell_based_tiny_net(config)


if __name__ == '__main__':
    main()
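A hedged usage sketch for the TSS example above (it reuses only the api object and the calls that already appear in the script; the specific architecture indices are arbitrary). Printing the returned dictionary keys is a safe way to discover what the benchmark exposes without guessing key names:

    for arch_index in [0, 12, 100]:
        info = api.get_more_info(arch_index, 'cifar10', hp=200, is_random=False)
        cost_info = api.get_cost_info(arch_index, 'cifar10')
        print(arch_index, sorted(info.keys()), sorted(cost_info.keys()))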
@@ -1,42 +0,0 @@
# this is installed via pip
from nats_bench import create

from archai.algos.natsbench.lib.models import get_cell_based_tiny_net


def main():
    # Create the API instance for the size search space in NATS
    api = create('C:\\Users\\dedey\\dataroot\\natsbench\\NATS-sss-v1_0-50262-simple', 'sss', fast_mode=True, verbose=True)

    # slow mode (NOTE: uses up lots of RAM)
    # api = create('C:\\Users\\dedey\\dataroot\\natsbench\\NATS-sss-v1_0-50262.pickle.pbz2', 'tss', fast_mode=False, verbose=True)

    # Query the loss / accuracy / time for the n-th candidate architecture on CIFAR-10
    # info is a dict, where you can easily figure out the meaning by key
    info = api.get_more_info(100, 'cifar10')

    # Query the flops, params, latency. info is a dict.
    cost_info = api.get_cost_info(12, 'cifar10')

    # Show information of an architecture index
    # api.show(100)

    # Query by index to get all runs individually (see paper appendix)
    data = api.query_by_index(284, dataname='cifar10', hp='90')
    data[777].train_acc1es[89]

    info = api.get_more_info(1528, 'cifar10', hp=90, is_random=False)

    # Create the instance of the 12-th candidate for CIFAR-10
    config = api.get_net_config(12, 'cifar10')
    # network is a nn.Module subclass. the last few modules have names
    # lastact, lastact.0, lastact.1, global_pooling, classifier
    # which we can freeze train as usual
    network = get_cell_based_tiny_net(config)

    print('Done')


if __name__ == '__main__':
    main()
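A brief, hedged comparison of the two examples above (the tss_api/sss_api names are illustrative; only get_net_config from the scripts is used): the 'tss' benchmark indexes topology candidates whose full training budget is 200 epochs, while the 'sss' benchmark indexes size (channel-width) candidates trained for 90 epochs, which is why the two scripts query hp=200 and hp=90 respectively. The returned config dicts make the difference visible:

    config_tss = tss_api.get_net_config(12, 'cifar10')   # topology search space candidate
    config_sss = sss_api.get_net_config(12, 'cifar10')   # size search space candidate
    print(sorted(config_tss.keys()))
    print(sorted(config_sss.keys()))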
@ -1,95 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import re
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from analysis_utils import epoch_nodes, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
job_count = 0
|
|
||||||
for job_dir in results_dir.iterdir():
|
|
||||||
job_count += 1
|
|
||||||
for subdir in job_dir.iterdir():
|
|
||||||
if not subdir.is_dir():
|
|
||||||
continue
|
|
||||||
# currently we expect that each job was ExperimentRunner job which should have
|
|
||||||
# _search or _eval folders
|
|
||||||
if subdir.stem.endswith('_search'):
|
|
||||||
sub_job = 'search'
|
|
||||||
elif subdir.stem.endswith('_eval'):
|
|
||||||
sub_job = 'eval'
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f'Sub directory "{subdir}" in job "{job_dir}" must '
|
|
||||||
'end with either _search or _eval which '
|
|
||||||
'should be the case if ExperimentRunner was used.')
|
|
||||||
|
|
||||||
logs_filepath = os.path.join(str(subdir), 'log.yaml')
|
|
||||||
if os.path.isfile(logs_filepath):
|
|
||||||
fix_yaml(logs_filepath)
|
|
||||||
with open(logs_filepath, 'r') as f:
|
|
||||||
key = job_dir.name + ':' + sub_job
|
|
||||||
logs[key] = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
# create list of epoch nodes having same path in the logs
|
|
||||||
grouped_logs = group_multi_runs(logs)
|
|
||||||
collated_grouped_logs = collect_epoch_nodes(grouped_logs)
|
|
||||||
summary_text, details_text = '', ''
|
|
||||||
|
|
||||||
for log_key, grouped_logs in collated_grouped_logs.items():
|
|
||||||
# for each path for epochs nodes, compute stats
|
|
||||||
for node_path, logs_epochs_nodes in grouped_logs.items():
|
|
||||||
collated_epoch_stats = get_epoch_stats(node_path, logs_epochs_nodes)
|
|
||||||
summary_text += get_summary_text(log_key, out_dir, node_path, collated_epoch_stats, len(logs_epochs_nodes))
|
|
||||||
details_text += get_details_text(log_key, out_dir, node_path, collated_epoch_stats, len(logs_epochs_nodes))
|
|
||||||
|
|
||||||
write_report('summary.md', **vars())
|
|
||||||
write_report('details.md', **vars())
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@ -1,171 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import re
|
|
||||||
<<<<<<< HEAD
|
|
||||||
from tqdm import tqdm
|
|
||||||
=======
|
|
||||||
>>>>>>> d6e6e107 (Added analysis scripts to compute kendall tau and spearman's correlation for proxynas experiments.)
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
import matplotlib
|
|
||||||
matplotlib.use('Agg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from analysis_utils import epoch_nodes, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
job_count = 0
|
|
||||||
<<<<<<< HEAD
|
|
||||||
for job_dir in tqdm(results_dir.iterdir()):
|
|
||||||
if job_dir.is_dir():
|
|
||||||
job_count += 1
|
|
||||||
for subdir in job_dir.iterdir():
|
|
||||||
if not subdir.is_dir():
|
|
||||||
continue
|
|
||||||
# currently we expect that each job was ExperimentRunner job which should have
|
|
||||||
# _search or _eval folders
|
|
||||||
if subdir.stem.endswith('_search'):
|
|
||||||
sub_job = 'search'
|
|
||||||
elif subdir.stem.endswith('_eval'):
|
|
||||||
sub_job = 'eval'
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f'Sub directory "{subdir}" in job "{job_dir}" must '
|
|
||||||
'end with either _search or _eval which '
|
|
||||||
'should be the case if ExperimentRunner was used.')
|
|
||||||
|
|
||||||
logs_filepath = os.path.join(str(subdir), 'log.yaml')
|
|
||||||
if os.path.isfile(logs_filepath):
|
|
||||||
fix_yaml(logs_filepath)
|
|
||||||
with open(logs_filepath, 'r') as f:
|
|
||||||
key = job_dir.name + ':' + sub_job
|
|
||||||
logs[key] = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
all_good = True
|
|
||||||
if 'eval_arch' not in logs[key].keys():
|
|
||||||
print(f'eval_arch not in {key}')
|
|
||||||
all_good = False
|
|
||||||
if 'freeze_evaluate' not in logs[key].keys():
|
|
||||||
print(f'freeze_evaluate not in {key}')
|
|
||||||
all_good = False
|
|
||||||
if all_good:
|
|
||||||
print(f'{key} is all good')
|
|
||||||
=======
|
|
||||||
for job_dir in results_dir.iterdir():
|
|
||||||
job_count += 1
|
|
||||||
for subdir in job_dir.iterdir():
|
|
||||||
if not subdir.is_dir():
|
|
||||||
continue
|
|
||||||
# currently we expect that each job was ExperimentRunner job which should have
|
|
||||||
# _search or _eval folders
|
|
||||||
if subdir.stem.endswith('_search'):
|
|
||||||
sub_job = 'search'
|
|
||||||
elif subdir.stem.endswith('_eval'):
|
|
||||||
sub_job = 'eval'
|
|
||||||
else:
|
|
||||||
raise RuntimeError(f'Sub directory "{subdir}" in job "{job_dir}" must '
|
|
||||||
'end with either _search or _eval which '
|
|
||||||
'should be the case if ExperimentRunner was used.')
|
|
||||||
|
|
||||||
logs_filepath = os.path.join(str(subdir), 'log.yaml')
|
|
||||||
if os.path.isfile(logs_filepath):
|
|
||||||
fix_yaml(logs_filepath)
|
|
||||||
with open(logs_filepath, 'r') as f:
|
|
||||||
key = job_dir.name + ':' + sub_job
|
|
||||||
logs[key] = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
>>>>>>> d6e6e107 (Added analysis scripts to compute kendall tau and spearman's correlation for proxynas experiments.)
|
|
||||||
|
|
||||||
|
|
||||||
# logs['proxynas_parameterless_seed_198980.3100969506_freeze_lr_0.001:eval']['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
# logs['proxynas_parameterless_seed_198980.3100969506_freeze_lr_0.001:eval']['freeze_evaluate']['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
all_reg_evals = []
|
|
||||||
all_freeze_evals = []
|
|
||||||
for key in logs.keys():
|
|
||||||
if 'eval' in key:
|
|
||||||
<<<<<<< HEAD
|
|
||||||
try:
|
|
||||||
reg_eval_top1 = logs[key]['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
freeze_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
|
|
||||||
all_reg_evals.append(reg_eval_top1)
|
|
||||||
all_freeze_evals.append(freeze_eval_top1)
|
|
||||||
except KeyError as err:
|
|
||||||
print(f'KeyError {err} in {key}')
|
|
||||||
=======
|
|
||||||
reg_eval_top1 = logs[key]['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
freeze_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['eval_train']['best_val']['top1']
|
|
||||||
|
|
||||||
all_reg_evals.append(reg_eval_top1)
|
|
||||||
all_freeze_evals.append(freeze_eval_top1)
|
|
||||||
>>>>>>> d6e6e107 (Added analysis scripts to compute kendall tau and spearman's correlation for proxynas experiments.)
|
|
||||||
|
|
||||||
tau, p_value = kendalltau(all_reg_evals, all_freeze_evals)
|
|
||||||
spe, sp_value = spearmanr(all_reg_evals, all_freeze_evals)
|
|
||||||
print(f'Kendall Tau score: {tau}, p_value {p_value}')
|
|
||||||
print(f'Spearman corr: {spe}, p_value {sp_value}')
|
|
||||||
<<<<<<< HEAD
|
|
||||||
results_savename = os.path.join(results_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'w') as f:
|
|
||||||
f.write(f'Kendall Tau score: {tau}, p_value {p_value}')
|
|
||||||
f.write(f'Spearman corr: {spe}, p_value {sp_value}')
|
|
||||||
|
|
||||||
=======
|
|
||||||
>>>>>>> d6e6e107 (Added analysis scripts to compute kendall tau and spearman's correlation for proxynas experiments.)
|
|
||||||
|
|
||||||
plt.scatter(all_reg_evals, all_freeze_evals)
|
|
||||||
plt.xlabel('Val top1 at 600 epochs')
|
|
||||||
plt.ylabel('Freeze training')
|
|
||||||
<<<<<<< HEAD
|
|
||||||
plt.title('Freeze training at 0.75 val top1 followed by 200 epochs')
|
|
||||||
savename = os.path.join(results_dir, 'proxynas_0.75_freeze_training_200_epochs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
|
|
||||||
=======
|
|
||||||
plt.title('Freeze training at 0.6 val top1 followed by 200 epochs')
|
|
||||||
plt.savefig('proxynas_0.6_freeze_training_200_epochs.png',
|
|
||||||
dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
>>>>>>> d6e6e107 (Added analysis scripts to compute kendall tau and spearman's correlation for proxynas experiments.)
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -365,4 +365,4 @@ def write_report(template_filename:str, **kwargs)->None:
    print(f'report written to: {outfilepath}')

if __name__ == '__main__':
    main()
@@ -1,57 +0,0 @@
import os
import argparse
from typing import Dict, List, Type, Iterator, Tuple
from collections import OrderedDict, defaultdict
from scipy.stats.stats import _two_sample_transform
import yaml
import json
from inspect import getsourcefile
import seaborn as sns
import math as ma
import numpy as np

from scipy.stats import kendalltau, spearmanr, pearsonr

import plotly.express as px
import plotly.graph_objects as go


def main():
    parser = argparse.ArgumentParser(description='Report creator')
    parser.add_argument('--list-of-npys', '-l', type=str,
                        default=r'~/logdir/proxynas_test_0001',
                        help='txt file containing list of full path to npy files')
    parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
                        help='folder to output reports')
    args, extra_args = parser.parse_known_args()

    # load list of npys
    with open(args.list_of_npys, 'r') as f:
        list_data = f.readlines()

    # create results dir for experiment
    out_dir = os.path.join(args.out_dir)
    print(f'out_dir: {out_dir}')
    os.makedirs(out_dir, exist_ok=True)

    # load all the npy files
    npys = []
    for l in list_data:
        npys.append(np.load(l.rstrip()))

    avg_heatmap = sum(npys)/len(npys)

    ZEROCOST_MEASURES_PF = ['grad_norm', 'snip', 'grasp', 'fisher', 'jacob_cov', 'synflow', 'params', 'flops', 'gt']

    fig = px.imshow(avg_heatmap, text_auto="0.1f", x=ZEROCOST_MEASURES_PF, y=ZEROCOST_MEASURES_PF)
    fig.update_layout(font=dict(size=36)) # font size
    savename_html = os.path.join(out_dir, f'avg_all_pairs_zc_spe.html')
    savename_png = os.path.join(out_dir, f'avg_all_pairs_zc_spe.png')
    fig.write_html(savename_html)
    fig.write_image(savename_png, width=1500, height=1500, scale=1)


if __name__ == '__main__':
    main()
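For orientation, a minimal sketch of how one of the averaged .npy matrices above could have been produced upstream (the scores dict mapping each measure name to a vector of per-architecture values is hypothetical; only the measure ordering and spearmanr come from the script):

    import numpy as np
    from scipy.stats import spearmanr

    def all_pairs_spearman(scores, measures):
        # scores[m] is a 1-D array of per-architecture values for measure m
        mat = np.zeros((len(measures), len(measures)))
        for i, mi in enumerate(measures):
            for j, mj in enumerate(measures):
                mat[i, j], _ = spearmanr(scores[mi], scores[mj])
        return mat

    # np.save('<experiment>_all_pairs_zc_spe.npy', all_pairs_spearman(scores, ZEROCOST_MEASURES_PF))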
@ -1,208 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict
|
|
||||||
from scipy.stats.stats import _two_sample_transform
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, sem
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Generates the darts space benchmark')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
confs = {}
|
|
||||||
job_dirs = list(results_dir.iterdir())
|
|
||||||
|
|
||||||
# # test single job parsing for debugging
|
|
||||||
# # WARNING: very slow, just use for debugging
|
|
||||||
# for job_dir in job_dirs:
|
|
||||||
# a = parse_a_job(job_dir)
|
|
||||||
|
|
||||||
# parallel parsing of yaml logs
|
|
||||||
num_workers = 60
|
|
||||||
with Pool(num_workers) as p:
|
|
||||||
a = p.map(parse_a_job, job_dirs)
|
|
||||||
|
|
||||||
for storage in a:
|
|
||||||
for key, val in storage.items():
|
|
||||||
logs[key] = val[0]
|
|
||||||
confs[key] = val[1]
|
|
||||||
|
|
||||||
# remove all search jobs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'search' in key:
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
# check for problematic logs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'best_test' not in logs[key]['regular_evaluate']['eval_arch']['eval_train']:
|
|
||||||
print(f'problem in {key}.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if '0' not in logs[key]['regular_evaluate']['eval_arch']['eval_train']['epochs']:
|
|
||||||
print(f'problem in {key}. missing training epoch 0 somehow.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if '99' not in logs[key]['regular_evaluate']['eval_arch']['eval_train']['epochs']:
|
|
||||||
print(f'problem in {key}. missing training epoch 99 somehow.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
|
|
||||||
archid_testacc = {}
|
|
||||||
archid_params = {}
|
|
||||||
archid_flops = {}
|
|
||||||
archid_trainacc_at_n_epoch = {}
|
|
||||||
n_epoch = '99'
|
|
||||||
for key in logs.keys():
|
|
||||||
if 'eval' in key:
|
|
||||||
try:
|
|
||||||
dataset_name = confs[key]['dataset']['name']
|
|
||||||
if dataset_name == 'darcyflow':
|
|
||||||
test_acc = -logs[key]['regular_evaluate']['eval_arch']['eval_train']['best_test']['loss']
|
|
||||||
train_acc_at_n = -logs[key]['regular_evaluate']['eval_arch']['eval_train']['epochs'][n_epoch]['train']['loss']
|
|
||||||
else:
|
|
||||||
test_acc = logs[key]['regular_evaluate']['eval_arch']['eval_train']['best_test']['top1']
|
|
||||||
train_acc_at_n = logs[key]['regular_evaluate']['eval_arch']['eval_train']['epochs'][n_epoch]['train']['top1']
|
|
||||||
arch_id = confs[key]['nas']['eval']['dartsspace']['arch_index']
|
|
||||||
archid_testacc[arch_id] = test_acc
|
|
||||||
archid_trainacc_at_n_epoch[arch_id] = train_acc_at_n
|
|
||||||
|
|
||||||
# get the number of params if in logs (most have it unless the early part is missing)
|
|
||||||
if 'num_params' in logs[key]['regular_evaluate']['eval_arch']['eval_train']:
|
|
||||||
num_params = logs[key]['regular_evaluate']['eval_arch']['eval_train']['num_params']
|
|
||||||
archid_params[arch_id] = num_params
|
|
||||||
mega_flops_per_batch = logs[key]['regular_evaluate']['eval_arch']['eval_train']['mega_flops_per_batch']
|
|
||||||
archid_flops[arch_id] = mega_flops_per_batch
|
|
||||||
|
|
||||||
except KeyError as err:
|
|
||||||
print(f'KeyError {err} not in {key}!')
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
print(f'Number of archs in benchmark {len(archid_params)}')
|
|
||||||
|
|
||||||
# sanity check
|
|
||||||
assert len(archid_testacc) == len(archid_flops)
|
|
||||||
assert len(archid_testacc) == len(archid_params)
|
|
||||||
assert len(archid_testacc) == len(archid_trainacc_at_n_epoch)
|
|
||||||
|
|
||||||
# save accuracies
|
|
||||||
savename = os.path.join(out_dir, 'arch_id_test_accuracy.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(archid_testacc, f)
|
|
||||||
|
|
||||||
# save params flops
|
|
||||||
arch_id_params_flops = dict()
|
|
||||||
savename = os.path.join(out_dir, 'arch_id_params_flops.yaml')
|
|
||||||
for archid in archid_params.keys():
|
|
||||||
num_params = archid_params[archid]
|
|
||||||
num_flops = archid_flops[archid]
|
|
||||||
arch_id_params_flops[archid] = {'params': num_params, 'flops': num_flops}
|
|
||||||
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(arch_id_params_flops, f)
|
|
||||||
|
|
||||||
# plot test accuracy vs. number of params
|
|
||||||
# to see how the distribution looks
|
|
||||||
testaccs = []
|
|
||||||
params = []
|
|
||||||
flops = []
|
|
||||||
trainaccs = []
|
|
||||||
for archid in archid_params.keys():
|
|
||||||
num_params = archid_params[archid]
|
|
||||||
test_acc = archid_testacc[archid]
|
|
||||||
num_flops = archid_flops[archid]
|
|
||||||
trainacc = archid_trainacc_at_n_epoch[archid]
|
|
||||||
testaccs.append(test_acc)
|
|
||||||
params.append(num_params)
|
|
||||||
flops.append(num_flops)
|
|
||||||
trainaccs.append(trainacc)
|
|
||||||
|
|
||||||
fig = go.Figure()
|
|
||||||
fig.add_trace(go.Scatter(x=testaccs, y=params, mode='markers'))
|
|
||||||
fig.update_layout(xaxis_title="Test Accuracy",
|
|
||||||
yaxis_title="Parameters")
|
|
||||||
fig.update_layout(font=dict(size=36)) # font size
|
|
||||||
fig.update_traces(marker=dict(size=20)) # marker size
|
|
||||||
|
|
||||||
savename_html = os.path.join(out_dir, 'darts_space_params_vs_test_acc.html')
|
|
||||||
fig.write_html(savename_html)
|
|
||||||
|
|
||||||
savename_png = os.path.join(out_dir, 'darts_space_params_vs_test_acc.png')
|
|
||||||
fig.write_image(savename_png, width=1500, height=1500, scale=1)
|
|
||||||
|
|
||||||
# compute spearman correlation of #params vs. test accuracy
|
|
||||||
param_spe, param_sp_value = spearmanr(testaccs, params)
|
|
||||||
flop_spe, flop_sp_value = spearmanr(testaccs, flops)
|
|
||||||
print(f'Spearman correlation of #params vs. test accuracy is {param_spe}')
|
|
||||||
print(f'Spearman correlation of #flops vs. test accuracy is {flop_spe}')
|
|
||||||
savename = os.path.join(out_dir, 'darts_space_params_flops_spe.txt')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
f.write(f'Spe #params vs. test accuracy: {param_spe}')
|
|
||||||
f.write(f'Spe #flops vs. test accuracy: {flop_spe}')
|
|
||||||
|
|
||||||
# compute spearman correlation of training acc at 'n' epoch vs. test accuracy
|
|
||||||
n_epoch_spe, _ = spearmanr(testaccs, trainaccs)
|
|
||||||
print(f'Spe training acc at {n_epoch}: {n_epoch_spe}')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@ -1,209 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict
|
|
||||||
from scipy.stats.stats import _two_sample_transform
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, sem
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report

import re


def main():
    parser = argparse.ArgumentParser(description='Freeze Darts Space Experiments')
    parser.add_argument('--results-dir', '-d', type=str,
                        default=r'~/logdir/proxynas_test_0001',
                        help='folder with experiment results')
    parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
                        help='folder to output reports')
    parser.add_argument('--reg-evals-file', '-r', type=str,
                        help='yaml file which contains full evaluation results for every archid')
    args, extra_args = parser.parse_known_args()

    # root dir where all results are stored
    results_dir = pathlib.Path(utils.full_path(args.results_dir))
    print(f'results_dir: {results_dir}')

    # extract experiment name which is top level directory
    exp_name = results_dir.parts[-1]

    # create results dir for experiment
    out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
    print(f'out_dir: {out_dir}')
    os.makedirs(out_dir, exist_ok=True)

    # regular full training file
    with open(args.reg_evals_file, 'r') as f:
        reg_evals_data = yaml.load(f, Loader=yaml.Loader)

    # get list of all structured logs for each job
    logs = {}
    confs = {}
    job_dirs = list(results_dir.iterdir())

    # # test single job parsing for debugging
    # # WARNING: very slow, just use for debugging
    # for job_dir in job_dirs:
    #     a = parse_a_job(job_dir)

    # parallel parsing of yaml logs
    num_workers = 9
    with Pool(num_workers) as p:
        a = p.map(parse_a_job, job_dirs)

    for storage in a:
        for key, val in storage.items():
            logs[key] = val[0]
            confs[key] = val[1]

    # remove all search jobs
    for key in list(logs.keys()):
        if 'search' in key:
            logs.pop(key)

    all_reg_evals = []
    all_freeze_evals_last = []

    all_freeze_time_last = []
    all_cond_time_last = []
    all_partial_time_last = []

    num_archs_unmet_cond = 0

    for key in logs.keys():
        if 'eval' in key:
            try:
                # if at the end of conditional training train accuracy has not gone above target then don't consider it
                # important to get this first
                last_cond_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'].keys())[-1]
                use_val = confs[key]['nas']['eval']['trainer']['use_val']
                threshold = confs[key]['nas']['eval']['trainer']['top1_acc_threshold']
                if use_val:
                    val_or_train = 'val'
                else:
                    val_or_train = 'train'
                end_cond = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][last_cond_epoch_key][val_or_train]['top1']
                if end_cond < threshold:
                    num_archs_unmet_cond += 1
                    continue

                # regular evaluation
                # important to get this first since if an arch id is
                # not in the benchmark we need to remove it from consideration
                arch_id = confs[key]['nas']['eval']['dartsspace']['arch_index']
                if arch_id not in list(reg_evals_data.keys()):
                    continue
                reg_eval_top1 = reg_evals_data[arch_id]
                all_reg_evals.append(reg_eval_top1)

                # freeze evaluation
                #--------------------

                # at last epoch
                last_freeze_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1]
                freeze_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][last_freeze_epoch_key][val_or_train]['top1']
                all_freeze_evals_last.append(freeze_eval_top1)

                # collect duration for conditional training and freeze training
                # NOTE: don't use val_or_train here since we are really interested in the duration of training
                freeze_duration = 0.0
                for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']:
                    freeze_duration += logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][epoch_key]['train']['duration']

                cond_duration = 0.0
                for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs']:
                    cond_duration += logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][epoch_key]['train']['duration']

                all_freeze_time_last.append(freeze_duration + cond_duration)
                all_cond_time_last.append(cond_duration)
                all_partial_time_last.append(freeze_duration)

            except KeyError as err:
                print(f'KeyError {err} not in {key}!')
                sys.exit()

    # Store some key numbers in results.txt
    results_savename = os.path.join(out_dir, 'results.txt')
    with open(results_savename, 'w') as f:
        f.write(f'Number of archs which did not reach condition: {num_archs_unmet_cond} \n')
        f.write(f'Total valid archs processed: {len(all_reg_evals)} \n')

    print(f'Number of archs which did not reach condition: {num_archs_unmet_cond}')
    print(f'Total valid archs processed: {len(all_reg_evals)}')

    # Sanity check
    assert len(all_reg_evals) == len(all_freeze_evals_last)
    assert len(all_reg_evals) == len(all_cond_time_last)
    assert len(all_reg_evals) == len(all_freeze_time_last)

    # scatter plot between time to threshold accuracy and regular evaluation
    fig = px.scatter(x=all_cond_time_last, y=all_reg_evals, labels={'x': 'Time to reach threshold train accuracy (s)', 'y': 'Final Accuracy'})
    fig.update_layout(font=dict(
        size=48,
    ))

    savename = os.path.join(out_dir, 'cond_time_vs_final_acc.html')
    fig.write_html(savename)
    savename_pdf = os.path.join(out_dir, 'cond_time_vs_final_acc.pdf')
    fig.write_image(savename_pdf, engine="kaleido", width=1500, height=1500, scale=1)
    fig.show()

    # histogram of training accuracies
    fig = px.histogram(all_reg_evals, labels={'x': 'Test Accuracy', 'y': 'Counts'})
    savename = os.path.join(out_dir, 'distribution_of_reg_evals.html')
    fig.write_html(savename)
    fig.show()

    # Freeze training results at last epoch
    freeze_tau, freeze_p_value = kendalltau(all_reg_evals, all_freeze_evals_last)
    freeze_spe, freeze_sp_value = spearmanr(all_reg_evals, all_freeze_evals_last)
    print(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f}')
    print(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f}')
    with open(results_savename, 'a') as f:
        f.write(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f} \n')
        f.write(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f} \n')

    plt.clf()
    sns.scatterplot(x=all_reg_evals, y=all_freeze_evals_last)
    plt.xlabel('Test top1 at natsbench full training')
    plt.ylabel('Freeze training')
    plt.grid()
    savename = os.path.join(out_dir, 'proxynas_freeze_training_epochs.png')
    plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')


if __name__ == '__main__':
    main()
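For readers skimming the diff, here is a minimal, self-contained sketch of the rank-correlation check these report scripts perform; the accuracy lists below are made up purely for illustration and are not results from any experiment.

from scipy.stats import kendalltau, spearmanr

ground_truth = [93.1, 91.4, 94.0, 90.2, 92.7]   # full-training top1 per architecture (hypothetical)
proxy_scores = [71.0, 68.5, 72.3, 66.9, 70.1]   # proxy (e.g. freeze-training) top1 for the same architectures (hypothetical)

tau, tau_p = kendalltau(ground_truth, proxy_scores)
spe, spe_p = spearmanr(ground_truth, proxy_scores)
print(f'Kendall Tau {tau:.3f} (p={tau_p:.3f}), Spearman {spe:.3f} (p={spe_p:.3f})')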
@@ -1,488 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, sem
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
parser.add_argument('--reg-evals-file', '-r', type=str, default=None,
|
|
||||||
help='optional yaml file which contains full evaluation \
|
|
||||||
of architectures on new datasets not part of natsbench')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# if optional regular evaluation lookup file is provided
|
|
||||||
if args.reg_evals_file:
|
|
||||||
with open(args.reg_evals_file, 'r') as f:
|
|
||||||
reg_evals_data = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
confs = {}
|
|
||||||
job_dirs = list(results_dir.iterdir())
|
|
||||||
|
|
||||||
# # test single job parsing for debugging
|
|
||||||
# # WARNING: very slow, just use for debugging
|
|
||||||
# for job_dir in job_dirs:
|
|
||||||
# a = parse_a_job(job_dir)
|
|
||||||
|
|
||||||
# parallel parsing of yaml logs
|
|
||||||
num_workers = 12
|
|
||||||
with Pool(num_workers) as p:
|
|
||||||
a = p.map(parse_a_job, job_dirs)
|
|
||||||
|
|
||||||
for storage in a:
|
|
||||||
for key, val in storage.items():
|
|
||||||
logs[key] = val[0]
|
|
||||||
confs[key] = val[1]
|
|
||||||
|
|
||||||
# examples of accessing logs
|
|
||||||
# logs['proxynas_blahblah:eval']['naswotrain_evaluate']['eval_arch']['eval_train']['naswithouttraining']
|
|
||||||
# logs['proxynas_blahblah:eval']['regular_evaluate']['regtrainingtop1']
|
|
||||||
# logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']['9']['val']['top1']
|
|
||||||
# last_epoch_key = list(logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
# last_val_top1 = logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][last_epoch_key]['val']['top1']
|
|
||||||
# epoch_duration = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']['0']['train']['duration']
|
|
||||||
|
|
||||||
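# Illustrative sketch only (not part of the original script): a small helper that
# packages the last-epoch lookup shown in the comments above; the key names are
# assumed to follow the structure documented there.
def last_epoch_val_top1(run_log):
    epochs = run_log['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']
    last_epoch_key = list(epochs.keys())[-1]
    return epochs[last_epoch_key]['val']['top1']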
# remove all search jobs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'search' in key:
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
# remove all arch_ids which did not finish
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
to_delete = False
|
|
||||||
|
|
||||||
# it might have died early
|
|
||||||
if 'freeze_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if 'naswotrain_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if 'regular_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if to_delete:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if 'freeze_training' not in list(logs[key]['freeze_evaluate']['eval_arch'].keys()):
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# freeze train may not have finished
|
|
||||||
num_freeze_epochs = confs[key]['nas']['eval']['freeze_trainer']['epochs']
|
|
||||||
last_freeze_epoch_key = int(list(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1])
|
|
||||||
if last_freeze_epoch_key != num_freeze_epochs - 1:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
|
|
||||||
all_arch_ids = []
|
|
||||||
|
|
||||||
all_reg_evals = []
|
|
||||||
|
|
||||||
all_naswotrain_evals = []
|
|
||||||
all_freeze_evals_last = []
|
|
||||||
all_cond_evals_last = []
|
|
||||||
|
|
||||||
all_freeze_flops_last = []
|
|
||||||
all_cond_flops_last = []
|
|
||||||
|
|
||||||
all_freeze_time_last = []
|
|
||||||
all_cond_time_last = []
|
|
||||||
all_partial_time_last = []
|
|
||||||
|
|
||||||
all_freeze_evals = defaultdict(list)
|
|
||||||
|
|
||||||
num_archs_unmet_cond = 0
|
|
||||||
|
|
||||||
for key in logs.keys():
|
|
||||||
if 'eval' in key:
|
|
||||||
try:
|
|
||||||
|
|
||||||
# if at the end of conditional training train accuracy has not gone above target then don't consider it
|
|
||||||
last_cond_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
train_end_cond = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][last_cond_epoch_key]['train']['top1']
|
|
||||||
if train_end_cond < confs[key]['nas']['eval']['trainer']['train_top1_acc_threshold']:
|
|
||||||
num_archs_unmet_cond += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# regular evaluation
|
|
||||||
# important to get this first since if it is not
|
|
||||||
# available for non-benchmark datasets we need to
|
|
||||||
# remove it from consideration
|
|
||||||
# --------------------
|
|
||||||
if not args.reg_evals_file:
|
|
||||||
reg_eval_top1 = logs[key]['regular_evaluate']['regtrainingtop1']
|
|
||||||
else:
|
|
||||||
# lookup from the provided file since this dataset is not part of the
|
|
||||||
# benchmark and hence we have to provide the info separately
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['natsbench']['arch_index']
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['nasbench101']['arch_index']
|
|
||||||
|
|
||||||
if arch_id_in_bench not in list(reg_evals_data.keys()):
|
|
||||||
# if the dataset used is not part of the standard benchmark some of the architectures
|
|
||||||
# may not have full evaluation accuracies available. Remove them from consideration.
|
|
||||||
continue
|
|
||||||
reg_eval_top1 = reg_evals_data[arch_id_in_bench]
|
|
||||||
all_reg_evals.append(reg_eval_top1)
|
|
||||||
|
|
||||||
# freeze evaluation
|
|
||||||
#--------------------
|
|
||||||
|
|
||||||
# at last epoch
|
|
||||||
last_freeze_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
freeze_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][last_freeze_epoch_key]['train']['top1']
|
|
||||||
all_freeze_evals_last.append(freeze_eval_top1)
|
|
||||||
|
|
||||||
# collect evals at other epochs
|
|
||||||
for epoch in range(int(last_freeze_epoch_key)):
|
|
||||||
all_freeze_evals[epoch].append(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][str(epoch)]['train']['top1'])
|
|
||||||
|
|
||||||
# collect flops used for conditional training and freeze training
|
|
||||||
freeze_mega_flops_epoch = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['total_mega_flops_epoch']
|
|
||||||
freeze_mega_flops_used = freeze_mega_flops_epoch * int(last_freeze_epoch_key)
|
|
||||||
all_freeze_flops_last.append(freeze_mega_flops_used)
|
|
||||||
|
|
||||||
last_cond_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
cond_mega_flops_epoch = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['total_mega_flops_epoch']
|
|
||||||
cond_mega_flops_used = cond_mega_flops_epoch * int(last_cond_epoch_key)
|
|
||||||
all_cond_flops_last.append(cond_mega_flops_used)
|
|
||||||
|
|
||||||
# collect training error at end of conditional training
|
|
||||||
cond_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][last_cond_epoch_key]['train']['top1']
|
|
||||||
all_cond_evals_last.append(cond_eval_top1)
|
|
||||||
|
|
||||||
# collect duration for conditional training and freeze training
|
|
||||||
freeze_duration = 0.0
|
|
||||||
for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']:
|
|
||||||
freeze_duration += logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][epoch_key]['train']['duration']
|
|
||||||
|
|
||||||
cond_duration = 0.0
|
|
||||||
for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs']:
|
|
||||||
cond_duration += logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][epoch_key]['train']['duration']
|
|
||||||
|
|
||||||
all_freeze_time_last.append(freeze_duration + cond_duration)
|
|
||||||
all_cond_time_last.append(cond_duration)
|
|
||||||
all_partial_time_last.append(freeze_duration)
|
|
||||||
|
|
||||||
# naswotrain
|
|
||||||
# --------------
|
|
||||||
naswotrain_top1 = logs[key]['naswotrain_evaluate']['eval_arch']['eval_train']['naswithouttraining']
|
|
||||||
all_naswotrain_evals.append(naswotrain_top1)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# record the arch id
|
|
||||||
# --------------------
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['natsbench']['arch_index'])
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['nasbench101']['arch_index'])
|
|
||||||
|
|
||||||
except KeyError as err:
|
|
||||||
print(f'KeyError {err} not in {key}!')
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
|
|
||||||
# Store some key numbers in results.txt
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'w') as f:
|
|
||||||
f.write(f'Number of archs which did not reach condition: {num_archs_unmet_cond} \n')
|
|
||||||
f.write(f'Total valid archs processed: {len(all_reg_evals)} \n')
|
|
||||||
|
|
||||||
print(f'Number of archs which did not reach condition: {num_archs_unmet_cond}')
|
|
||||||
print(f'Total valid archs processed: {len(all_reg_evals)}')
|
|
||||||
|
|
||||||
# Sanity check
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_evals_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_evals_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_time_last)
|
|
||||||
assert len(all_reg_evals) == len(all_naswotrain_evals)
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_flops_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_flops_last)
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_time_last)
|
|
||||||
assert len(all_reg_evals) == len(all_arch_ids)
|
|
||||||
|
|
||||||
# scatter plot between time to threshold accuracy and regular evaluation
|
|
||||||
fig = px.scatter(x=all_cond_time_last, y=all_reg_evals, labels={'x': 'Time to reach threshold accuracy (s)', 'y': 'Final Accuracy'})
|
|
||||||
savename = os.path.join(out_dir, 'cond_time_vs_final_acc.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
fig = px.histogram(all_reg_evals, labels={'x': 'Test Accuracy', 'y': 'Counts'})
|
|
||||||
savename = os.path.join(out_dir, 'distribution_of_reg_evals.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
# Freeze training results at last epoch
|
|
||||||
freeze_tau, freeze_p_value = kendalltau(all_reg_evals, all_freeze_evals_last)
|
|
||||||
freeze_spe, freeze_sp_value = spearmanr(all_reg_evals, all_freeze_evals_last)
|
|
||||||
print(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f}')
|
|
||||||
print(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f}')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f} \n')
|
|
||||||
f.write(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_freeze_evals_last)
|
|
||||||
plt.xlabel('Test top1 at natsbench full training')
|
|
||||||
plt.ylabel('Freeze training')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_epochs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# Conditional training results at last epoch
|
|
||||||
cond_tau, cond_p_value = kendalltau(all_reg_evals, all_cond_evals_last)
|
|
||||||
cond_spe, cond_sp_value = spearmanr(all_reg_evals, all_cond_evals_last)
|
|
||||||
print(f'Conditional Kendall Tau score: {cond_tau:3.03f}, p_value {cond_p_value:3.03f}')
|
|
||||||
print(f'Conditional Spearman corr: {cond_spe:3.03f}, p_value {cond_sp_value:3.03f}')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Conditional Kendall Tau score: {cond_tau:3.03f}, p_value {cond_p_value:3.03f} \n')
|
|
||||||
f.write(f'Conditional Spearman corr: {cond_spe:3.03f}, p_value {cond_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_cond_evals_last)
|
|
||||||
plt.xlabel('Test top1 at natsbench full training')
|
|
||||||
plt.ylabel('Conditional training')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_cond_training_epochs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# Report average runtime and average flops consumed
|
|
||||||
total_freeze_flops = np.array(all_freeze_flops_last) + np.array(all_cond_flops_last)
|
|
||||||
avg_freeze_flops = np.mean(total_freeze_flops)
|
|
||||||
std_freeze_flops = np.std(total_freeze_flops)
|
|
||||||
stderr_freeze_flops = std_freeze_flops / np.sqrt(len(all_freeze_flops_last))
|
|
||||||
|
|
||||||
avg_freeze_runtime = np.mean(np.array(all_freeze_time_last))
|
|
||||||
std_freeze_runtime = np.std(np.array(all_freeze_time_last))
|
|
||||||
stderr_freeze_runtime = std_freeze_runtime / np.sqrt(len(all_freeze_time_last))
|
|
||||||
|
|
||||||
avg_cond_runtime = np.mean(np.array(all_cond_time_last))
|
|
||||||
std_cond_runtime = np.std(np.array(all_cond_time_last))
|
|
||||||
stderr_cond_runtime = std_cond_runtime / np.sqrt(len(all_cond_time_last))
|
|
||||||
|
|
||||||
avg_partial_runtime = np.mean(np.array(all_partial_time_last))
|
|
||||||
std_partial_runtime = np.std(np.array(all_partial_time_last))
|
|
||||||
stderr_partial_runtime = std_partial_runtime / np.sqrt(len(all_partial_time_last))
|
|
||||||
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Avg. Freeze MFlops: {avg_freeze_flops:.03f}, std {std_freeze_flops}, stderr {stderr_freeze_flops:.03f} \n')
|
|
||||||
f.write(f'Avg. Freeze Runtime: {avg_freeze_runtime:.03f}, std {std_freeze_runtime}, stderr {stderr_freeze_runtime:.03f} \n')
|
|
||||||
f.write(f'Avg. Conditional Runtime: {avg_cond_runtime:.03f}, std {std_cond_runtime}, stderr {stderr_cond_runtime:.03f} \n')
|
|
||||||
f.write(f'Avg. Partial Runtime: {avg_partial_runtime:.03f}, std {std_partial_runtime}, stderr {stderr_partial_runtime:.03f} \n')
|
|
||||||
|
|
||||||
# Plot freeze training rank correlations if cutoff at various epochs
|
|
||||||
freeze_taus = {}
|
|
||||||
freeze_spes = {}
|
|
||||||
for epoch_key in all_freeze_evals.keys():
|
|
||||||
tau, _ = kendalltau(all_reg_evals, all_freeze_evals[epoch_key])
|
|
||||||
spe, _ = spearmanr(all_reg_evals, all_freeze_evals[epoch_key])
|
|
||||||
freeze_taus[epoch_key] = tau
|
|
||||||
freeze_spes[epoch_key] = spe
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
for epoch_key in freeze_taus.keys():
|
|
||||||
plt.scatter(epoch_key, freeze_taus[epoch_key])
|
|
||||||
plt.xlabel('Epochs of freeze training')
|
|
||||||
plt.ylabel('Kendall Tau')
|
|
||||||
plt.ylim((0.0, 1.0))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_kendall_taus.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
for epoch_key in freeze_taus.keys():
|
|
||||||
plt.scatter(epoch_key, freeze_spes[epoch_key])
|
|
||||||
plt.xlabel('Epochs of freeze training')
|
|
||||||
plt.ylabel('Spearman Correlation')
|
|
||||||
plt.ylim((0.0, 1.0))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_spearman_corrs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
|
|
||||||
# Naswottraining results
|
|
||||||
naswot_tau, naswot_p_value = kendalltau(all_reg_evals, all_naswotrain_evals)
|
|
||||||
naswot_spe, naswot_sp_value = spearmanr(all_reg_evals, all_naswotrain_evals)
|
|
||||||
print(f'Naswotraining Kendall Tau score: {naswot_tau:3.03f}, p_value {naswot_p_value:3.03f}')
|
|
||||||
print(f'Naswotraining Spearman corr: {naswot_spe:3.03f}, p_value {naswot_sp_value:3.03f}')
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Naswotraining Kendall Tau score: {naswot_tau:3.03f}, p_value {naswot_p_value:3.03f} \n')
|
|
||||||
f.write(f'Naswotraining Spearman corr: {naswot_spe:3.03f}, p_value {naswot_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_naswotrain_evals)
|
|
||||||
plt.xlabel('Test top1 at 200 epochs')
|
|
||||||
plt.ylabel('Naswotraining')
|
|
||||||
plt.title('Naswotraining')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_naswotraining.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
|
|
||||||
# Rank correlations at top n percent of architectures
|
|
||||||
reg_freezelast_naswot_evals = [(all_reg_evals[i], all_freeze_evals_last[i], all_naswotrain_evals[i], all_freeze_time_last[i]) for i in range(len(all_reg_evals))]
|
|
||||||
|
|
||||||
# sort in descending order of accuracy of regular evaluation
|
|
||||||
reg_freezelast_naswot_evals.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
top_percent_freeze_times_avg = []
|
|
||||||
top_percent_freeze_times_std = []
|
|
||||||
top_percent_freeze_times_stderr = []
|
|
||||||
|
|
||||||
spe_freeze_top_percents = []
|
|
||||||
spe_naswot_top_percents = []
|
|
||||||
top_percents = []
|
|
||||||
top_percent_range = range(2, 101, 2)
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(reg_freezelast_naswot_evals) * top_percent * 0.01))
|
|
||||||
top_percent_evals = reg_freezelast_naswot_evals[:num_to_keep]
|
|
||||||
top_percent_reg = [x[0] for x in top_percent_evals]
|
|
||||||
top_percent_freeze = [x[1] for x in top_percent_evals]
|
|
||||||
top_percent_naswot = [x[2] for x in top_percent_evals]
|
|
||||||
top_percent_freeze_times = [x[3] for x in top_percent_evals]
|
|
||||||
|
|
||||||
top_percent_freeze_times_avg.append(np.mean(np.array(top_percent_freeze_times)))
|
|
||||||
top_percent_freeze_times_std.append(np.std(np.array(top_percent_freeze_times)))
|
|
||||||
top_percent_freeze_times_stderr.append(sem(np.array(top_percent_freeze_times)))
|
|
||||||
|
|
||||||
spe_freeze, _ = spearmanr(top_percent_reg, top_percent_freeze)
|
|
||||||
spe_freeze_top_percents.append(spe_freeze)
|
|
||||||
spe_naswot, _ = spearmanr(top_percent_reg, top_percent_naswot)
|
|
||||||
spe_naswot_top_percents.append(spe_naswot)
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=top_percents, y=spe_freeze_top_percents)
sns.scatterplot(x=top_percents, y=spe_naswot_top_percents)
|
|
||||||
plt.legend(labels=['Freeze Train', 'Naswot'])
|
|
||||||
plt.ylim((0.0, 1.0))
|
|
||||||
plt.xlabel('Top percent of architectures')
|
|
||||||
plt.ylabel('Spearman Correlation')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'spe_top_archs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
plt.errorbar(top_percents, top_percent_freeze_times_avg, yerr=np.array(top_percent_freeze_times_std)/2, marker='s', mfc='red', ms=10, mew=4)
|
|
||||||
plt.xlabel('Top percent of architectures')
|
|
||||||
plt.ylabel('Avg. time (s)')
|
|
||||||
plt.yticks(np.arange(0,600, step=50))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'freeze_train_duration_top_archs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# how much overlap in top x% of architectures between method and groundtruth
|
|
||||||
# ----------------------------------------------------------------------------
|
|
||||||
arch_id_reg_evals = [(arch_id, reg_eval) for arch_id, reg_eval in zip(all_arch_ids, all_reg_evals)]
|
|
||||||
arch_id_freezetrain_evals = [(arch_id, freeze_eval) for arch_id, freeze_eval in zip(all_arch_ids, all_freeze_evals_last)]
|
|
||||||
arch_id_naswot_evals = [(arch_id, naswot_eval) for arch_id, naswot_eval in zip(all_arch_ids, all_naswotrain_evals)]
|
|
||||||
|
|
||||||
arch_id_reg_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
arch_id_freezetrain_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
arch_id_naswot_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
assert len(arch_id_reg_evals) == len(arch_id_freezetrain_evals)
|
|
||||||
assert len(arch_id_reg_evals) == len(arch_id_naswot_evals)
|
|
||||||
|
|
||||||
top_percents = []
|
|
||||||
freezetrain_ratio_common = []
|
|
||||||
naswot_ratio_common = []
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(arch_id_reg_evals) * top_percent * 0.01))
|
|
||||||
top_percent_arch_id_reg_evals = arch_id_reg_evals[:num_to_keep]
|
|
||||||
top_percent_arch_id_freezetrain_evals = arch_id_freezetrain_evals[:num_to_keep]
|
|
||||||
top_percent_arch_id_naswot_evals = arch_id_naswot_evals[:num_to_keep]
|
|
||||||
|
|
||||||
# take the set of arch_ids in each method and find overlap with top archs
|
|
||||||
set_reg = set([x[0] for x in top_percent_arch_id_reg_evals])
|
|
||||||
set_ft = set([x[0] for x in top_percent_arch_id_freezetrain_evals])
|
|
||||||
ft_num_common = len(set_reg.intersection(set_ft))
|
|
||||||
freezetrain_ratio_common.append(ft_num_common/num_to_keep)
|
|
||||||
|
|
||||||
set_naswot = set([x[0] for x in top_percent_arch_id_naswot_evals])
|
|
||||||
naswot_num_common = len(set_reg.intersection(set_naswot))
|
|
||||||
naswot_ratio_common.append(naswot_num_common/num_to_keep)
|
|
||||||
|
|
||||||
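# Illustrative sketch only (not part of the original script; arch ids are made up):
# the ratio appended in the loop above is |top-k by ground truth ∩ top-k by proxy| / k.
_gt_ranked = ['a3', 'a1', 'a7', 'a2', 'a9']      # arch ids sorted by full-training accuracy
_proxy_ranked = ['a1', 'a3', 'a2', 'a8', 'a9']   # arch ids sorted by the proxy score
_k = 3
_example_overlap = len(set(_gt_ranked[:_k]) & set(_proxy_ranked[:_k])) / _k   # -> 2/3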
# save raw data for other aggregate plots over experiments
|
|
||||||
raw_data_dict = {}
|
|
||||||
raw_data_dict['top_percents'] = top_percents
|
|
||||||
raw_data_dict['spe_freeze'] = spe_freeze_top_percents
|
|
||||||
raw_data_dict['spe_naswot'] = spe_naswot_top_percents
|
|
||||||
raw_data_dict['freeze_times_avg'] = top_percent_freeze_times_avg
|
|
||||||
raw_data_dict['freeze_times_std'] = top_percent_freeze_times_std
|
|
||||||
raw_data_dict['freeze_times_stderr'] = top_percent_freeze_times_stderr
|
|
||||||
raw_data_dict['freeze_ratio_common'] = freezetrain_ratio_common
|
|
||||||
raw_data_dict['naswot_ratio_common'] = naswot_ratio_common
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'raw_data.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(raw_data_dict, f)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,520 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, sem
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
parser.add_argument('--reg-evals-file', '-r', type=str, default=None,
|
|
||||||
help='optional yaml file which contains full evaluation \
|
|
||||||
of architectures on new datasets not part of natsbench')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# if optional regular evaluation lookup file is provided
|
|
||||||
if args.reg_evals_file:
|
|
||||||
with open(args.reg_evals_file, 'r') as f:
|
|
||||||
reg_evals_data = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
confs = {}
|
|
||||||
job_dirs = list(results_dir.iterdir())
|
|
||||||
|
|
||||||
# # test single job parsing for debugging
|
|
||||||
# # WARNING: very slow, just use for debugging
|
|
||||||
# for job_dir in job_dirs:
|
|
||||||
# a = parse_a_job(job_dir)
|
|
||||||
|
|
||||||
# parallel parsing of yaml logs
|
|
||||||
num_workers = 6
|
|
||||||
with Pool(num_workers) as p:
|
|
||||||
a = p.map(parse_a_job, job_dirs)
|
|
||||||
|
|
||||||
for storage in a:
|
|
||||||
for key, val in storage.items():
|
|
||||||
logs[key] = val[0]
|
|
||||||
confs[key] = val[1]
|
|
||||||
|
|
||||||
# examples of accessing logs
|
|
||||||
# logs['proxynas_blahblah:eval']['naswotrain_evaluate']['eval_arch']['eval_train']['naswithouttraining']
|
|
||||||
# logs['proxynas_blahblah:eval']['regular_evaluate']['regtrainingtop1']
|
|
||||||
# logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']['9']['val']['top1']
|
|
||||||
# last_epoch_key = list(logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
# last_val_top1 = logs['proxynas_blahblah:eval']['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][last_epoch_key]['val']['top1']
|
|
||||||
# epoch_duration = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']['0']['train']['duration']
|
|
||||||
|
|
||||||
# remove all search jobs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'search' in key:
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
# remove all arch_ids which did not finish
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
to_delete = False
|
|
||||||
|
|
||||||
# it might have died early
|
|
||||||
if 'freeze_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if 'naswotrain_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if 'regular_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if to_delete:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
if 'freeze_training' not in list(logs[key]['freeze_evaluate']['eval_arch'].keys()):
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# freeze train may not have finished
|
|
||||||
num_freeze_epochs = confs[key]['nas']['eval']['freeze_trainer']['epochs']
|
|
||||||
last_freeze_epoch_key = int(list(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1])
|
|
||||||
if last_freeze_epoch_key != num_freeze_epochs - 1:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
|
|
||||||
all_arch_ids = []
|
|
||||||
|
|
||||||
all_reg_evals = []
|
|
||||||
|
|
||||||
all_naswotrain_evals = []
|
|
||||||
all_freeze_evals_last = []
|
|
||||||
all_cond_evals_last = []
|
|
||||||
|
|
||||||
all_freeze_flops_last = []
|
|
||||||
all_cond_flops_last = []
|
|
||||||
|
|
||||||
all_freeze_time_last = []
|
|
||||||
all_cond_time_last = []
|
|
||||||
all_partial_time_last = []
|
|
||||||
|
|
||||||
all_freeze_evals = defaultdict(list)
|
|
||||||
|
|
||||||
num_archs_unmet_cond = 0
|
|
||||||
|
|
||||||
for key in logs.keys():
|
|
||||||
if 'eval' in key:
|
|
||||||
try:
|
|
||||||
|
|
||||||
# if at the end of conditional training train accuracy has not gone above target then don't consider it
|
|
||||||
# important to get this first
|
|
||||||
last_cond_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
use_val = confs[key]['nas']['eval']['trainer']['use_val']
|
|
||||||
threshold = confs[key]['nas']['eval']['trainer']['top1_acc_threshold']
|
|
||||||
if use_val:
|
|
||||||
val_or_train = 'val'
|
|
||||||
else:
|
|
||||||
val_or_train = 'train'
|
|
||||||
end_cond = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][last_cond_epoch_key][val_or_train]['top1']
|
|
||||||
if end_cond < threshold:
|
|
||||||
num_archs_unmet_cond += 1
|
|
||||||
continue
|
|
||||||
|
|
||||||
# regular evaluation
|
|
||||||
# important to get this first since if it is not
|
|
||||||
# available for non-benchmark datasets we need to
|
|
||||||
# remove it from consideration
|
|
||||||
# --------------------
|
|
||||||
if not args.reg_evals_file:
|
|
||||||
reg_eval_top1 = logs[key]['regular_evaluate']['regtrainingtop1']
|
|
||||||
else:
|
|
||||||
# lookup from the provided file since this dataset is not part of the
|
|
||||||
# benchmark and hence we have to provide the info separately
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['natsbench']['arch_index']
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['nasbench101']['arch_index']
|
|
||||||
|
|
||||||
if arch_id_in_bench not in list(reg_evals_data.keys()):
|
|
||||||
# if the dataset used is not part of the standard benchmark some of the architectures
|
|
||||||
# may not have full evaluation accuracies available. Remove them from consideration.
|
|
||||||
continue
|
|
||||||
reg_eval_top1 = reg_evals_data[arch_id_in_bench]
|
|
||||||
all_reg_evals.append(reg_eval_top1)
|
|
||||||
|
|
||||||
# freeze evaluation
|
|
||||||
#--------------------
|
|
||||||
|
|
||||||
# at last epoch
|
|
||||||
last_freeze_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
freeze_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][last_freeze_epoch_key][val_or_train]['top1']
|
|
||||||
all_freeze_evals_last.append(freeze_eval_top1)
|
|
||||||
|
|
||||||
# collect evals at other epochs
|
|
||||||
for epoch in range(int(last_freeze_epoch_key)):
|
|
||||||
all_freeze_evals[epoch].append(logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][str(epoch)][val_or_train]['top1'])
|
|
||||||
|
|
||||||
# collect flops used for conditional training and freeze training
|
|
||||||
freeze_mega_flops_epoch = logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['total_mega_flops_epoch']
|
|
||||||
freeze_mega_flops_used = freeze_mega_flops_epoch * int(last_freeze_epoch_key)
|
|
||||||
all_freeze_flops_last.append(freeze_mega_flops_used)
|
|
||||||
|
|
||||||
last_cond_epoch_key = list(logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'].keys())[-1]
|
|
||||||
cond_mega_flops_epoch = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['total_mega_flops_epoch']
|
|
||||||
cond_mega_flops_used = cond_mega_flops_epoch * int(last_cond_epoch_key)
|
|
||||||
all_cond_flops_last.append(cond_mega_flops_used)
|
|
||||||
|
|
||||||
# collect training error at end of conditional training
|
|
||||||
cond_eval_top1 = logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][last_cond_epoch_key][val_or_train]['top1']
|
|
||||||
all_cond_evals_last.append(cond_eval_top1)
|
|
||||||
|
|
||||||
# collect duration for conditional training and freeze training
|
|
||||||
# NOTE: don't use val_or_train here since we are really interested in the duration of training
|
|
||||||
freeze_duration = 0.0
|
|
||||||
for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs']:
|
|
||||||
freeze_duration += logs[key]['freeze_evaluate']['eval_arch']['freeze_training']['eval_train']['epochs'][epoch_key]['train']['duration']
|
|
||||||
|
|
||||||
cond_duration = 0.0
|
|
||||||
for epoch_key in logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs']:
|
|
||||||
cond_duration += logs[key]['freeze_evaluate']['eval_arch']['conditional_training']['eval_train']['epochs'][epoch_key]['train']['duration']
|
|
||||||
|
|
||||||
all_freeze_time_last.append(freeze_duration + cond_duration)
|
|
||||||
all_cond_time_last.append(cond_duration)
|
|
||||||
all_partial_time_last.append(freeze_duration)
|
|
||||||
|
|
||||||
# naswotrain
|
|
||||||
# --------------
|
|
||||||
naswotrain_top1 = logs[key]['naswotrain_evaluate']['eval_arch']['eval_train']['naswithouttraining']
|
|
||||||
all_naswotrain_evals.append(naswotrain_top1)
|
|
||||||
|
|
||||||
# record the arch id
|
|
||||||
# --------------------
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['natsbench']['arch_index'])
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['nasbench101']['arch_index'])
|
|
||||||
|
|
||||||
except KeyError as err:
|
|
||||||
print(f'KeyError {err} not in {key}!')
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
|
|
||||||
# Store some key numbers in results.txt
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'w') as f:
|
|
||||||
f.write(f'Number of archs which did not reach condition: {num_archs_unmet_cond} \n')
|
|
||||||
f.write(f'Total valid archs processed: {len(all_reg_evals)} \n')
|
|
||||||
|
|
||||||
print(f'Number of archs which did not reach condition: {num_archs_unmet_cond}')
|
|
||||||
print(f'Total valid archs processed: {len(all_reg_evals)}')
|
|
||||||
|
|
||||||
# Sanity check
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_evals_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_evals_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_time_last)
|
|
||||||
assert len(all_reg_evals) == len(all_naswotrain_evals)
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_flops_last)
|
|
||||||
assert len(all_reg_evals) == len(all_cond_flops_last)
|
|
||||||
assert len(all_reg_evals) == len(all_freeze_time_last)
|
|
||||||
assert len(all_reg_evals) == len(all_arch_ids)
|
|
||||||
|
|
||||||
# scatter plot between time to threshold accuracy and regular evaluation
|
|
||||||
fig = px.scatter(x=all_cond_time_last, y=all_reg_evals, labels={'x': 'Time to reach threshold train accuracy (s)', 'y': 'Final Accuracy'})
|
|
||||||
fig.update_layout(font=dict(
|
|
||||||
size=48,
|
|
||||||
))
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'cond_time_vs_final_acc.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
|
|
||||||
savename_pdf = os.path.join(out_dir, 'cond_time_vs_final_acc.pdf')
|
|
||||||
fig.write_image(savename_pdf, engine="kaleido", width=1500, height=1500, scale=1)
|
|
||||||
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
# scatter plot between time to threshold accuracy and fear evaluation
|
|
||||||
all_freeze_evals_last_scaled = [x*100.0 for x in all_freeze_evals_last]
|
|
||||||
fig = px.scatter(x=all_cond_time_last, y=all_freeze_evals_last_scaled, labels={'x': 'Time to reach threshold train accuracy (s)', 'y': 'FEAR Accuracy'})
|
|
||||||
fig.update_layout(font=dict(
|
|
||||||
size=48,
|
|
||||||
))
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'cond_time_vs_fear_acc.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
|
|
||||||
savename_pdf = os.path.join(out_dir, 'cond_time_vs_fear_acc.pdf')
|
|
||||||
fig.write_image(savename_pdf, engine="kaleido", width=1500, height=1500, scale=1)
|
|
||||||
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
|
|
||||||
# histogram of training accuracies
|
|
||||||
fig = px.histogram(all_reg_evals, labels={'x': 'Test Accuracy', 'y': 'Counts'})
|
|
||||||
savename = os.path.join(out_dir, 'distribution_of_reg_evals.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
# Freeze training results at last epoch
|
|
||||||
freeze_tau, freeze_p_value = kendalltau(all_reg_evals, all_freeze_evals_last)
|
|
||||||
freeze_spe, freeze_sp_value = spearmanr(all_reg_evals, all_freeze_evals_last)
|
|
||||||
print(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f}')
|
|
||||||
print(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f}')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Freeze Kendall Tau score: {freeze_tau:3.03f}, p_value {freeze_p_value:3.03f} \n')
|
|
||||||
f.write(f'Freeze Spearman corr: {freeze_spe:3.03f}, p_value {freeze_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_freeze_evals_last)
|
|
||||||
plt.xlabel('Test top1 at natsbench full training')
|
|
||||||
plt.ylabel('Freeze training')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_epochs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# Conditional training results at last epoch
|
|
||||||
cond_tau, cond_p_value = kendalltau(all_reg_evals, all_cond_evals_last)
|
|
||||||
cond_spe, cond_sp_value = spearmanr(all_reg_evals, all_cond_evals_last)
|
|
||||||
print(f'Conditional Kendall Tau score: {cond_tau:3.03f}, p_value {cond_p_value:3.03f}')
|
|
||||||
print(f'Conditional Spearman corr: {cond_spe:3.03f}, p_value {cond_sp_value:3.03f}')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Conditional Kendall Tau score: {cond_tau:3.03f}, p_value {cond_p_value:3.03f} \n')
|
|
||||||
f.write(f'Conditional Spearman corr: {cond_spe:3.03f}, p_value {cond_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_cond_evals_last)
|
|
||||||
plt.xlabel('Test top1 at natsbench full training')
|
|
||||||
plt.ylabel('Conditional training')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_cond_training_epochs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# Report average runtime and average flops consumed
|
|
||||||
total_freeze_flops = np.array(all_freeze_flops_last) + np.array(all_cond_flops_last)
|
|
||||||
avg_freeze_flops = np.mean(total_freeze_flops)
|
|
||||||
std_freeze_flops = np.std(total_freeze_flops)
|
|
||||||
stderr_freeze_flops = std_freeze_flops / np.sqrt(len(all_freeze_flops_last))
|
|
||||||
|
|
||||||
avg_freeze_runtime = np.mean(np.array(all_freeze_time_last))
|
|
||||||
std_freeze_runtime = np.std(np.array(all_freeze_time_last))
|
|
||||||
stderr_freeze_runtime = std_freeze_runtime / np.sqrt(len(all_freeze_time_last))
|
|
||||||
|
|
||||||
avg_cond_runtime = np.mean(np.array(all_cond_time_last))
|
|
||||||
std_cond_runtime = np.std(np.array(all_cond_time_last))
|
|
||||||
stderr_cond_runtime = std_cond_runtime / np.sqrt(len(all_cond_time_last))
|
|
||||||
|
|
||||||
avg_partial_runtime = np.mean(np.array(all_partial_time_last))
|
|
||||||
std_partial_runtime = np.std(np.array(all_partial_time_last))
|
|
||||||
stderr_partial_runtime = std_partial_runtime / np.sqrt(len(all_partial_time_last))
|
|
||||||
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Avg. Freeze MFlops: {avg_freeze_flops:.03f}, std {std_freeze_flops}, stderr {stderr_freeze_flops:.03f} \n')
|
|
||||||
f.write(f'Avg. Freeze Runtime: {avg_freeze_runtime:.03f}, std {std_freeze_runtime}, stderr {stderr_freeze_runtime:.03f} \n')
|
|
||||||
f.write(f'Avg. Conditional Runtime: {avg_cond_runtime:.03f}, std {std_cond_runtime}, stderr {stderr_cond_runtime:.03f} \n')
|
|
||||||
f.write(f'Avg. Partial Runtime: {avg_partial_runtime:.03f}, std {std_partial_runtime}, stderr {stderr_partial_runtime:.03f} \n')
|
|
||||||
|
|
||||||
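# Illustrative note (not part of the original script): the standard errors written
# above use std/sqrt(n) with numpy's default ddof=0, while scipy.stats.sem (used
# later in this script) defaults to ddof=1, so the two differ slightly for small n.
_example_durations = np.array([120.0, 135.0, 128.0, 142.0])   # made-up runtimes in seconds
_stderr_manual = np.std(_example_durations) / np.sqrt(len(_example_durations))
_stderr_scipy = sem(_example_durations)   # a bit larger because of ddof=1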
# Plot freeze training rank correlations if cutoff at various epochs
|
|
||||||
freeze_taus = {}
|
|
||||||
freeze_spes = {}
|
|
||||||
for epoch_key in all_freeze_evals.keys():
|
|
||||||
tau, _ = kendalltau(all_reg_evals, all_freeze_evals[epoch_key])
|
|
||||||
spe, _ = spearmanr(all_reg_evals, all_freeze_evals[epoch_key])
|
|
||||||
freeze_taus[epoch_key] = tau
|
|
||||||
freeze_spes[epoch_key] = spe
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
for epoch_key in freeze_taus.keys():
|
|
||||||
plt.scatter(epoch_key, freeze_taus[epoch_key])
|
|
||||||
plt.xlabel('Epochs of freeze training')
|
|
||||||
plt.ylabel('Kendall Tau')
|
|
||||||
plt.ylim((-1.0, 1.0))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_kendall_taus.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
for epoch_key in freeze_taus.keys():
|
|
||||||
plt.scatter(epoch_key, freeze_spes[epoch_key])
|
|
||||||
plt.xlabel('Epochs of freeze training')
|
|
||||||
plt.ylabel('Spearman Correlation')
|
|
||||||
plt.ylim((-1.0, 1.0))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_freeze_training_spearman_corrs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
|
|
||||||
# Naswottraining results
|
|
||||||
naswot_tau, naswot_p_value = kendalltau(all_reg_evals, all_naswotrain_evals)
|
|
||||||
naswot_spe, naswot_sp_value = spearmanr(all_reg_evals, all_naswotrain_evals)
|
|
||||||
print(f'Naswotraining Kendall Tau score: {naswot_tau:3.03f}, p_value {naswot_p_value:3.03f}')
|
|
||||||
print(f'Naswotraining Spearman corr: {naswot_spe:3.03f}, p_value {naswot_sp_value:3.03f}')
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Naswotraining Kendall Tau score: {naswot_tau:3.03f}, p_value {naswot_p_value:3.03f} \n')
|
|
||||||
f.write(f'Naswotraining Spearman corr: {naswot_spe:3.03f}, p_value {naswot_sp_value:3.03f} \n')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=all_reg_evals, y=all_naswotrain_evals)
|
|
||||||
plt.xlabel('Test top1 at 200 epochs')
|
|
||||||
plt.ylabel('Naswotraining')
|
|
||||||
plt.title('Naswotraining')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'proxynas_naswotraining.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# Rank correlations at top n percent of architectures
|
|
||||||
#-----------------------------------------------------
|
|
||||||
reg_freezelast_naswot_evals = [(all_reg_evals[i], all_freeze_evals_last[i], all_naswotrain_evals[i], all_freeze_time_last[i]) for i in range(len(all_reg_evals))]
|
|
||||||
|
|
||||||
# sort in descending order of accuracy of regular evaluation
|
|
||||||
reg_freezelast_naswot_evals.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
top_percent_freeze_times_avg = []
|
|
||||||
top_percent_freeze_times_std = []
|
|
||||||
top_percent_freeze_times_stderr = []
|
|
||||||
|
|
||||||
spe_freeze_top_percents = []
|
|
||||||
spe_naswot_top_percents = []
|
|
||||||
top_percents = []
|
|
||||||
top_percent_range = range(2, 101, 2)
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(reg_freezelast_naswot_evals) * top_percent * 0.01))
|
|
||||||
top_percent_evals = reg_freezelast_naswot_evals[:num_to_keep]
|
|
||||||
top_percent_reg = [x[0] for x in top_percent_evals]
|
|
||||||
top_percent_freeze = [x[1] for x in top_percent_evals]
|
|
||||||
top_percent_naswot = [x[2] for x in top_percent_evals]
|
|
||||||
top_percent_freeze_times = [x[3] for x in top_percent_evals]
|
|
||||||
|
|
||||||
top_percent_freeze_times_avg.append(np.mean(np.array(top_percent_freeze_times)))
|
|
||||||
top_percent_freeze_times_std.append(np.std(np.array(top_percent_freeze_times)))
|
|
||||||
top_percent_freeze_times_stderr.append(sem(np.array(top_percent_freeze_times)))
|
|
||||||
|
|
||||||
spe_freeze, _ = spearmanr(top_percent_reg, top_percent_freeze)
|
|
||||||
spe_freeze_top_percents.append(spe_freeze)
|
|
||||||
spe_naswot, _ = spearmanr(top_percent_reg, top_percent_naswot)
|
|
||||||
spe_naswot_top_percents.append(spe_naswot)
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
sns.scatterplot(x=top_percents, y=spe_freeze_top_percents)
sns.scatterplot(x=top_percents, y=spe_naswot_top_percents)
|
|
||||||
plt.legend(labels=['Freeze Train', 'Naswot'])
|
|
||||||
plt.ylim((-1.0, 1.0))
|
|
||||||
plt.xlabel('Top percent of architectures')
|
|
||||||
plt.ylabel('Spearman Correlation')
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'spe_top_archs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
plt.clf()
|
|
||||||
plt.errorbar(top_percents, top_percent_freeze_times_avg, yerr=np.array(top_percent_freeze_times_std)/2, marker='s', mfc='red', ms=10, mew=4)
|
|
||||||
plt.xlabel('Top percent of architectures')
|
|
||||||
plt.ylabel('Avg. time (s)')
|
|
||||||
plt.yticks(np.arange(0,600, step=50))
|
|
||||||
plt.grid()
|
|
||||||
savename = os.path.join(out_dir, 'freeze_train_duration_top_archs.png')
|
|
||||||
plt.savefig(savename, dpi=plt.gcf().dpi, bbox_inches='tight')
|
|
||||||
|
|
||||||
# how much overlap in top x% of architectures between method and groundtruth
|
|
||||||
# ----------------------------------------------------------------------------
|
|
||||||
arch_id_reg_evals = [(arch_id, reg_eval) for arch_id, reg_eval in zip(all_arch_ids, all_reg_evals)]
|
|
||||||
arch_id_freezetrain_evals = [(arch_id, freeze_eval) for arch_id, freeze_eval in zip(all_arch_ids, all_freeze_evals_last)]
|
|
||||||
arch_id_naswot_evals = [(arch_id, naswot_eval) for arch_id, naswot_eval in zip(all_arch_ids, all_naswotrain_evals)]
|
|
||||||
|
|
||||||
arch_id_reg_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
arch_id_freezetrain_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
arch_id_naswot_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
assert len(arch_id_reg_evals) == len(arch_id_freezetrain_evals)
|
|
||||||
assert len(arch_id_reg_evals) == len(arch_id_naswot_evals)
|
|
||||||
|
|
||||||
top_percents = []
|
|
||||||
freezetrain_ratio_common = []
|
|
||||||
naswot_ratio_common = []
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(arch_id_reg_evals) * top_percent * 0.01))
|
|
||||||
top_percent_arch_id_reg_evals = arch_id_reg_evals[:num_to_keep]
|
|
||||||
top_percent_arch_id_freezetrain_evals = arch_id_freezetrain_evals[:num_to_keep]
|
|
||||||
top_percent_arch_id_naswot_evals = arch_id_naswot_evals[:num_to_keep]
|
|
||||||
|
|
||||||
# take the set of arch_ids in each method and find overlap with top archs
|
|
||||||
set_reg = set([x[0] for x in top_percent_arch_id_reg_evals])
|
|
||||||
set_ft = set([x[0] for x in top_percent_arch_id_freezetrain_evals])
|
|
||||||
ft_num_common = len(set_reg.intersection(set_ft))
|
|
||||||
freezetrain_ratio_common.append(ft_num_common/num_to_keep)
|
|
||||||
|
|
||||||
set_naswot = set([x[0] for x in top_percent_arch_id_naswot_evals])
|
|
||||||
naswot_num_common = len(set_reg.intersection(set_naswot))
|
|
||||||
naswot_ratio_common.append(naswot_num_common/num_to_keep)
|
|
||||||
|
|
||||||
# save raw data for other aggregate plots over experiments
|
|
||||||
raw_data_dict = {}
|
|
||||||
raw_data_dict['top_percents'] = top_percents
|
|
||||||
raw_data_dict['spe_freeze'] = spe_freeze_top_percents
|
|
||||||
raw_data_dict['spe_naswot'] = spe_naswot_top_percents
|
|
||||||
raw_data_dict['freeze_times_avg'] = top_percent_freeze_times_avg
|
|
||||||
raw_data_dict['freeze_times_std'] = top_percent_freeze_times_std
|
|
||||||
raw_data_dict['freeze_times_stderr'] = top_percent_freeze_times_stderr
|
|
||||||
raw_data_dict['freeze_ratio_common'] = freezetrain_ratio_common
|
|
||||||
raw_data_dict['naswot_ratio_common'] = naswot_ratio_common
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'raw_data.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(raw_data_dict, f)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,57 +0,0 @@
|
||||||
import json
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
from typing import List
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
|
|
||||||
def find_train_thresh_epochs(train_acc:List[float], train_thresh:float)->int:
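# returns the number of epochs (1-indexed) needed to reach train_thresh, or None if the threshold is never reached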
|
|
||||||
for i, t in enumerate(train_acc):
|
|
||||||
if t >= train_thresh:
|
|
||||||
return i + 1  # count epochs (1-indexed) so reaching the threshold in the first epoch is not treated as falsy
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Nasbench301 time to threshold vs. test accuracy')
|
|
||||||
parser.add_argument('--nb301-logs-dir', '-d', type=str, help='folder with nasbench301 architecture training logs')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports', help='folder to output reports')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
train_thresh = 60.0
|
|
||||||
|
|
||||||
timetothresh_vs_test_acc = []
|
|
||||||
|
|
||||||
# collect all the json file names in the log dir recursively
|
|
||||||
for root, dir, files in os.walk(args.nb301_logs_dir):
|
|
||||||
for name in tqdm(files):
|
|
||||||
log_name = os.path.join(root, name)
|
|
||||||
with open(log_name, 'r') as f:
|
|
||||||
log_data = json.load(f)
|
|
||||||
num_epochs = len(log_data['learning_curves']['Train/train_accuracy'])
|
|
||||||
test_acc = log_data['test_accuracy']
|
|
||||||
per_epoch_time = log_data['runtime'] / num_epochs
|
|
||||||
num_epochs_to_thresh = find_train_thresh_epochs(log_data['learning_curves']['Train/train_accuracy'],
|
|
||||||
train_thresh)
|
|
||||||
# many weak architectures will never reach threshold
|
|
||||||
if not num_epochs_to_thresh:
|
|
||||||
continue
|
|
||||||
time_to_thresh = per_epoch_time * num_epochs_to_thresh
|
|
||||||
timetothresh_vs_test_acc.append((time_to_thresh, test_acc))
|
|
||||||
|
|
||||||
# plot
|
|
||||||
fig = go.Figure()
|
|
||||||
xs = [timetothresh for timetothresh, testacc in timetothresh_vs_test_acc]
|
|
||||||
ys = [testacc for timetothresh, testacc in timetothresh_vs_test_acc]
|
|
||||||
fig.add_trace(go.Scatter(x=xs, y=ys, mode='markers'))
|
|
||||||
fig.update_layout(xaxis_title='Time to reach threshold train accuracy (s)',
|
|
||||||
yaxis_title='Final Accuracy')
|
|
||||||
savename_html = os.path.join(args.out_dir, 'nasbench301_timetothresh_vs_testacc.html')
|
|
||||||
fig.write_html(savename_html)
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,168 +0,0 @@
|
||||||
# Copyright (c) Microsoft Corporation.
|
|
||||||
# Licensed under the MIT license.
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict, namedtuple
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
confs = {}
|
|
||||||
job_dirs = list(results_dir.iterdir())
|
|
||||||
|
|
||||||
# # test single job parsing for debugging
|
|
||||||
# # WARNING: very slow, just use for debugging
|
|
||||||
# for job_dir in job_dirs:
|
|
||||||
# a = parse_a_job(job_dir)
|
|
||||||
|
|
||||||
# parallel parsing of yaml logs
|
|
||||||
num_workers = 60
|
|
||||||
with Pool(num_workers) as p:
|
|
||||||
a = p.map(parse_a_job, job_dirs)
|
|
||||||
|
|
||||||
for storage in a:
|
|
||||||
for key, val in storage.items():
|
|
||||||
logs[key] = val[0]
|
|
||||||
confs[key] = val[1]
|
|
||||||
|
|
||||||
# remove all search jobs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'search' in key:
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
# remove all arch_ids which did not finish
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
to_delete = False
|
|
||||||
|
|
||||||
if 'eval_arch' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if to_delete:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# eval_arch may not have finished
|
|
||||||
num_epochs = confs[key]['nas']['eval']['trainer']['epochs']
|
|
||||||
last_epoch_key = int(list(logs[key]['eval_arch']['eval_train']['epochs'].keys())[-1])
|
|
||||||
if last_epoch_key != num_epochs - 1:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
|
|
||||||
# create a dict with arch_id: regular eval score as entries
|
|
||||||
# and save since synthetic cifar10 or other new datasets
|
|
||||||
# are not part of the benchmark
|
|
||||||
arch_id_reg_eval = {}
|
|
||||||
arch_id_params_flops = {}
|
|
||||||
arch_id_trainacc_at_n_epoch = {}
|
|
||||||
n_epoch = '3'
|
|
||||||
|
|
||||||
for key in logs.keys():
|
|
||||||
arch_id = confs[key]['nas']['eval']['natsbench']['arch_index']
|
|
||||||
reg_eval = logs[key]['eval_arch']['eval_train']['best_test']['top1']
|
|
||||||
train_acc_at_n = logs[key]['eval_arch']['eval_train']['epochs'][n_epoch]['train']['top1']
|
|
||||||
num_params = logs[key]['eval_arch']['eval_train']['num_params']
|
|
||||||
mega_flops_per_batch = logs[key]['eval_arch']['eval_train']['mega_flops_per_batch']
|
|
||||||
# store
|
|
||||||
arch_id_reg_eval[arch_id] = reg_eval
|
|
||||||
arch_id_trainacc_at_n_epoch[arch_id] = train_acc_at_n
|
|
||||||
arch_id_params_flops[arch_id] = {'params': num_params, 'flops': mega_flops_per_batch}
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'arch_id_test_accuracy_synthetic_cifar10.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(arch_id_reg_eval, f)
|
|
||||||
|
|
||||||
savename = os.path.join(out_dir, 'arch_id_params_flops.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(arch_id_params_flops, f)
|
|
||||||
|
|
||||||
# now create a list of regular evaluation and corresponding synflow scores
|
|
||||||
# to compute spearman's correlation
|
|
||||||
all_reg_evals = []
|
|
||||||
all_epochs_at_n = []
|
|
||||||
for arch_id in arch_id_reg_eval.keys():
|
|
||||||
all_reg_evals.append(arch_id_reg_eval[arch_id])
|
|
||||||
all_epochs_at_n.append(arch_id_trainacc_at_n_epoch[arch_id])
|
|
||||||
|
|
||||||
spe_epochs_at_n, _ = spearmanr(all_reg_evals, all_epochs_at_n)
|
|
||||||
print(f'Spearman corr. {n_epoch}: {spe_epochs_at_n}')
|
|
||||||
|
|
||||||
print(f'num valid architectures used for analysis {len(logs)}')
|
|
||||||
|
|
||||||
# plot histogram of regular evaluation scores
|
|
||||||
fig = px.histogram(all_reg_evals, labels={'x': 'Test Accuracy', 'y': 'Counts'})
|
|
||||||
savename = os.path.join(out_dir, 'distribution_of_test_accuracies.html')
|
|
||||||
fig.write_html(savename)
|
|
||||||
#fig.show()
|
|
||||||
|
|
||||||
# plot histogram of training scores
|
|
||||||
all_train_accs = []
|
|
||||||
for key in logs.keys():
|
|
||||||
train_acc = logs[key]['eval_arch']['eval_train']['best_train']['top1']
|
|
||||||
all_train_accs.append(train_acc)
|
|
||||||
|
|
||||||
fig1 = px.histogram(all_train_accs, labels={'x': 'Train Accuracy', 'y': 'Counts'})
|
|
||||||
savename = os.path.join(out_dir, 'distribution_of_train_accuracies.html')
|
|
||||||
fig1.write_html(savename)
|
|
||||||
#fig1.show()
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,360 +0,0 @@
|
||||||
import sys
|
|
||||||
import argparse
|
|
||||||
from typing import Dict, List, Type, Iterator, Tuple
|
|
||||||
import glob
|
|
||||||
import os
|
|
||||||
import pathlib
|
|
||||||
from collections import OrderedDict, defaultdict
|
|
||||||
|
|
||||||
import yaml
|
|
||||||
from inspect import getsourcefile
|
|
||||||
import seaborn as sns
|
|
||||||
import math as ma
|
|
||||||
|
|
||||||
|
|
||||||
import plotly.express as px
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, pearsonr
|
|
||||||
|
|
||||||
from runstats import Statistics
|
|
||||||
|
|
||||||
#import matplotlib
|
|
||||||
#matplotlib.use('TkAgg')
|
|
||||||
import seaborn as sns
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from multiprocessing import Pool
|
|
||||||
from collections import namedtuple
|
|
||||||
from itertools import product
|
|
||||||
|
|
||||||
|
|
||||||
from archai.common import utils
|
|
||||||
from archai.common.ordereddict_logger import OrderedDictLogger
|
|
||||||
from archai.common.analysis_utils import epoch_nodes, parse_a_job, fix_yaml, remove_seed_part, group_multi_runs, collect_epoch_nodes, EpochStats, FoldStats, stat2str, get_epoch_stats, get_summary_text, get_details_text, plot_epochs, write_report
|
|
||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
ZEROCOST_MEASURES = ['grad_norm', 'snip', 'grasp', 'fisher', 'jacob_cov', 'plain', 'synflow', 'synflow_bn']
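# zero-cost proxy measures computed at initialization; scores are read from the 'zerocost_evaluate' section of each job log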
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Report creator')
|
|
||||||
parser.add_argument('--results-dir', '-d', type=str,
|
|
||||||
default=r'~/logdir/proxynas_test_0001',
|
|
||||||
help='folder with experiment results from pt')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports',
|
|
||||||
help='folder to output reports')
|
|
||||||
parser.add_argument('--reg-evals-file', '-r', type=str, default=None,
|
|
||||||
help='optional yaml file which contains full evaluation \
|
|
||||||
of architectures on new datasets not part of natsbench')
|
|
||||||
parser.add_argument('--params-flops-file', '-p', type=str, default=None,
|
|
||||||
help='optional yaml file which contains params flops information \
|
|
||||||
of architectures on new datasets not part of natsbench')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
# root dir where all results are stored
|
|
||||||
results_dir = pathlib.Path(utils.full_path(args.results_dir))
|
|
||||||
print(f'results_dir: {results_dir}')
|
|
||||||
|
|
||||||
# extract experiment name which is top level directory
|
|
||||||
exp_name = results_dir.parts[-1]
|
|
||||||
|
|
||||||
# create results dir for experiment
|
|
||||||
out_dir = utils.full_path(os.path.join(args.out_dir, exp_name))
|
|
||||||
print(f'out_dir: {out_dir}')
|
|
||||||
os.makedirs(out_dir, exist_ok=True)
|
|
||||||
|
|
||||||
# if optional regular evaluation lookup file is provided
|
|
||||||
reg_evals_data = None
|
|
||||||
if args.reg_evals_file:
|
|
||||||
with open(args.reg_evals_file, 'r') as f:
|
|
||||||
reg_evals_data = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
# if optional params flops lookup file is provided
|
|
||||||
params_flops_data = None
|
|
||||||
if args.params_flops_file:
|
|
||||||
with open(args.params_flops_file, 'r') as f:
|
|
||||||
params_flops_data = yaml.load(f, Loader=yaml.Loader)
|
|
||||||
|
|
||||||
# get list of all structured logs for each job
|
|
||||||
logs = {}
|
|
||||||
confs = {}
|
|
||||||
job_dirs = list(results_dir.iterdir())
|
|
||||||
|
|
||||||
# # test single job parsing for debugging
|
|
||||||
# # WARNING: very slow, just use for debugging
|
|
||||||
# for job_dir in job_dirs:
|
|
||||||
# a = parse_a_job(job_dir)
|
|
||||||
|
|
||||||
# parallel parsing of yaml logs
|
|
||||||
num_workers = 48
|
|
||||||
with Pool(num_workers) as p:
|
|
||||||
a = p.map(parse_a_job, job_dirs)
|
|
||||||
|
|
||||||
for storage in a:
|
|
||||||
for key, val in storage.items():
|
|
||||||
logs[key] = val[0]
|
|
||||||
confs[key] = val[1]
|
|
||||||
|
|
||||||
|
|
||||||
# remove all search jobs
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
if 'search' in key:
|
|
||||||
logs.pop(key)
|
|
||||||
|
|
||||||
# remove all arch_ids which did not finish
|
|
||||||
for key in list(logs.keys()):
|
|
||||||
to_delete = False
|
|
||||||
|
|
||||||
# it might have died early
|
|
||||||
if 'zerocost_evaluate' not in list(logs[key].keys()):
|
|
||||||
to_delete = True
|
|
||||||
|
|
||||||
if to_delete:
|
|
||||||
print(f'arch id {key} did not finish. removing from calculations.')
|
|
||||||
logs.pop(key)
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
|
||||||
all_arch_ids = []
|
|
||||||
all_reg_evals = []
|
|
||||||
all_params_evals = []
|
|
||||||
all_flops_evals = []
|
|
||||||
all_zerocost_init_evals = defaultdict(list)
|
|
||||||
|
|
||||||
for key in logs.keys():
|
|
||||||
if 'eval' in key:
|
|
||||||
try:
|
|
||||||
# regular evaluation
|
|
||||||
# important to get this first since if it is not
|
|
||||||
# available for non-benchmark datasets we need to
|
|
||||||
# remove it from consideration
|
|
||||||
# --------------------
|
|
||||||
if not args.reg_evals_file:
|
|
||||||
reg_eval_top1 = logs[key]['regular_evaluate']['regtrainingtop1']
|
|
||||||
else:
|
|
||||||
# lookup from the provided file since this dataset is not part of the
|
|
||||||
# benchmark and hence we have to provide the info separately
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['natsbench']['arch_index']
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['nasbench101']['arch_index']
|
|
||||||
elif 'dartsspace' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
arch_id_in_bench = confs[key]['nas']['eval']['dartsspace']['arch_index']
|
|
||||||
|
|
||||||
if arch_id_in_bench not in list(reg_evals_data.keys()):
|
|
||||||
# if the dataset used is not part of the standard benchmark some of the architectures
|
|
||||||
# may not have full evaluation accuracies available. Remove them from consideration.
|
|
||||||
continue
|
|
||||||
reg_eval_top1 = reg_evals_data[arch_id_in_bench]
|
|
||||||
|
|
||||||
if params_flops_data:
|
|
||||||
params = params_flops_data[arch_id_in_bench]['params']
|
|
||||||
flops = params_flops_data[arch_id_in_bench]['flops']
|
|
||||||
|
|
||||||
all_reg_evals.append(reg_eval_top1)
|
|
||||||
if params_flops_data: all_params_evals.append(params)  # params only available when --params-flops-file is given
|
|
||||||
if params_flops_data: all_flops_evals.append(flops)
|
|
||||||
|
|
||||||
# zerocost initial scores
|
|
||||||
#-------------------------------
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
score = logs[key]['zerocost_evaluate']['eval_arch']['eval_train'][measure]
|
|
||||||
all_zerocost_init_evals[measure].append(score)
|
|
||||||
|
|
||||||
# record the arch id
|
|
||||||
# --------------------
|
|
||||||
if 'natsbench' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['natsbench']['arch_index'])
|
|
||||||
elif 'nasbench101' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['nasbench101']['arch_index'])
|
|
||||||
elif 'dartsspace' in list(confs[key]['nas']['eval'].keys()):
|
|
||||||
all_arch_ids.append(confs[key]['nas']['eval']['dartsspace']['arch_index'])
|
|
||||||
|
|
||||||
except KeyError as err:
|
|
||||||
print(f'KeyError {err} not in {key}')
|
|
||||||
sys.exit()
|
|
||||||
|
|
||||||
# Sanity check
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
assert len(all_reg_evals) == len(all_zerocost_init_evals[measure])
|
|
||||||
assert len(all_reg_evals) == len(all_arch_ids)
|
|
||||||
|
|
||||||
# if params flops is present compute spearman wrt params and flops
|
|
||||||
# also compute scatter plots for params vs synflow
|
|
||||||
if params_flops_data:
|
|
||||||
assert len(all_reg_evals) == len(all_params_evals)
|
|
||||||
assert len(all_reg_evals) == len(all_flops_evals)
|
|
||||||
|
|
||||||
spe_params, _ = spearmanr(all_reg_evals, all_params_evals)
|
|
||||||
spe_flops, _ = spearmanr(all_reg_evals, all_flops_evals)
|
|
||||||
|
|
||||||
# Store some key numbers in results.txt
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'w') as f:
|
|
||||||
f.write(f'Total valid archs processed: {len(all_reg_evals)} \n')
|
|
||||||
f.write(f'Spearman wrt params: {spe_params} \n')
|
|
||||||
f.write(f'Spearman wrt flops: {spe_flops} \n')
|
|
||||||
|
|
||||||
print(f'Total valid archs processed: {len(all_reg_evals)}')
|
|
||||||
print(f'Spearman wrt params: {spe_params} \n')
|
|
||||||
print(f'Spearman wrt flops: {spe_flops} \n')
|
|
||||||
|
|
||||||
# scatter params vs. synflow
|
|
||||||
fig = go.Figure()
|
|
||||||
fig.add_trace(go.Scatter(x=all_params_evals, y=all_zerocost_init_evals['synflow'], mode='markers'))
|
|
||||||
fig.update_layout(xaxis_title="Parameters",
|
|
||||||
yaxis_title="Synflow")
|
|
||||||
fig.update_layout(font=dict(size=36)) # font size
|
|
||||||
fig.update_traces(marker=dict(size=20)) # marker size
|
|
||||||
savename_html = os.path.join(out_dir, f'params_vs_synflow.html')
|
|
||||||
savename_png = os.path.join(out_dir, f'params_vs_synflow.png')
|
|
||||||
fig.write_html(savename_html)
|
|
||||||
fig.write_image(savename_png, width=1500, height=1500, scale=1)
|
|
||||||
|
|
||||||
# create heatmap of all pairs of proxies along with params, flops, gt
|
|
||||||
ZEROCOST_MEASURES_PF = ['grad_norm', 'snip', 'grasp', 'fisher', 'jacob_cov', 'synflow', 'params', 'flops', 'gt']
|
|
||||||
all_zerocost_init_evals['params'] = all_params_evals
|
|
||||||
all_zerocost_init_evals['flops'] = all_flops_evals
|
|
||||||
all_zerocost_init_evals['gt'] = all_reg_evals
|
|
||||||
hm = np.zeros((len(ZEROCOST_MEASURES_PF), len(ZEROCOST_MEASURES_PF)))
|
|
||||||
for i, m1 in enumerate(ZEROCOST_MEASURES_PF):
|
|
||||||
for j, m2 in enumerate(ZEROCOST_MEASURES_PF):
|
|
||||||
# sometimes jacob_cov has a nan here and there. ignore those.
|
|
||||||
m1_scores = all_zerocost_init_evals[m1]
|
|
||||||
m2_scores = all_zerocost_init_evals[m2]
|
|
||||||
valid_scores = [x for x in zip(m1_scores, m2_scores) if not ma.isnan(x[0]) and not ma.isnan(x[1])]
|
|
||||||
m1_valid = [x[0] for x in valid_scores]
|
|
||||||
m2_valid = [x[1] for x in valid_scores]
|
|
||||||
spe, _ = spearmanr(m1_valid, m2_valid)
|
|
||||||
hm[i][j] = spe
|
|
||||||
|
|
||||||
fig = px.imshow(hm, text_auto="0.1f", x=ZEROCOST_MEASURES_PF, y=ZEROCOST_MEASURES_PF)
|
|
||||||
fig.update_layout(font=dict(size=36)) # font size
|
|
||||||
savename_html = os.path.join(out_dir, f'all_pairs_zc_spe.html')
|
|
||||||
savename_png = os.path.join(out_dir, f'all_pairs_zc_spe.png')
|
|
||||||
fig.write_html(savename_html)
|
|
||||||
fig.write_image(savename_png, width=1500, height=1500, scale=1)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
results_savename = os.path.join(out_dir, 'results.txt')
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write(f'Total valid archs processed: {len(all_reg_evals)} \n')
|
|
||||||
f.write(f'Spearman wrt params: {spe_params} \n')
|
|
||||||
f.write(f'Spearman wrt flops: {spe_flops} \n')
|
|
||||||
|
|
||||||
print(f'Total valid archs processed: {len(all_reg_evals)}')
|
|
||||||
print(f'Spearman wrt params: {spe_params}')
|
|
||||||
print(f'Spearman wrt flops: {spe_flops}')
|
|
||||||
|
|
||||||
top_percent_range = range(2, 101, 2)
|
|
||||||
# Rank correlations at top n percent of architectures
|
|
||||||
# -------------------------------------------------------
|
|
||||||
spe_top_percents_init = defaultdict(list)
|
|
||||||
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
reg_init = [(all_reg_evals[i], all_zerocost_init_evals[measure][i]) for i in range(len(all_reg_evals))]
|
|
||||||
reg_init.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
top_percents = []
|
|
||||||
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(reg_init) * top_percent * 0.01))
|
|
||||||
top_percent_evals = reg_init[:num_to_keep]
|
|
||||||
# sometimes jacob_cov has a nan here and there. ignore those.
|
|
||||||
top_percent_reg = [x[0] for x in top_percent_evals
|
|
||||||
if not ma.isnan(x[0]) and not ma.isnan(x[1])]
|
|
||||||
top_percent_init = [x[1] for x in top_percent_evals
|
|
||||||
if not ma.isnan(x[0]) and not ma.isnan(x[1])]
|
|
||||||
|
|
||||||
assert len(top_percent_reg) == len(top_percent_init)
|
|
||||||
|
|
||||||
spe_init, _ = spearmanr(top_percent_reg, top_percent_init)
|
|
||||||
# for the entire bin of archs scatter plot
|
|
||||||
# groundtruth accuracy (x-axis) vs. measure and save
|
|
||||||
if top_percent == 100:
|
|
||||||
fig = go.Figure()
|
|
||||||
fig.add_trace(go.Scatter(x=top_percent_reg, y=top_percent_init, mode='markers'))
|
|
||||||
fig.update_layout(xaxis_title="Test Accuracy",
|
|
||||||
yaxis_title=f"{measure}")
|
|
||||||
fig.update_layout(font=dict(size=36)) # font size
|
|
||||||
fig.update_traces(marker=dict(size=20)) # marker size
|
|
||||||
savename_html = os.path.join(out_dir, f'test_accuracy_vs_{measure}.html')
|
|
||||||
savename_png = os.path.join(out_dir, f'test_accuracy_vs_{measure}.png')
|
|
||||||
fig.write_html(savename_html)
|
|
||||||
fig.write_image(savename_png, width=1500, height=1500, scale=1)
|
|
||||||
#fig.show()
|
|
||||||
|
|
||||||
|
|
||||||
spe_top_percents_init[measure].append(spe_init)
|
|
||||||
|
|
||||||
spe_top_percents_init['top_percents'] = top_percents
|
|
||||||
|
|
||||||
|
|
||||||
# overlap in top x% of architectures between method and groundtruth
|
|
||||||
# ------------------------------------------------------------------
|
|
||||||
cr_init_top_percents = defaultdict(list)
|
|
||||||
|
|
||||||
arch_id_reg_evals = [(all_arch_ids[i], all_reg_evals[i]) for i in range(len(all_reg_evals))]
|
|
||||||
arch_id_reg_evals.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
arch_id_init = [(all_arch_ids[i], all_zerocost_init_evals[measure][i]) for i in range(len(all_reg_evals))]
|
|
||||||
arch_id_init.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
|
|
||||||
assert len(arch_id_reg_evals) == len(arch_id_init)
|
|
||||||
|
|
||||||
top_percents = []
|
|
||||||
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(arch_id_reg_evals) * top_percent * 0.01))
|
|
||||||
top_percent_arch_id_reg_evals = arch_id_reg_evals[:num_to_keep]
|
|
||||||
top_percent_arch_id_init_evals = arch_id_init[:num_to_keep]
|
|
||||||
|
|
||||||
# take the set of arch_ids in each method and find overlap with top archs
|
|
||||||
set_reg = set([x[0] for x in top_percent_arch_id_reg_evals])
|
|
||||||
set_init = set([x[0] for x in top_percent_arch_id_init_evals])
|
|
||||||
init_num_common = len(set_reg.intersection(set_init))
|
|
||||||
cr_init_top_percents[measure].append(init_num_common/num_to_keep)
|
|
||||||
|
|
||||||
cr_init_top_percents['top_percents'] = top_percents
|
|
||||||
|
|
||||||
# save data
|
|
||||||
save_data(spe_top_percents_init, cr_init_top_percents, out_dir)
|
|
||||||
|
|
||||||
# print out summary in pretty format to text file
|
|
||||||
with open(results_savename, 'a') as f:
|
|
||||||
f.write('spearman correlations: \n')
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
f.write(f'{measure}: {spe_top_percents_init[measure][-1]} \n')
|
|
||||||
print(f'{measure}: {spe_top_percents_init[measure][-1]} \n')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def save_data(spe_top_percents:Dict[str, list], cr_top_percents:Dict[str, list], savefolder:str):
|
|
||||||
# save raw data for other aggregate plots over experiments
|
|
||||||
# --------------------------------------------------------
|
|
||||||
raw_data_dict = {}
|
|
||||||
raw_data_dict['top_percents'] = spe_top_percents['top_percents']
|
|
||||||
|
|
||||||
for measure in ZEROCOST_MEASURES:
|
|
||||||
raw_data_dict[measure+'_spe'] = spe_top_percents[measure]
|
|
||||||
raw_data_dict[measure+'_ratio_common'] = cr_top_percents[measure]
|
|
||||||
|
|
||||||
os.makedirs(savefolder, exist_ok=True)
|
|
||||||
savename = os.path.join(savefolder, 'raw_data.yaml')
|
|
||||||
with open(savename, 'w') as f:
|
|
||||||
yaml.dump(raw_data_dict, f)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,219 +0,0 @@
|
||||||
from collections import defaultdict
|
|
||||||
from enum import Enum
|
|
||||||
import json
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
from typing import Dict, List
|
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from plotly.subplots import make_subplots
|
|
||||||
import plotly.graph_objects as go
|
|
||||||
|
|
||||||
from scipy.stats import kendalltau, spearmanr, sem
|
|
||||||
import statistics
import math as ma
import numpy as np
|
|
||||||
|
|
||||||
SCORERS = {'train_accuracy', 'train_loss', 'train_cross_entropy', 'val_accuracy'}
|
|
||||||
|
|
||||||
|
|
||||||
def plot_spearman_top_percents(results:Dict[str, list],
|
|
||||||
plotly_fig_handle,
|
|
||||||
legend_text:str,
|
|
||||||
marker_color:str):
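# plots one (avg. proxy time, spearman) point with a stderr error bar per top-percent bucket; each bucket goes to its own subplot row (row=idx+1)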
|
|
||||||
|
|
||||||
for idx, tp in enumerate(results['top_percents']):
|
|
||||||
avg_time = results['avg_times'][idx]
|
|
||||||
stderr = results['stderr_times'][idx]
|
|
||||||
error_x = dict(type='data', array=[stderr], visible=True, thickness=1, width=0)
|
|
||||||
spe = results['spes'][idx]
|
|
||||||
show_legend = (idx == 0)
|
|
||||||
plotly_fig_handle.add_trace(go.Scatter(x=[avg_time],
|
|
||||||
error_x=error_x,
|
|
||||||
y=[spe],
|
|
||||||
mode='markers',
|
|
||||||
name=legend_text,
|
|
||||||
showlegend=show_legend,
|
|
||||||
marker_color=marker_color),
|
|
||||||
row=idx+1, col=1)
|
|
||||||
|
|
||||||
|
|
||||||
def find_train_thresh_epochs(train_acc:List[float], train_thresh:float)->int:
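# returns the number of epochs (1-indexed) needed to reach train_thresh, or None if it is never reached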
|
|
||||||
for i, t in enumerate(train_acc):
|
|
||||||
if t >= train_thresh:
|
|
||||||
return i + 1
|
|
||||||
|
|
||||||
|
|
||||||
def top_buckets_spearmans(all_reg_evals:List[float],
|
|
||||||
all_proxy_evals:List[float],
|
|
||||||
all_proxy_times:List[float]):
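# buckets architectures by top x% of groundtruth accuracy and, per bucket, computes the Spearman correlation between groundtruth and proxy scores along with mean/std/stderr of the proxy evaluation times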
|
|
||||||
|
|
||||||
assert len(all_reg_evals) == len(all_proxy_evals)
|
|
||||||
assert len(all_reg_evals) == len(all_proxy_times)
|
|
||||||
reg_proxy = [(x, y, z) for x, y, z in zip(all_reg_evals, all_proxy_evals, all_proxy_times)]
|
|
||||||
|
|
||||||
# sort in descending order of accuracy of regular evaluation
|
|
||||||
reg_proxy.sort(key= lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
top_percent_times_avg = []
|
|
||||||
top_percent_times_std = []
|
|
||||||
top_percent_times_stderr = []
|
|
||||||
|
|
||||||
spe_top_percents = []
|
|
||||||
|
|
||||||
top_percents = []
|
|
||||||
top_percent_range = range(10, 101, 10)
|
|
||||||
for top_percent in top_percent_range:
|
|
||||||
top_percents.append(top_percent)
|
|
||||||
num_to_keep = int(ma.floor(len(reg_proxy) * top_percent * 0.01))
|
|
||||||
top_percent_reg_proxy_times = reg_proxy[:num_to_keep]
|
|
||||||
top_percent_reg = [x[0] for x in top_percent_reg_proxy_times]
|
|
||||||
top_percent_proxy = [x[1] for x in top_percent_reg_proxy_times]
|
|
||||||
top_percent_proxy_times = [x[2] for x in top_percent_reg_proxy_times]
|
|
||||||
|
|
||||||
top_percent_times_avg.append(np.mean(np.array(top_percent_proxy_times)))
|
|
||||||
top_percent_times_std.append(np.std(np.array(top_percent_proxy_times)))
|
|
||||||
top_percent_times_stderr.append(sem(np.array(top_percent_proxy_times)))
|
|
||||||
|
|
||||||
spe_proxy, _ = spearmanr(top_percent_reg, top_percent_proxy)
|
|
||||||
spe_top_percents.append(spe_proxy)
|
|
||||||
|
|
||||||
results = {
|
|
||||||
'top_percents': top_percents,
|
|
||||||
'spes': spe_top_percents,
|
|
||||||
'avg_times': top_percent_times_avg,
|
|
||||||
'std_times': top_percent_times_std,
|
|
||||||
'stderr_times': top_percent_times_stderr
|
|
||||||
}
|
|
||||||
|
|
||||||
return results
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
parser = argparse.ArgumentParser(description='Nasbench301 time to threshold vs. test accuracy')
|
|
||||||
parser.add_argument('--nb301-logs-dir', '-d', type=str, help='folder with nasbench301 architecture training logs')
|
|
||||||
parser.add_argument('--out-dir', '-o', type=str, default=r'~/logdir/reports', help='folder to output reports')
|
|
||||||
parser.add_argument('--scorer', '-s', type=str, default='train_accuracy',
|
|
||||||
help='one of train_accuracy, train_loss, train_cross_entropy, val_accuracy')
|
|
||||||
args, extra_args = parser.parse_known_args()
|
|
||||||
|
|
||||||
if args.scorer not in SCORERS:
|
|
||||||
parser.error(f'--scorer must be one of {sorted(SCORERS)}')
|
|
||||||
scorer_key = "Train/" + args.scorer
|
|
||||||
|
|
||||||
# TODO: make these into cmd line arguments
|
|
||||||
train_thresh = 60.0
|
|
||||||
post_thresh_epochs = 10
|
|
||||||
|
|
||||||
all_test_acc = []
|
|
||||||
all_fear_end_acc = []
|
|
||||||
all_fear_time = []
|
|
||||||
|
|
||||||
all_reg_train_acc = defaultdict(list)
|
|
||||||
all_reg_train_time_per_epoch = defaultdict(list)
|
|
||||||
|
|
||||||
# collect all the json file names in the log dir recursively
|
|
||||||
for root, dir, files in os.walk(args.nb301_logs_dir):
|
|
||||||
for name in tqdm(files):
|
|
||||||
log_name = os.path.join(root, name)
|
|
||||||
with open(log_name, 'r') as f:
|
|
||||||
log_data = json.load(f)
|
|
||||||
num_epochs = len(log_data['learning_curves'][scorer_key])
|
|
||||||
test_acc = log_data['test_accuracy']
|
|
||||||
per_epoch_time = log_data['runtime'] / num_epochs
|
|
||||||
num_epochs_to_thresh = find_train_thresh_epochs(log_data['learning_curves'][scorer_key],
|
|
||||||
train_thresh)
|
|
||||||
# many weak architectures will never reach threshold
|
|
||||||
if not num_epochs_to_thresh:
|
|
||||||
continue
|
|
||||||
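# FEAR simulation: stage 1 trains to the accuracy threshold, stage 2 adds a fixed number of extra epochs; the simulated cost is the per-epoch time times the stage-2 end epoch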
simulated_stage2_epoch = num_epochs_to_thresh + post_thresh_epochs
|
|
||||||
fear_time = per_epoch_time * simulated_stage2_epoch
|
|
||||||
try:
|
|
||||||
train_acc_stage2 = log_data['learning_curves'][scorer_key][simulated_stage2_epoch]
|
|
||||||
except IndexError:
# the simulated stage-2 epoch can exceed the length of the logged learning curve
|
|
||||||
continue
|
|
||||||
|
|
||||||
all_test_acc.append(test_acc)
|
|
||||||
all_fear_end_acc.append(train_acc_stage2)
|
|
||||||
all_fear_time.append(fear_time)
|
|
||||||
|
|
||||||
# get training acc at all epochs for regular
|
|
||||||
# evaluation baseline
|
|
||||||
for epoch_num, train_acc in enumerate(log_data['learning_curves'][scorer_key]):
|
|
||||||
all_reg_train_acc[epoch_num].append(train_acc)
|
|
||||||
all_reg_train_time_per_epoch[epoch_num].append((epoch_num + 1) * per_epoch_time)
|
|
||||||
|
|
||||||
|
|
||||||
spes_train_acc_vs_epoch = {}
|
|
||||||
avg_time_train_acc_vs_epoch = {}
|
|
||||||
for epoch_num, train_accs_epoch in all_reg_train_acc.items():
|
|
||||||
if len(train_accs_epoch) != len(all_test_acc):
|
|
||||||
continue
|
|
||||||
this_spe, _ = spearmanr(all_test_acc, train_accs_epoch)
|
|
||||||
spes_train_acc_vs_epoch[epoch_num] = this_spe
|
|
||||||
avg_time_train_acc_vs_epoch[epoch_num] = statistics.mean(all_reg_train_time_per_epoch[epoch_num])
|
|
||||||
|
|
||||||
for epoch_num, spe in spes_train_acc_vs_epoch.items():
|
|
||||||
avg_time = avg_time_train_acc_vs_epoch[epoch_num]
|
|
||||||
# print(f'Epoch {epoch_num}, spearman {spe}, avg. time: {avg_time} seconds')
|
|
||||||
|
|
||||||
# FEAR rank correlations at top n percent of architectures
|
|
||||||
# -------------------------------------------------------------
|
|
||||||
fear_results = top_buckets_spearmans(all_reg_evals=all_test_acc,
|
|
||||||
all_proxy_evals=all_fear_end_acc,
|
|
||||||
all_proxy_times=all_fear_time)
|
|
||||||
|
|
||||||
# picking epoch 10 to plot for regular evaluation
|
|
||||||
reg_results = {}
|
|
||||||
for epoch_num in all_reg_train_acc.keys():
|
|
||||||
all_reg = all_reg_train_acc[epoch_num]
|
|
||||||
all_reg_times = all_reg_train_time_per_epoch[epoch_num]
|
|
||||||
if len(all_test_acc) != len(all_reg):
|
|
||||||
continue
|
|
||||||
reg_results[epoch_num] = top_buckets_spearmans(all_reg_evals=all_test_acc,
|
|
||||||
all_proxy_evals=all_reg,
|
|
||||||
all_proxy_times=all_reg_times)
|
|
||||||
|
|
||||||
# plot
|
|
||||||
num_plots = len(fear_results['top_percents'])
|
|
||||||
num_plots_per_row = num_plots
|
|
||||||
num_plots_per_col = 1
|
|
||||||
|
|
||||||
subplot_titles = [f'Top {x} %' for x in fear_results['top_percents']]
|
|
||||||
|
|
||||||
fig = make_subplots(rows=num_plots_per_row,
|
|
||||||
cols=num_plots_per_col,
|
|
||||||
subplot_titles=subplot_titles,
|
|
||||||
shared_yaxes=False)
|
|
||||||
|
|
||||||
plot_spearman_top_percents(fear_results, fig, 'FEAR', 'red')
|
|
||||||
|
|
||||||
for epoch_num, epoch_num_results in reg_results.items():
|
|
||||||
plot_spearman_top_percents(epoch_num_results, fig, f'Regular epochs {epoch_num}', 'blue')
|
|
||||||
|
|
||||||
fig.update_layout(title_text="Duration vs. Spearman Rank Correlation vs. Top %")
|
|
||||||
fig.show()
|
|
||||||
|
|
||||||
|
|
||||||
# Regular evaluation rank correlations at top n percent of architectures
|
|
||||||
# -----------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
print('dummy')
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
|
@@ -1,417 +0,0 @@
|
||||||
exp_folder: 'F:\\archai_experiment_reports'
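# experiment key naming (inferred from the labels below): fb=stage-2 batch size, ftlr=stage-2 lr, fte=stage-2 epochs, ct=stage-1 batch size, ftt=stage-1 train-accuracy threshold, c<k>=frozen up to cell k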
|
|
||||||
|
|
||||||
|
|
||||||
darts_cifar10:
|
|
||||||
freezetrain:
|
|
||||||
ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_ftonly: 'ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte15_ct96_ftt0.6_ftonly: 'ft_dt_fb96_ftlr0.025_fte15_ct96_ftt0.6_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte20_ct96_ftt0.6_ftonly: 'ft_dt_fb96_ftlr0.025_fte20_ct96_ftt0.6_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c5_ftonly: 'ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c5_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c4_ftonly: 'ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c4_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c3_ftonly: 'ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_c3_ftonly'
|
|
||||||
ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_nofreeze_ftonly: 'ft_dt_fb96_ftlr0.025_fte10_ct96_ftt0.6_nofreeze_ftonly'
|
|
||||||
|
|
||||||
|
|
||||||
shortreg:
|
|
||||||
dt_reg_b96_e5: 'dt_reg_b96_e5'
|
|
||||||
dt_reg_b96_e10: 'dt_reg_b96_e10'
|
|
||||||
dt_reg_b96_e15: 'dt_reg_b96_e15'
|
|
||||||
dt_reg_b96_e20: 'dt_reg_b96_e20'
|
|
||||||
|
|
||||||
colors:
|
|
||||||
freezetrain: 'red'
|
|
||||||
zero_cost: 'green'
|
|
||||||
shortreg: 'blue'
|
|
||||||
|
|
||||||
|
|
||||||
natsbench_sss_cifar10:
|
|
||||||
freezetrain:
|
|
||||||
nb_sss_c4_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_c4_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
nb_sss_c3_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_c3_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
nb_sss_c2_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_c2_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
nb_sss_c1_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_c1_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
|
|
||||||
nb_sss_r0.1_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_r0.1_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
nb_sss_r0.2_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_r0.2_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
nb_sss_r1.0_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'nb_sss_r1.0_ft_fb256_ftlr0.1_fte10_ct256_ftt0.6'
|
|
||||||
|
|
||||||
shortreg:
|
|
||||||
nb_sss_reg_b256_e5: 'nb_sss_reg_b256_e5'
|
|
||||||
nb_sss_reg_b256_e10: 'nb_sss_reg_b256_e10'
|
|
||||||
nb_sss_reg_b256_e15: 'nb_sss_reg_b256_e15'
|
|
||||||
nb_sss_reg_b256_e20: 'nb_sss_reg_b256_e20'
|
|
||||||
|
|
||||||
colors:
|
|
||||||
freezetrain: 'red'
|
|
||||||
zero_cost: 'green'
|
|
||||||
shortreg: 'blue'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
natsbench_cifar10:
|
|
||||||
|
|
||||||
freezetrain:
|
|
||||||
# ft_fb2048_ftlr1.5_fte5_ct256_ftt0.6: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb2048_ftlr1.5_fte10_ct256_ftt0.6: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb2048_ftlr1.5_fte5_ct256_ftt0.5: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.5'
|
|
||||||
# ft_fb2048_ftlr1.5_fte10_ct256_ftt0.5: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.5'
|
|
||||||
# ft_fb2048_ftlr1.5_fte5_ct256_ftt0.4: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.4'
|
|
||||||
# ft_fb2048_ftlr1.5_fte10_ct256_ftt0.4: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.4'
|
|
||||||
# ft_fb2048_ftlr1.5_fte5_ct256_ftt0.3: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_fb2048_ftlr1.5_fte10_ct256_ftt0.3: 'fear stage 2: batch 2048, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_fb1024_ftlr1.5_fte5_ct256_ftt0.6: 'fear stage 2: batch 1024, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr1.5_fte10_ct256_ftt0.6: 'fear stage 2: batch 1024, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr1.5_fte5_ct256_ftt0.6_scu: 'fear stage 2: batch 1024, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.6 scu'
|
|
||||||
# ft_fb1024_ftlr1.5_fte10_ct256_ftt0.6_scu: 'fear stage 2: batch 1024, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.6 scu'
|
|
||||||
# ft_fb512_ftlr1.5_fte5_ct256_ftt0.6: 'fear stage 2: batch 512, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb512_ftlr1.5_fte10_ct256_ftt0.6: 'fear stage 2: batch 512, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb256_ftlr1.5_fte5_ct256_ftt0.6: 'fear stage 2: batch 256, lr 1.5, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb256_ftlr1.5_fte10_ct256_ftt0.6: 'fear stage 2: batch 256, lr 1.5, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
#ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6_scu: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb512_ftlr0.1_fte5_ct256_ftt0.6: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb512_ftlr0.1_fte10_ct256_ftt0.6: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb512_ftlr0.1_fte5_ct256_ftt0.6_scu: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6 scu'
|
|
||||||
# ft_fb512_ftlr0.1_fte10_ct256_ftt0.6_scu: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6 scu'
|
|
||||||
# ft_fb256_ftlr0.1_fte5_ct256_ftt0.6: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb256_ftlr0.1_fte10_ct256_ftt0.6: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb256_ftlr0.1_fte15_ct256_ftt0.6: 'fear stage 2: batch 256, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c9: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6, frozen till: cell 9'
|
|
||||||
# ft_fb512_ftlr0.1_fte5_ct256_ftt0.6_c9: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6, frozen till: cell 9'
|
|
||||||
# ft_fb512_ftlr0.1_fte10_ct256_ftt0.6_c9: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6, frozen till: cell 9'
|
|
||||||
# ft_fb256_ftlr0.1_fte5_ct256_ftt0.6_c9: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6, frozen till: cell 9'
|
|
||||||
# ft_fb256_ftlr0.1_fte10_ct256_ftt0.6_c9: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6, frozen till: cell 9'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_nofreeze: 'fear stage 2: nofreeze, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_classifier: 'fear stage 2: last layer, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c16: 'fear stage 2: c16 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c15: 'fear stage 2: c15 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_scu: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c14: 'fear stage 2: c14 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c12: 'fear stage 2: c12 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c11: 'fear stage 2: c11 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c10: 'fear stage 2: c10 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.6_c9: 'fear stage 2: c9 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.6'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.55_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.55'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.50_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.50'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.45_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.45'
|
|
||||||
# ft_fb1024_ftlr0.1_fte5_ct256_ftt0.40_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.40'
|
|
||||||
ft_fb1024_ftlr0.1_fte10_ct256_ftt0.6_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.6'
|
|
||||||
ft_fb1024_ftlr0.1_fte15_ct256_ftt0.6_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.6'
|
|
||||||
ft_fb1024_ftlr0.1_fte20_ct256_ftt0.6_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 20, stage 1: batch 256, thresh 0.6'
|
|
||||||
ft_fb1024_ftlr0.1_fte25_ct256_ftt0.6_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 25, stage 1: batch 256, thresh 0.6'
|
|
||||||
ft_fb1024_ftlr0.1_fte30_ct256_ftt0.6_c13: 'fear stage 2: c13 onwards, batch 1024, lr 0.1, <br> epochs 30, stage 1: batch 256, thresh 0.6'
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
zero_cost:
|
|
||||||
zc_cond_b256_ftt0.6_at_init: 'zero cost score at initialization'
|
|
||||||
#zc_cond_b256_ftt0.6_at_cond: 'zero cost score at train accuracy 0.6'
|
|
||||||
|
|
||||||
|
|
||||||
shortreg:
|
|
||||||
# nb_reg_b1024_e01: 'shortreg: batch 1024 epochs 01'
|
|
||||||
# nb_reg_b1024_e02: 'shortreg: batch 1024 epochs 02'
|
|
||||||
# nb_reg_b1024_e04: 'shortreg: batch 1024 epochs 04'
|
|
||||||
# nb_reg_b1024_e06: 'shortreg: batch 1024 epochs 06'
|
|
||||||
# nb_reg_b1024_e08: 'shortreg: batch 1024 epochs 08'
|
|
||||||
# nb_reg_b1024_e10: 'shortreg: batch 1024 epochs 10'
|
|
||||||
# nb_reg_b1024_e20: 'shortreg: batch 1024 epochs 20'
|
|
||||||
# nb_reg_b1024_e30: 'shortreg: batch 1024 epochs 30'
|
|
||||||
nb_reg_b1024_e01_scu: 'shortreg: batch 1024 epochs 01'
|
|
||||||
nb_reg_b1024_e02_scu: 'shortreg: batch 1024 epochs 02'
|
|
||||||
nb_reg_b1024_e04_scu: 'shortreg: batch 1024 epochs 04'
|
|
||||||
nb_reg_b1024_e06_scu: 'shortreg: batch 1024 epochs 06'
|
|
||||||
nb_reg_b1024_e08_scu: 'shortreg: batch 1024 epochs 08'
|
|
||||||
nb_reg_b1024_e10_scu: 'shortreg: batch 1024 epochs 10'
|
|
||||||
nb_reg_b1024_e20_scu: 'shortreg: batch 1024 epochs 20'
|
|
||||||
nb_reg_b1024_e30_scu: 'shortreg: batch 1024 epochs 30'
|
|
||||||
nb_reg_b512_e01: 'shortreg: batch 512 epochs 01'
|
|
||||||
nb_reg_b512_e02: 'shortreg: batch 512 epochs 02'
|
|
||||||
nb_reg_b512_e04: 'shortreg: batch 512 epochs 04'
|
|
||||||
nb_reg_b512_e06: 'shortreg: batch 512 epochs 06'
|
|
||||||
nb_reg_b512_e08: 'shortreg: batch 512 epochs 08'
|
|
||||||
nb_reg_b512_e10: 'shortreg: batch 512 epochs 10'
|
|
||||||
# nb_reg_b256_e01: 'shortreg: batch 256 epochs 01'
|
|
||||||
# nb_reg_b256_e02: 'shortreg: batch 256 epochs 02'
|
|
||||||
# nb_reg_b256_e04: 'shortreg: batch 256 epochs 04'
|
|
||||||
# nb_reg_b256_e06: 'shortreg: batch 256 epochs 06'
|
|
||||||
# nb_reg_b256_e08: 'shortreg: batch 256 epochs 08'
|
|
||||||
# nb_reg_b256_e10: 'shortreg: batch 256 epochs 10'
|
|
||||||
nb_reg_b256_e01_scu: 'shortreg: batch 256 epochs 01'
|
|
||||||
nb_reg_b256_e02_scu: 'shortreg: batch 256 epochs 02'
|
|
||||||
nb_reg_b256_e04_scu: 'shortreg: batch 256 epochs 04'
|
|
||||||
nb_reg_b256_e06_scu: 'shortreg: batch 256 epochs 06'
|
|
||||||
nb_reg_b256_e08_scu: 'shortreg: batch 256 epochs 08'
|
|
||||||
nb_reg_b256_e10_scu: 'shortreg: batch 256 epochs 10'
|
|
||||||
nb_reg_b256_e20_scu: 'shortreg: batch 256 epochs 20'
|
|
||||||
nb_reg_b256_e30_scu: 'shortreg: batch 256 epochs 30'
|
|
||||||
|
|
||||||
colors:
|
|
||||||
freezetrain: 'red'
|
|
||||||
zero_cost: 'green'
|
|
||||||
shortreg: 'blue'
|
|
||||||
|
|
||||||
|
|
||||||
natsbench_cifar100:
|
|
||||||
freezetrain:
|
|
||||||
# ft_c100_fb1024_ftlr0.1_fte5_ct256_ftt0.3: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb1024_ftlr0.1_fte10_ct256_ftt0.3: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb1024_ftlr0.1_fte5_ct256_ftt0.3_scu: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3 scu'
|
|
||||||
# ft_c100_fb1024_ftlr0.1_fte10_ct256_ftt0.3_scu: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3 scu'
|
|
||||||
# ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb512_ftlr0.1_fte10_ct256_ftt0.3: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3'
|
|
||||||
ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_scu: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb512_ftlr0.1_fte10_ct256_ftt0.3_scu: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb256_ftlr0.1_fte5_ct256_ftt0.3: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.3'
|
|
||||||
# ft_c100_fb256_ftlr0.1_fte10_ct256_ftt0.3: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.3'
|
|
||||||
ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_nofreeze: 'ft_c100_fb512_ftlr0.1_fte5_ct256_ftt0.3_nofreeze'
|
|
||||||
|
|
||||||
zero_cost:
|
|
||||||
zc_cifar100: 'zero cost score at initialization'
|
|
||||||
|
|
||||||
shortreg:
|
|
||||||
nb_c100_reg_b256_e10: 'shortreg: batch 256 epochs 10'
|
|
||||||
nb_c100_reg_b256_e20: 'shortreg: batch 256 epochs 20'
|
|
||||||
nb_c100_reg_b256_e30: 'shortreg: batch 256 epochs 30'
|
|
||||||
nb_c100_reg_b512_e10: 'shortreg: batch 512 epochs 10'
|
|
||||||
nb_c100_reg_b512_e20: 'shortreg: batch 512 epochs 20'
|
|
||||||
nb_c100_reg_b512_e30: 'shortreg: batch 512 epochs 30'
|
|
||||||
# nb_c100_reg_b1024_e10: 'shortreg: batch 1024 epochs 10'
|
|
||||||
# nb_c100_reg_b1024_e20: 'shortreg: batch 1024 epochs 20'
|
|
||||||
# nb_c100_reg_b1024_e30: 'shortreg: batch 1024 epochs 30'
|
|
||||||
nb_c100_reg_b1024_e10_scu: 'shortreg: batch 1024 epochs 10'
|
|
||||||
nb_c100_reg_b1024_e20_scu: 'shortreg: batch 1024 epochs 20'
|
|
||||||
nb_c100_reg_b1024_e30_scu: 'shortreg: batch 1024 epochs 30'
|
|
||||||
nb_c100_reg_b2048_e10: 'shortreg: batch 2048 epochs 10'
|
|
||||||
nb_c100_reg_b2048_e20: 'shortreg: batch 2048 epochs 20'
|
|
||||||
nb_c100_reg_b2048_e30: 'shortreg: batch 2048 epochs 30'
|
|
||||||
|
|
||||||
colors:
|
|
||||||
freezetrain: 'red'
|
|
||||||
zero_cost: 'green'
|
|
||||||
shortreg: 'blue'
|
|
||||||
|
|
||||||
natsbench_imagenet16-120:
|
|
||||||
freezetrain:
|
|
||||||
#ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.1: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1'
|
|
||||||
#ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.1: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.1: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.1: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.1: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.1: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.1: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.1: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1'
|
|
||||||
# ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
|
|
||||||
# ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
|
|
||||||
# ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
|
|
||||||
    # ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte5_ct256_ftt0.2_scu: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c14: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2 c14'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c15: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2 c15'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c16: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2 c16'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_lastact: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2 lastact'
    # ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr0.1_fte5_ct256_ftt0.2_scu: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb256_ftlr0.1_fte10_ct256_ftt0.2_scu: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb256_ftlr0.5_fte5_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr0.5_fte10_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr0.5_fte5_ct256_ftt0.2_scu: 'fear stage 2: batch 256, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb256_ftlr0.5_fte10_ct256_ftt0.2_scu: 'fear stage 2: batch 256, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb512_ftlr0.5_fte5_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.5_fte10_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.5_fte5_ct256_ftt0.2_scu: 'fear stage 2: batch 512, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb512_ftlr0.5_fte10_ct256_ftt0.2_scu: 'fear stage 2: batch 512, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2 scu'
    # ft_i6_fb1024_ftlr0.5_fte5_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb1024_ftlr0.5_fte10_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb2048_ftlr0.5_fte5_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb2048_ftlr0.5_fte10_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr1.0_fte5_ct256_ftt0.2: 'fear stage 2: batch 256, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb256_ftlr1.0_fte10_ct256_ftt0.2: 'fear stage 2: batch 256, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr1.0_fte5_ct256_ftt0.2: 'fear stage 2: batch 512, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr1.0_fte10_ct256_ftt0.2: 'fear stage 2: batch 512, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb1024_ftlr1.0_fte5_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb1024_ftlr1.0_fte10_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb2048_ftlr1.0_fte5_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb2048_ftlr1.0_fte10_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb1024_ftlr0.5_fte5_ct256_ftt0.1_val: 'fear stage 2: batch 1024, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb1024_ftlr0.5_fte10_ct256_ftt0.1_val: 'fear stage 2: batch 1024, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb2048_ftlr0.5_fte5_ct256_ftt0.1_val: 'fear stage 2: batch 2048, lr 0.5, <br> epochs 5, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb2048_ftlr0.5_fte10_ct256_ftt0.1_val: 'fear stage 2: batch 2048, lr 0.5, <br> epochs 10, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb1024_ftlr1.0_fte5_ct256_ftt0.1_val: 'fear stage 2: batch 1024, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb1024_ftlr1.0_fte10_ct256_ftt0.1_val: 'fear stage 2: batch 1024, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb2048_ftlr1.0_fte5_ct256_ftt0.1_val: 'fear stage 2: batch 2048, lr 1.0, <br> epochs 5, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb2048_ftlr1.0_fte10_ct256_ftt0.1_val: 'fear stage 2: batch 2048, lr 1.0, <br> epochs 10, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb2048_ftlr0.1_fte5_ct256_ftt0.1_scu: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1 val scu'
    # ft_i6_fb2048_ftlr0.1_fte10_ct256_ftt0.1_scu: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1 val scu'
    # ft_i6_fb1024_ftlr0.1_fte5_ct256_ftt0.1_scu: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.1 val scu'
    # ft_i6_fb1024_ftlr0.1_fte10_ct256_ftt0.1_scu: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.1 val'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_nofreeze: 'fear stage 2: nofreeze, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_classifier: 'fear stage 2: last layer, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c16: 'fear stage 2: c16 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c15: 'fear stage 2: c15 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c14: 'fear stage 2: c14 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_scu: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c12: 'fear stage 2: c12 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c11: 'fear stage 2: c11 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c10: 'fear stage 2: c10 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.2_c9: 'fear stage 2: c9 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.25_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.25'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.15_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15'
    # ft_i6_fb512_ftlr0.1_fte10_ct256_ftt0.10_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.10'
    ft_i6_fb512_ftlr0.1_fte15_ct256_ftt0.2_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.2'
    ft_i6_fb512_ftlr0.1_fte20_ct256_ftt0.2_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 20, stage 1: batch 256, thresh 0.2'
    ft_i6_fb512_ftlr0.1_fte25_ct256_ftt0.2_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 25, stage 1: batch 256, thresh 0.2'
    ft_i6_fb512_ftlr0.1_fte30_ct256_ftt0.2_c13: 'fear stage 2: c13 onwards, batch 512, lr 0.1, <br> epochs 30, stage 1: batch 256, thresh 0.2'

  zero_cost:
    zc_imagenet16-120: 'zero cost score at initialization'

  shortreg:
    nb_i16_reg_b256_e01_scu: 'shortreg: batch 256 epochs 01'
    nb_i16_reg_b256_e02_scu: 'shortreg: batch 256 epochs 02'
    nb_i16_reg_b256_e04_scu: 'shortreg: batch 256 epochs 04'
    nb_i16_reg_b256_e06_scu: 'shortreg: batch 256 epochs 06'
    nb_i16_reg_b256_e08_scu: 'shortreg: batch 256 epochs 08'
    nb_i16_reg_b256_e10: 'shortreg: batch 256 epochs 10'
    nb_i16_reg_b256_e12: 'shortreg: batch 256 epochs 12'
    nb_i16_reg_b256_e14: 'shortreg: batch 256 epochs 14'
    nb_i16_reg_b256_e16: 'shortreg: batch 256 epochs 16'
    nb_i16_reg_b256_e18: 'shortreg: batch 256 epochs 18'
    nb_i16_reg_b256_e20: 'shortreg: batch 256 epochs 20'
    nb_i16_reg_b256_e30: 'shortreg: batch 256 epochs 30'
    nb_i16_reg_b512_e10_scu: 'shortreg: batch 512 epochs 10'
    nb_i16_reg_b512_e20_scu: 'shortreg: batch 512 epochs 20'
    nb_i16_reg_b512_e22_scu: 'shortreg: batch 512 epochs 22'
    nb_i16_reg_b512_e24_scu: 'shortreg: batch 512 epochs 24'
    nb_i16_reg_b512_e26_scu: 'shortreg: batch 512 epochs 26'
    nb_i16_reg_b512_e28_scu: 'shortreg: batch 512 epochs 28'
    nb_i16_reg_b512_e30_scu: 'shortreg: batch 512 epochs 30'
    nb_i16_reg_b1024_e10: 'shortreg: batch 1024 epochs 10'
    nb_i16_reg_b1024_e20: 'shortreg: batch 1024 epochs 20'
    nb_i16_reg_b1024_e30: 'shortreg: batch 1024 epochs 30'
    nb_i6_reg_b2048_e10: 'shortreg: batch 2048 epochs 10'
    nb_i6_reg_b2048_e20: 'shortreg: batch 2048 epochs 20'
    nb_i6_reg_b2048_e30: 'shortreg: batch 2048 epochs 30'

  colors:
    freezetrain: 'red'
    zero_cost: 'green'
    shortreg: 'blue'


natsbench_flower102:

  freezetrain:
    ft_f102_fb2048_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    ft_f102_fb2048_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    ft_f102_fb2048_ftlr0.1_fte15_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.2'
    ft_f102_fb2048_ftlr0.1_fte30_ct256_ftt0.2: 'fear stage 2: batch 2048, lr 0.1, <br> epochs 30, stage 1: batch 256, thresh 0.2'
    ft_f102_fb1024_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    ft_f102_fb1024_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    ft_f102_fb1024_ftlr0.1_fte15_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.2'
    ft_f102_fb1024_ftlr0.1_fte30_ct256_ftt0.2: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 30, stage 1: batch 256, thresh 0.2'
    ft_f102_fb512_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    ft_f102_fb512_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    ft_f102_fb512_ftlr0.1_fte15_ct256_ftt0.2: 'fear stage 2: batch 512, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.2'
    ft_f102_fb256_ftlr0.1_fte5_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.2'
    ft_f102_fb256_ftlr0.1_fte10_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.2'
    ft_f102_fb256_ftlr0.1_fte15_ct256_ftt0.2: 'fear stage 2: batch 256, lr 0.1, <br> epochs 15, stage 1: batch 256, thresh 0.2'

  shortreg:
    nb_f102_reg_b256_e10: 'shortreg: batch 256 epochs 10'
    nb_f102_reg_b256_e20: 'shortreg: batch 256 epochs 20'
    nb_f102_reg_b256_e30: 'shortreg: batch 256 epochs 30'

  zero_cost:
    zc_flower102: 'zero cost score at initialization'

  colors:
    freezetrain: 'red'
    zero_cost: 'green'
    shortreg: 'blue'


natsbench_synthetic_cifar10:

  freezetrain:
    ft_sc10_fb1024_ftlr0.1_fte5_ct256_ftt0.15: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 5, stage 1: batch 256, thresh 0.15'
    ft_sc10_fb1024_ftlr0.1_fte10_ct256_ftt0.15: 'fear stage 2: batch 1024, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15'
    ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15'
    ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c14: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15 c14'
    ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c15: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15 c15'
    ft_sc10_fb256_ftlr0.1_fte10_ct256_ftt0.15_c16: 'fear stage 2: batch 256, lr 0.1, <br> epochs 10, stage 1: batch 256, thresh 0.15 c16'

  zero_cost:
    zc_synthetic_cifar10: 'zero cost score at initialization'

  shortreg:
    nb_reg_b256_e10_sc10: 'shortreg: batch 256 epochs 10'
    nb_reg_b256_e20_sc10: 'shortreg: batch 256 epochs 20'
    nb_reg_b256_e30_sc10: 'shortreg: batch 256 epochs 30'

  colors:
    freezetrain: 'red'
    zero_cost: 'green'
    shortreg: 'blue'


nasbench101:

  freezetrain: [ft_nb101_fb256_ftlr0.2_fte5_ct256_ftt0.2,
                ft_nb101_fb256_ftlr0.2_fte10_ct256_ftt0.2,
                ft_nb101_fb1024_ftlr0.2_fte5_ct256_ftt0.2,
                ft_nb101_fb1024_ftlr0.2_fte10_ct256_ftt0.2,
                ft_nb101_fb2048_ftlr0.2_fte5_ct256_ftt0.6,
                ft_nb101_fb2048_ftlr0.2_fte10_ct256_ftt0.6,
                ft_nb101_fb2048_ftlr1.0_fte5_ct256_ftt0.6,
                ft_nb101_fb2048_ftlr1.0_fte10_ct256_ftt0.6,
                fa_nb1_s1_fb2048_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_s1_fb2048_ftlr0.01_fte5_ct256_ftt0.6,
                fa_nb1_s1_fb2048_ftlr0.025_fte5_ct256_ftt0.6,
                fa_nb1_s1_fb3072_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_s1_fb3072_ftlr0.01_fte5_ct256_ftt0.6,
                fa_nb1_s1_fb3072_ftlr0.025_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb2048_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb2048_ftlr0.01_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb2048_ftlr0.025_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb3072_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb3072_ftlr0.01_fte5_ct256_ftt0.6,
                fa_nb1_s3_fb3072_ftlr0.025_fte5_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.001_fte10_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.01_fte5_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.01_fte10_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.025_fte5_ct256_ftt0.6,
                fa_nb1_se_fb256_ftlr0.025_fte10_ct256_ftt0.6,
                fa_nb1_s123_fb256_ftlr0.001_fte5_ct256_ftt0.6,
                fa_nb1_s123_fb256_ftlr0.001_fte10_ct256_ftt0.6,
                fa_nb1_s3_fb256_ftlr0.001_fte5_nocond,
                fa_nb1_s3_fb256_ftlr0.001_fte10_nocond,
                fa_nb1_s3_fb256_ftlr0.01_fte5_nocond,
                fa_nb1_s3_fb256_ftlr0.01_fte10_nocond,
                fa_nb1_s3_fb256_ftlr0.025_fte5_nocond,
                fa_nb1_s3_fb256_ftlr0.025_fte10_nocond]

  zero_cost: []

  shortreg: [nb101_reg_b256_e01,
             nb101_reg_b256_e02,
             nb101_reg_b256_e04,
             nb101_reg_b256_e06,
             nb101_reg_b256_e08,
             nb101_reg_b256_e10,
             nb101_reg_b256_e20,
             nb101_reg_b256_e30,
             nb101_reg_b256_e108,
             nb101_reg_b256_e108_rms]

  colors:
    freezetrain: 'red'
    zero_cost: 'green'
    shortreg: 'blue'
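Each dataset section above is just a mapping from experiment folder name to the legend string used in plots, plus a colors block assigning one color per evaluation method (freezetrain/FEAR, shortened regular training, zero-cost score). A minimal sketch of how such a mapping could be consumed when labeling plots; the file name analysis_legend_colors.yaml and the printing loop are illustrative assumptions, not the repository's actual analysis script:

import yaml

# Hypothetical file holding the per-dataset sections shown above.
with open('analysis_legend_colors.yaml') as f:
    conf = yaml.safe_load(f)

dataset = 'natsbench_flower102'
for method in ('freezetrain', 'shortreg', 'zero_cost'):
    color = conf[dataset]['colors'][method]
    entries = conf[dataset][method]
    # A section is either a dict (name -> legend) or a plain list of names (see nasbench101).
    names = list(entries.keys()) if isinstance(entries, dict) else list(entries)
    for name in names:
        legend = entries[name] if isinstance(entries, dict) else name
        print(f'{method:12s} {color:6s} {name} -> {legend}')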
@ -1,120 +0,0 @@
import os
import yaml
import matplotlib.pyplot as plt
import numpy as np
from typing import List, Dict
from itertools import cycle
from cycler import cycler
from collections import OrderedDict
import math as ma
import argparse

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go


def parse_raw_data(root_exp_folder:str, exp_list:List[str])->Dict:
    # Collect each experiment's raw_data.yaml into a dict keyed by experiment name.
    data = {}
    for exp in exp_list:
        exp_full_path = os.path.join(root_exp_folder, exp)
        with open(os.path.join(exp_full_path, 'raw_data.yaml')) as f:
            data[exp] = yaml.load(f, Loader=yaml.Loader)

    return data


def main():
    parser = argparse.ArgumentParser(description='Cross Experiment Random Search Plots')
    parser.add_argument('--dataset', type=str, default='natsbench_cifar10',
                        help='dataset on which experiments have been run')
    parser.add_argument('--conf-location', type=str, default='scripts/reports/fastarchrank_plots/cross_random_search.yaml',
                        help='location of conf file')
    args, extra_args = parser.parse_known_args()

    with open(args.conf_location, 'r') as f:
        conf_data = yaml.load(f, Loader=yaml.Loader)

    exp_folder = conf_data['exp_folder']

    far_exp_list = list(conf_data[args.dataset]['fastarchrank'].keys())
    reg_exp_list = list(conf_data[args.dataset]['regular'].keys())

    # parse raw data from all processed experiments
    far_data = parse_raw_data(exp_folder, far_exp_list)
    reg_data = parse_raw_data(exp_folder, reg_exp_list)

    # summary plot: one marker per experiment, with error bars on duration and accuracy
    fig = go.Figure()
    for key in far_data.keys():
        legend_name = conf_data[args.dataset]['fastarchrank'][key]
        marker_color = conf_data[args.dataset]['colors']['fastarchrank']
        error_x = dict(type='data', array=[far_data[key]['stderr_duration']], visible=True)
        error_y = dict(type='data', array=[far_data[key]['stderr_max_acc']], visible=True)
        fig.add_trace(go.Scatter(x=[far_data[key]['avg_duration']],
                                 error_x=error_x,
                                 y=[far_data[key]['avg_max_acc']],
                                 error_y=error_y,
                                 name=legend_name, mode='markers',
                                 marker_color=marker_color,
                                 showlegend=True))
    for key in reg_data.keys():
        legend_name = conf_data[args.dataset]['regular'][key]
        marker_color = conf_data[args.dataset]['colors']['regular']
        error_x = dict(type='data', array=[reg_data[key]['stderr_duration']], visible=True)
        error_y = dict(type='data', array=[reg_data[key]['stderr_max_acc']], visible=True)
        fig.add_trace(go.Scatter(x=[reg_data[key]['avg_duration']],
                                 error_x=error_x,
                                 y=[reg_data[key]['avg_max_acc']],
                                 error_y=error_y,
                                 name=legend_name, mode='markers',
                                 marker_color=marker_color,
                                 showlegend=True))

    fig.update_yaxes(range=[0,100])
    fig.update_layout(title_text="Duration vs. Max. Accuracy Random Search",
                      xaxis_title="Duration (s)",
                      yaxis_title='Avg. Top-1 Max Accuracy')

    savename_html = os.path.join(exp_folder, f'{args.dataset}_random_search.html')
    fig.write_html(savename_html)
    fig.show()

    # detail plot: the full duration-vs-accuracy trajectory of every individual search run
    fig_detail = go.Figure()
    for key in far_data.keys():
        legend_name = conf_data[args.dataset]['fastarchrank'][key]
        marker_color = conf_data[args.dataset]['colors']['fastarchrank']
        for i in range(len(far_data[key]['trajs'])):
            xs = [duration for duration, test_acc in far_data[key]['trajs'][i]]
            ys = [test_acc for duration, test_acc in far_data[key]['trajs'][i]]
            fig_detail.add_trace(go.Scatter(x=xs,
                                            y=ys,
                                            name=legend_name,
                                            mode='lines',
                                            showlegend=True))
    fig_detail.update_yaxes(range=[0,100])

    for key in reg_data.keys():
        legend_name = conf_data[args.dataset]['regular'][key]
        marker_color = conf_data[args.dataset]['colors']['regular']
        for i in range(len(reg_data[key]['trajs'])):
            xs = [duration for duration, test_acc in reg_data[key]['trajs'][i]]
            ys = [test_acc for duration, test_acc in reg_data[key]['trajs'][i]]
            fig_detail.add_trace(go.Scatter(x=xs,
                                            y=ys,
                                            name=legend_name,
                                            mode='lines',
                                            showlegend=True))
    fig_detail.update_yaxes(range=[0,100])

    fig_detail.update_layout(title_text="Duration vs. Max. Accuracy Random Search",
                             xaxis_title="Duration (s)",
                             yaxis_title='Avg. Top-1 Max Accuracy')
    savename_html = os.path.join(exp_folder, f'{args.dataset}_random_search_detail.html')
    fig_detail.write_html(savename_html)
    fig_detail.show()


if __name__ == '__main__':
    main()

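The script above expects each experiment folder to contain a raw_data.yaml with aggregate statistics and per-run search trajectories. A sketch of that structure, with field names taken from the accesses in the script and the numbers invented purely for illustration:

import yaml

# Shape assumed by the plotting script: scalar summary fields plus 'trajs',
# a list of runs, each run a list of (duration_seconds, test_accuracy) pairs.
example_raw_data = {
    'avg_duration': 1234.5,
    'stderr_duration': 56.7,
    'avg_max_acc': 92.3,
    'stderr_max_acc': 0.4,
    'trajs': [
        [[100.0, 55.2], [500.0, 78.9], [1234.5, 92.3]],
        [[110.0, 52.1], [480.0, 80.2], [1200.0, 91.8]],
    ],
}

print(yaml.safe_dump(example_raw_data, sort_keys=False))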
@ -1,112 +0,0 @@
exp_folder: 'F:\\archai_experiment_reports'


darts_cifar10:

  fastarchrank:
    rs_darts_far_ftt0.6_fte10_ratio_1.2: 'rs_darts_far_ftt0.6_fte10_ratio_1.2'
    rs_darts_far_ftt0.6_fte10_ratio_2.0: 'rs_darts_far_ftt0.6_fte10_ratio_2.0'
    rs_darts_far_ftt0.6_fte10_ratio_4.0: 'rs_darts_far_ftt0.6_fte10_ratio_4.0'
    rs_darts_far_ftt0.6_fte10_ratio_8.0: 'rs_darts_far_ftt0.6_fte10_ratio_8.0'
    rs_darts_far_ftt0.6_fte10_ratio_8.0_nofreeze: 'rs_darts_far_ftt0.6_fte10_ratio_8.0_nofreeze'

  regular:
    rs_darts_reg_e5: 'rs_darts_reg_e5'
    rs_darts_reg_e10: 'rs_darts_reg_e10'
    rs_darts_reg_e15: 'rs_darts_reg_e15'
    rs_darts_reg_e50: 'rs_darts_reg_e50'

  colors:
    fastarchrank: 'green'
    regular: 'red'

natsbench_cifar10:

  fastarchrank:
    # rs_far_ftt0.6_max50_ratio1.2: 'rs_far_ftt0.6_max50_ratio1.2'
    # rs_far_ftt0.6_max50_ratio1.0: 'rs_far_ftt0.6_max50_ratio1.0'
    # rs_far_ftt0.3_max50_ratio1.2: 'rs_far_ftt0.3_max50_ratio1.2'
    # rs_far_ftt0.3_max50_ratio1.0: 'rs_far_ftt0.3_max50_ratio1.0'
    # rs_far_ftt0.15_max50_ratio1.2: 'rs_far_ftt0.15_max50_ratio1.2'
    # rs_far_ftt0.15_max50_ratio1.0: 'rs_far_ftt0.15_max50_ratio1.0'
    rs_far_ftt0.6_max500_ratio1.2_fixedseeds: 'rs_far_ftt0.6_max500_ratio1.2_fixedseeds'
    rs_far_ftt0.6_max500_ratio2.0_fixedseeds: 'rs_far_ftt0.6_max500_ratio2.0_fixedseeds'
    rs_far_ftt0.6_max500_ratio4.0_fixedseeds: 'rs_far_ftt0.6_max500_ratio4.0_fixedseeds'
    rs_far_ftt0.6_max500_ratio8.0_fixedseeds: 'rs_far_ftt0.6_max500_ratio8.0_fixedseeds'

  regular:
    # rs_reg_max50_b1024_e01: 'rs_reg_max50_b1024_e01'
    # rs_reg_max50_b1024_e02: 'rs_reg_max50_b1024_e02'
    # rs_reg_max50_b1024_e04: 'rs_reg_max50_b1024_e04'
    # rs_reg_max50_b1024_e06: 'rs_reg_max50_b1024_e06'
    # rs_reg_max50_b1024_e08: 'rs_reg_max50_b1024_e08'
    # rs_reg_max50_b1024_e10: 'rs_reg_max50_b1024_e10'
    rs_reg_max500_b1024_e02_fixedseeds: 'rs_reg_max500_b1024_e02_fixedseeds'
    rs_reg_max500_b1024_e04_fixedseeds: 'rs_reg_max500_b1024_e04_fixedseeds'
    rs_reg_max500_b1024_e06_fixedseeds: 'rs_reg_max500_b1024_e06_fixedseeds'
    rs_reg_max500_b1024_e08_fixedseeds: 'rs_reg_max500_b1024_e08_fixedseeds'
    rs_reg_max500_b1024_e10_fixedseeds: 'rs_reg_max500_b1024_e10_fixedseeds'
    rs_reg_max500_b1024_e50_fixedseeds: 'rs_reg_max500_b1024_e50_fixedseeds'

  colors:
    fastarchrank: 'green'
    regular: 'red'


natsbench_cifar100:

  fastarchrank:
    # rs_far_c100_ftt0.3_max1000_ratio1.2_seed1.0: 'rs_far_c100_ftt0.3_max1000_ratio1.2_seed1.0'
    # rs_far_c100_ftt0.3_max1000_ratio1.2: 'rs_far_c100_ftt0.3_max1000_ratio1.2'
    rs_far_c100_ftt0.3_max500_ratio1.2_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio1.2_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio1.4_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio1.4_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio1.6_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio1.6_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio1.8_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio1.8_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio2.0_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio2.0_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio4.0_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio4.0_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio8.0_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio8.0_fixedseeds'
    rs_far_c100_ftt0.3_max500_ratio2M_fixedseeds: 'rs_far_c100_ftt0.3_max500_ratio2M_fixedseeds'
    rs_farpost_c100_ftt0.3_max500_ratio1.2_fixedseeds: 'rs_farpost_c100_ftt0.3_max500_ratio1.2_fixedseeds'
    rs_farpost_c100_ftt0.3_max500_ratio2.0_fixedseeds: 'rs_farpost_c100_ftt0.3_max500_ratio2.0_fixedseeds'

  regular:
    # rs_reg_c100_max1000_b1024_e08_seed1.0: 'rs_reg_c100_max1000_b1024_e08_seed1.0'
    # rs_reg_c100_max1000_b1024_e08: 'rs_reg_c100_max1000_b1024_e08'
    rs_reg_c100_max500_b1024_e02_fixedseeds: 'rs_reg_c100_max500_b1024_e02_fixedseeds'
    rs_reg_c100_max500_b1024_e04_fixedseeds: 'rs_reg_c100_max500_b1024_e04_fixedseeds'
    rs_reg_c100_max500_b1024_e06_fixedseeds: 'rs_reg_c100_max500_b1024_e06_fixedseeds'
    rs_reg_c100_max500_b1024_e08_fixedseeds: 'rs_reg_c100_max500_b1024_e08_fixedseeds'
    rs_reg_c100_max500_b1024_e50_fixedseeds: 'rs_reg_c100_max500_b1024_e50_fixedseeds'

  colors:
    fastarchrank: 'green'
    regular: 'red'


natsbench_imagenet16-120:

  fastarchrank:
    rs_far_i16_ftt0.2_max500_ratio1.2_fixedseeds: 'rs_far_i16_ftt0.2_max500_ratio1.2_fixedseeds'
    rs_far_i16_ftt0.2_max500_ratio2.0_fixedseeds: 'rs_far_i16_ftt0.2_max500_ratio2.0_fixedseeds'
    rs_far_i16_ftt0.2_max500_ratio4.0_fixedseeds: 'rs_far_i16_ftt0.2_max500_ratio4.0_fixedseeds'

  regular:
    rs_reg_i16_max500_b512_e08_fixedseeds: 'rs_reg_i16_max500_b512_e08_fixedseeds'
    rs_reg_i16_max500_b512_e16_fixedseeds: 'rs_reg_i16_max500_b512_e16_fixedseeds'
    rs_reg_i16_max500_b512_e22_fixedseeds: 'rs_reg_i16_max500_b512_e22_fixedseeds'
    rs_reg_i16_max500_b512_e50_fixedseeds: 'rs_reg_i16_max500_b512_e50_fixedseeds'

  colors:
    fastarchrank: 'green'
    regular: 'red'

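Given this config, the plotting script above can be pointed at any of the dataset sections via its command-line flags. A hedged usage sketch; the script file name cross_exp_random_search_plots.py is an assumption, while the flags and the expected section keys come from the argparse setup and config accesses shown earlier:

# Hypothetical invocation (run from the repository root):
#   python cross_exp_random_search_plots.py \
#       --dataset natsbench_cifar100 \
#       --conf-location scripts/reports/fastarchrank_plots/cross_random_search.yaml

# Quick sanity check that a dataset section carries the keys the script expects.
import yaml

with open('scripts/reports/fastarchrank_plots/cross_random_search.yaml') as f:
    conf = yaml.safe_load(f)

for section in ('fastarchrank', 'regular', 'colors'):
    assert section in conf['natsbench_cifar100'], f'missing section: {section}'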
setup.py
@ -1,7 +1,7 @@
 # Copyright (c) Microsoft Corporation.
 # Licensed under the MIT license.

-import setuptools
+from setuptools import find_packages, setup

 with open("README.md", "r", encoding="utf_8") as f:
     long_description = f.read()