Preparing for 2019.12.28 comparison run

Ubuntu committed 2019-12-29 04:29:07 +00:00
Parent d5716292ec
Commit 3fe03aca76
1 changed file with 75 additions and 52 deletions


@@ -6,7 +6,7 @@
 # The script classifies one or more hard-coded image files.
 #
 # Because the inference code has not been assembled into a formal package yet,
-# you should define API_ROOT to point to the base of our repo. This
+# you should define api_root to point to the base of our repo. This
 # will be added to your Python path later in the script.
 #
 # This script has two non-code dependencies:
@@ -28,15 +28,18 @@ import glob
 # Directory to which you sync'd the repo. Probably the same
 # directory this file lives in, but for portability, this file is set up to only
 # take dependencies on the repo according to this constant.
-API_ROOT = r'/home/coyote/git/speciesclassification'
-SUBDIRS_TO_IMPORT = ['DetectionClassificationAPI','FasterRCNNDetection','PyTorchClassification']
+api_root = r'/home/coyote/git/speciesclassification'
+subdirs_to_import = ['DetectionClassificationAPI','FasterRCNNDetection','PyTorchClassification']
 # Path to taxa.csv, for latin --> common mapping
 #
 # Set to None to disable latin --> common mapping
-TAXONOMY_PATH = r'/data/species_classification/taxa.19.08.28.0536.csv' # None
+taxonomy_path = r'/data/species_classification/taxa.19.08.28.0536.csv' # None
-# IMAGES_TO_CLASSIFY can be:
+job_name = ''
+images_to_classify_base = None
+# images_to_classify can be:
 #
 # an array of filenames
 #
@@ -45,40 +48,59 @@ TAXONOMY_PATH = r'/data/species_classification/taxa.19.08.28.0536.csv' # None
 #
 # a directory, which is recursively enumerated
 if False:
-    IMAGES_TO_CLASSIFY = [
+    images_to_classify = [
         '/data/species_classification/coyote.jpg',
         '/data/species_classification/meerkat.jpg',
         '/data/species_classification/elephant.jpg'
     ]
-    IMAGES_TO_CLASSIFY_BASE = None
 # Pick images from a .csv file
 if False:
-    IMAGES_TO_CLASSIFY = '/data/species_classification/animal_list.2018.10.23.12.58.16.csv'
-    IMAGES_TO_CLASSIFY_BASE = '/data/species_classification/sample_animals'
+    images_to_classify = '/data/species_classification/animal_list.2018.10.23.12.58.16.csv'
+    images_to_classify_base = '/data/species_classification/sample_animals'
+# Pick images from a folder
+if True:
+    images_to_classify = '/data/species_classification/images/sample_images.2019.12.28'
+    job_name = 'sample_images.2019.12.28'
+# Pick images from a folder
+if False:
+    images_to_classify = '/data/species_classification/elephants_and_hippos'
+# Classification results will be written here
+classification_output_file = None
+model_base = '/data/species_classification/models'
 if True:
-    IMAGES_TO_CLASSIFY = '/data/species_classification/sample_animals'
-    IMAGES_TO_CLASSIFY_BASE = None
-# ...and classification results will be written here.
-CLASSIFICATION_OUTPUT_FILE = None
+    # 2019 fixed model
+    classification_model_path = os.path.join(model_base,
+        'iNat_all_extended/demosite-model-ensemble-resnext-inceptionV4-560-83.1/iNat_all_extended_ensemble_resnext_inceptionV4_560_83.1_model.2019.12.00.pytorch')
 if False:
-    CLASSIFICATION_MODEL_PATH = '/data/species_classification/sc_all_extended_ensemble_resnext_inceptionV4_560_2019.09.19_model.pytorch'
-    CLASSIFICATION_OUTPUT_FILE = '/data/species_classification/classifications_sc_all_extended_ensemble_resnext_inceptionV4_560_2019.09.19_model.csv'
+    # 2019 broken model
+    classification_model_path = os.path.join(model_base,
+        'iNat_all_extended_buggy/demosite-model-ensemble-resnext-inceptionV4-560-81.0/iNat_all_extended_ensemble_resnext_inceptionV4_560_81.9_model.2019.10.00.pytorch')
-if True:
-    CLASSIFICATION_MODEL_PATH = '/data/species_classification/inc4-incres2-560-78.5.model_deploy.pth.tar'
-    CLASSIFICATION_OUTPUT_FILE = '/data/species_classification/classifications_inc4-incres2-560-78.5.csv'
+if False:
+    # 2018 model
+    classification_model_path = os.path.join(model_base,
+        'iNat_original/inc4-incres2-560-78.5/inc4-incres2-560-78.5.model_deploy.pth.tar')
+assert(os.path.isfile(classification_model_path))
+output_base = '/data/species_classification/output'
+model_name = os.path.basename(classification_model_path)
+classification_output_file = os.path.join(output_base,'classifications_{}_{}.csv'.format(job_name,model_name))
 # Detection (i.e., bounding box generation) is optional; set to None
 # to disable detection
-DETECTION_MODEL_PATH = None
+detection_model_path = None
 # This must be True if detection is enabled. Classification can be run
 # on the CPU or GPU.
-USE_GPU = True
+use_gpu = True
 # List of image sizes to use, one per model in the ensemble. Images will be resized
 # and reshaped to square images prior to classification.
@@ -86,23 +108,23 @@ USE_GPU = True
 # We typically specify [560,560] if we're loading our Inception/InceptionResnet
 # ensemble. For ResNext, we typically specify [448].
 #
-IMAGE_SIZES = [560, 560]
-# IMAGE_SIZES = [448]
+image_sizes = [560, 560]
+# image_sizes = [448]
-MAX_K_TO_PRINT = 3
-DEBUG_MAX_IMAGES = -1
+mak_k_to_print = 3
+debug_max_images = -1
 #%% Path setup to import the classification code
-if (not API_ROOT.lower() in map(str.lower,sys.path)):
+if (not api_root.lower() in map(str.lower,sys.path)):
-    print("Adding {} to the python path".format(API_ROOT))
-    sys.path.insert(0,API_ROOT)
+    print("Adding {} to the python path".format(api_root))
+    sys.path.insert(0,api_root)
-for s in SUBDIRS_TO_IMPORT:
+for s in subdirs_to_import:
     if (not s.lower() in map(str.lower,sys.path)):
-        importPath = os.path.join(API_ROOT,s)
+        importPath = os.path.join(api_root,s)
         print("Adding {} to the python path".format(importPath))
         sys.path.insert(0,importPath)
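
As a quick illustration of the output naming scheme this commit introduces (not part of the commit itself): classification_output_file is now derived from output_base, job_name, and the basename of the selected model file, rather than being hard-coded. With the values configured in the hunks above, a minimal sketch of that computation is:

    import os

    model_base = '/data/species_classification/models'
    output_base = '/data/species_classification/output'
    job_name = 'sample_images.2019.12.28'

    # The 2019 "fixed" model selected by the active if True: branch above
    classification_model_path = os.path.join(model_base,
        'iNat_all_extended/demosite-model-ensemble-resnext-inceptionV4-560-83.1/iNat_all_extended_ensemble_resnext_inceptionV4_560_83.1_model.2019.12.00.pytorch')
    model_name = os.path.basename(classification_model_path)

    classification_output_file = os.path.join(
        output_base, 'classifications_{}_{}.csv'.format(job_name, model_name))

    # Expected result:
    # /data/species_classification/output/classifications_sample_images.2019.12.28_iNat_all_extended_ensemble_resnext_inceptionV4_560_83.1_model.2019.12.00.pytorch.csv
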
@@ -116,12 +138,12 @@ import api as speciesapi
 latinToCommon = {}
-if TAXONOMY_PATH != None:
+if taxonomy_path != None:
     print("Reading taxonomy file")
     # Read taxonomy file; takes ~1 minute
-    df = pd.read_csv(TAXONOMY_PATH)
+    df = pd.read_csv(taxonomy_path)
     df = df.fillna('')
     # Columns are:
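
The hunk above only renames the configuration constant; the taxonomy handling itself is unchanged, and the column list is in the elided portion of the file. For orientation, the pattern is to build a latin-name to common-name dictionary from taxa.csv and use it in the doLatinToCommon helper referenced in the next hunk. A minimal sketch of that pattern, using placeholder column names ('scientificName' and 'vernacularName' are assumptions here, not the real columns):

    import pandas as pd

    taxonomy_path = '/data/species_classification/taxa.19.08.28.0536.csv'
    df = pd.read_csv(taxonomy_path).fillna('')

    # Placeholder column names; substitute the actual columns from taxa.csv
    latinToCommon = dict(zip(df['scientificName'].str.lower(),
                             df['vernacularName']))

    def doLatinToCommon(latinName):
        # Fall back to the latin name when no common name is available
        return latinToCommon.get(latinName.lower(), latinName)
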
@@ -172,12 +194,13 @@ def doLatinToCommon(latinName):
 #%% Create the model(s)
-assert os.path.isfile(CLASSIFICATION_MODEL_PATH)
-if DETECTION_MODEL_PATH != None:
-    assert os.path.isfile(DETECTION_MODEL_PATH)
+assert os.path.isfile(classification_model_path)
+if detection_model_path != None:
+    assert os.path.isfile(detection_model_path)
 print("Loading model")
-model = speciesapi.DetectionClassificationAPI(CLASSIFICATION_MODEL_PATH, DETECTION_MODEL_PATH, IMAGE_SIZES, USE_GPU)
+model = speciesapi.DetectionClassificationAPI(classification_model_path,
+    detection_model_path, image_sizes, use_gpu)
 print("Finished loading model")
@@ -185,17 +208,17 @@ print("Finished loading model")
 queries = None
-if isinstance(IMAGES_TO_CLASSIFY,str) and os.path.isdir(IMAGES_TO_CLASSIFY):
+if isinstance(images_to_classify,str) and os.path.isdir(images_to_classify):
-    images = glob.glob(os.path.join(IMAGES_TO_CLASSIFY,'**/*.*'), recursive=True)
+    images = glob.glob(os.path.join(images_to_classify,'**/*.*'), recursive=True)
     images = [fn for fn in images if os.path.isfile(fn)]
     queries = [os.path.basename(os.path.dirname(fn)) for fn in images]
     print('Loaded a folder of {} images'.format(len(images)))
-elif isinstance(IMAGES_TO_CLASSIFY,str) and os.path.isfile(IMAGES_TO_CLASSIFY):
+elif isinstance(images_to_classify,str) and os.path.isfile(images_to_classify):
     print("Reading image list file")
-    df_images = pd.read_csv(IMAGES_TO_CLASSIFY,header=None)
+    df_images = pd.read_csv(images_to_classify,header=None)
     df_images.columns = ['filename','query_string']
     nImages = len(images)
     print("Read {} image names".format(len(images)))
@@ -205,8 +228,8 @@ elif isinstance(IMAGES_TO_CLASSIFY,str) and os.path.isfile(IMAGES_TO_CLASSIFY):
 else:
-    assert isinstance(IMAGES_TO_CLASSIFY,list)
-    images = IMAGES_TO_CLASSIFY
+    assert isinstance(images_to_classify,list)
+    images = images_to_classify
     queries = None
     print('Processing list of {} images'.format(len(images)))
@@ -217,8 +240,8 @@ nErrors = 0
 nImagesClassified = 0
 nImages = len(images)
-if CLASSIFICATION_OUTPUT_FILE is not None:
-    f = open(CLASSIFICATION_OUTPUT_FILE,'w+')
+if classification_output_file is not None:
+    f = open(classification_output_file,'w+')
 # i_fn = 1; fn = images[i_fn]
 for i_fn,fn in enumerate(images):
@@ -229,14 +252,14 @@ for i_fn,fn in enumerate(images):
     if queries is not None:
         query = queries[i_fn]
-    if IMAGES_TO_CLASSIFY_BASE is not None and len(IMAGES_TO_CLASSIFY_BASE > 0):
-        fn = os.path.join(IMAGES_TO_CLASSIFY_BASE,fn)
+    if images_to_classify_base is not None and len(images_to_classify_base) > 0:
+        fn = os.path.join(images_to_classify_base,fn)
     # with torch.no_grad():
     # print('Classifying image {}'.format(fn))
     # def predict_image(self, image_path, topK=1, multiCrop=False, predict_mode=PredictMode.classifyUsingDetect):
     try:
-        prediction = model.predict_image(fn, topK=min(5,MAX_K_TO_PRINT), multiCrop=False,
+        prediction = model.predict_image(fn, topK=min(5,mak_k_to_print), multiCrop=False,
                                          predict_mode=speciesapi.PredictMode.classifyOnly)
         nImagesClassified = nImagesClassified + 1
@@ -246,23 +269,23 @@ for i_fn,fn in enumerate(images):
         continue
     # i_prediction = 0
-    for i_prediction in range(0, min(len(prediction.species),MAX_K_TO_PRINT)):
+    for i_prediction in range(0, min(len(prediction.species),mak_k_to_print)):
         latinName = prediction.species[i_prediction]
         likelihood = prediction.species_scores[i_prediction]
         likelihood = '{0:0.3f}'.format(likelihood)
         commonName = doLatinToCommon(latinName)
         s = '"{}","{}","{}","{}","{}","{}","{}"'.format(
             i_fn,fn,query,i_prediction,latinName,commonName,likelihood)
-        if CLASSIFICATION_OUTPUT_FILE is not None:
+        if classification_output_file is not None:
             f.write(s + '\n')
         print(s)
-    if DEBUG_MAX_IMAGES > 0 and i_fn >= DEBUG_MAX_IMAGES:
+    if debug_max_images > 0 and i_fn >= debug_max_images:
         break
 # ...for each image
-if CLASSIFICATION_OUTPUT_FILE is not None:
+if classification_output_file is not None:
     f.close()
 print("Finished classifying {} of {} images ({} errors)".format(nImagesClassified,nImages,nErrors))