PyTorchWildlife Dev branch
|
@ -1,61 +1,9 @@
|
|||
################################################################################
|
||||
# This .gitignore file was automatically created by Microsoft(R) Visual Studio.
|
||||
################################################################################
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Apple directory
|
||||
*.DS_Store
|
||||
|
||||
# IDEs
|
||||
*.idea/
|
||||
*.project
|
||||
.spyderproject
|
||||
.spyproject
|
||||
.vscode
|
||||
|
||||
# Demo files
|
||||
demo/aadconfig.py
|
||||
demo/apiconfig.py
|
||||
demo/CameraTrapAssets
|
||||
demo/static/uploads
|
||||
demo/static/results
|
||||
.webassets-cache/
|
||||
|
||||
# CameraTrapJsonFileProcessingApp
|
||||
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/.vs/
|
||||
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/bin/
|
||||
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/obj/
|
||||
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/packages/
|
||||
|
||||
# TF and PyTorch model files
|
||||
*.pb
|
||||
*.pt
|
||||
|
||||
# batch processing API config files
|
||||
api_config*.py
|
||||
|
||||
# Other
|
||||
*.pth
|
||||
*.o
|
||||
debug.log
|
||||
*.swp
|
||||
|
||||
# Things created when building the sync API
|
||||
yolov5
|
||||
api/synchronous/api_core/animal_detection_api/detection
|
||||
|
||||
__pycache__
|
||||
*weights*
|
||||
*processed.*
|
||||
*output*
|
||||
*flagged*
|
||||
*temp*
|
||||
PytorchWildlife.egg-info/
|
||||
*dev*
|
||||
*test*
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
[submodule "demo/CameraTrapAssets"]
|
||||
path = demo/CameraTrapAssets
|
||||
url = https://ai4evisionexternal.visualstudio.com/gramener/_git/camera-trap-app-assets
|
34
LICENSE
|
@ -1,21 +1,21 @@
|
|||
MIT License
|
||||
MIT License
|
||||
|
||||
Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
Copyright (c) [2023] [Microsoft]
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
from .data import *
|
||||
from .models import *
|
||||
from .utils import *
|
|
@ -0,0 +1,2 @@
|
|||
from .datasets import *
|
||||
from .transforms import *
|
|
@ -0,0 +1,120 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import os
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import supervision as sv
|
||||
from torch.utils.data import Dataset
|
||||
|
||||
# Making the DetectionImageFolder class available for import from this module
|
||||
# Public names exported by `from <this module> import *`.
# DetectionCrops is defined in this module as well, so it is exported alongside
# DetectionImageFolder; otherwise star-imports silently omit it.
__all__ = [
    "DetectionImageFolder",
    "DetectionCrops",
]
|
||||
|
||||
|
||||
class DetectionImageFolder(Dataset):
    """
    A PyTorch Dataset for loading images from a specified directory.

    Each item in the dataset is a tuple containing the image data,
    the image's path, and the original size of the image.
    """

    # Lower-case file extensions that are treated as images when scanning the folder.
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tif", ".tiff", ".webp")

    def __init__(self, image_dir, transform=None):
        """
        Initializes the dataset.

        Parameters:
            image_dir (str): Path to the directory containing the images.
            transform (callable, optional): Optional transform to be applied on the image.
        """
        self.image_dir = image_dir
        # Keep only regular files with an image extension (case-insensitive), sorted by name.
        # Sub-directories and stray files (e.g. ".DS_Store", notes) would otherwise become
        # dataset items and make __getitem__ fail when PIL tries to open them.
        self.images = sorted(
            name
            for name in os.listdir(self.image_dir)
            if name.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(self.image_dir, name))
        )
        self.transform = transform

    def __getitem__(self, idx):
        """
        Retrieves an image from the dataset.

        Parameters:
            idx (int): Index of the image to retrieve.

        Returns:
            tuple: Contains the image data, the image's path, and its original size
                (the shape of the RGB array, as a numpy array).
        """
        # Resolve the file name stored at this index to a full path
        img_path = os.path.join(self.image_dir, self.images[idx])

        # Load the image as an RGB numpy array; the context manager releases the file handle
        with Image.open(img_path) as handle:
            img = np.asarray(handle.convert("RGB"))
        # Shape is recorded before any transform so callers can map results back
        img_size_ori = img.shape

        # Apply transformation if specified
        if self.transform:
            img = self.transform(img)

        return img, img_path, np.array(img_size_ori)

    def __len__(self):
        """
        Returns the total number of images in the dataset.

        Returns:
            int: Total number of images.
        """
        return len(self.images)
|
||||
|
||||
|
||||
class DetectionCrops(Dataset):
    """
    A PyTorch Dataset of image crops taken around detection bounding boxes.

    One item is produced for every detection whose class id equals
    `animal_cls_id`; each item is the cropped image and the path of the
    image it was cropped from.
    """

    def __init__(self, detection_results, transform=None, path_head=None, animal_cls_id=0):
        """
        Initializes the dataset and indexes the detections to crop.

        Parameters:
            detection_results (iterable): Entries providing an "img_id" key and a
                "detections" object exposing `xyxy` and `class_id`.
            transform (callable, optional): Optional transform applied to the crop (as a PIL image).
            path_head (str, optional): Directory prepended to every image id when loading.
            animal_cls_id (int, optional): Detection class id that represents animals.
        """
        self.detection_results = detection_results
        self.transform = transform
        self.path_head = path_head
        # This determines which detection class id represents animals.
        self.animal_cls_id = animal_cls_id
        # Parallel lists: image id and bounding box of every retained detection
        self.img_ids = []
        self.xyxys = []

        self.load_detection_results()

    def load_detection_results(self):
        """
        Collects the image id and bounding box of every animal detection.
        """
        for entry in self.detection_results:
            boxes = entry["detections"].xyxy
            class_ids = entry["detections"].class_id
            for box, cls_id in zip(boxes, class_ids):
                # Only run recognition on animal detections
                if cls_id == self.animal_cls_id:
                    self.img_ids.append(entry["img_id"])
                    self.xyxys.append(box)

    def __getitem__(self, idx):
        """
        Retrieves a cropped image from the dataset.

        Parameters:
            idx (int): Index of the crop to retrieve.

        Returns:
            tuple: Contains the cropped image data and the source image's path.
        """
        # Look up the image id and the bbox (xyxy) used for cropping
        box = self.xyxys[idx]
        img_id = self.img_ids[idx]

        if self.path_head:
            img_path = os.path.join(self.path_head, img_id)
        else:
            img_path = img_id

        # Load the full image as RGB and crop it with supervision
        full_image = np.array(Image.open(img_path).convert("RGB"))
        img = sv.crop_image(full_image, xyxy=box)

        # Apply transformation if specified (the transform receives a PIL image)
        if self.transform:
            img = self.transform(Image.fromarray(img))

        return img, img_path

    def __len__(self):
        """
        Returns the number of retained detections.

        Returns:
            int: Total number of crops.
        """
        return len(self.img_ids)
|
|
@ -0,0 +1,90 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from torchvision import transforms
|
||||
from yolov5.utils.augmentations import letterbox
|
||||
|
||||
# Making the provided classes available for import from this module
|
||||
__all__ = [
|
||||
"MegaDetector_v5_Transform",
|
||||
"Classification_Inference_Transform"
|
||||
]
|
||||
|
||||
class MegaDetector_v5_Transform:
    """
    Preprocessing transform for the MegaDetector v5 (YoloV5) model.

    The image is letterboxed, moved to channel-first layout, converted to a
    float tensor and divided by 255.
    """

    def __init__(self, target_size=1280, stride=32):
        """
        Initializes the transform.

        Args:
            target_size (int): Desired size for the image's longest side after resizing.
            stride (int): Stride value for resizing.
        """
        self.stride = stride
        self.target_size = target_size

    def __call__(self, np_img):
        """
        Applies the transformation on the provided image.

        Args:
            np_img (np.ndarray): Input image as a numpy array.

        Returns:
            torch.Tensor: Transformed image.
        """
        # Resize and pad with letterbox; only the first element of its return value is used
        letterboxed = letterbox(
            np_img,
            new_shape=self.target_size,
            stride=self.stride,
            auto=False,
        )[0]

        # Move the last axis to the front and make the memory contiguous for torch
        channel_first = np.ascontiguousarray(letterboxed.transpose((2, 0, 1)))

        # Convert to a float tensor and divide by 255
        tensor = torch.from_numpy(channel_first).float()
        tensor /= 255.0
        return tensor
|
||||
|
||||
|
||||
class Classification_Inference_Transform:
    """
    Preprocessing transform for classification inference.

    Composes a resize to a square, conversion to a tensor and
    per-channel normalization.
    """

    # Per-channel normalization constants passed to transforms.Normalize
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    def __init__(self, target_size=224):
        """
        Initializes the transform.

        Args:
            target_size (int): Desired size for the height and width after resizing.
        """
        # The steps run in list order: resize, tensor conversion, normalization
        steps = [
            transforms.Resize((target_size, target_size)),
            transforms.ToTensor(),
            transforms.Normalize(self.mean, self.std),
        ]
        self.trans = transforms.Compose(steps)

    def __call__(self, img):
        """
        Applies the transformation on the provided image.

        Args:
            img (PIL.Image.Image): Input image in PIL format.

        Returns:
            torch.Tensor: Transformed image.
        """
        return self.trans(img)
|
|
@ -0,0 +1 @@
|
|||
from .resnet import *
|
|
@ -0,0 +1,3 @@
|
|||
from .base_classifier import *
|
||||
from .opossum import *
|
||||
from .amazon import *
|
|
@ -0,0 +1,105 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import torch
|
||||
from .base_classifier import PlainResNetInference
|
||||
|
||||
__all__ = [
|
||||
"AI4GAmazonRainforest"
|
||||
]
|
||||
|
||||
|
||||
class AI4GAmazonRainforest(PlainResNetInference):
    """
    Amazon Rainforest Animal Classifier that inherits from PlainResNetInference.
    This classifier is specialized for recognizing 36 different animals in the Amazon Rainforest.
    """

    # Image size for the Amazon Rainforest classifier
    IMAGE_SIZE = 224

    # Class names for prediction, keyed by class id (0..35 in the order listed)
    CLASS_NAMES = dict(enumerate((
        'Dasyprocta',
        'Bos',
        'Pecari',
        'Mazama',
        'Cuniculus',
        'Leptotila',
        'Human',
        'Aramides',
        'Tinamus',
        'Eira',
        'Crax',
        'Procyon',
        'Capra',
        'Dasypus',
        'Sciurus',
        'Crypturellus',
        'Tamandua',
        'Proechimys',
        'Leopardus',
        'Equus',
        'Columbina',
        'Nyctidromus',
        'Ortalis',
        'Emballonura',
        'Odontophorus',
        'Geotrygon',
        'Metachirus',
        'Catharus',
        'Cerdocyon',
        'Momotus',
        'Tapirus',
        'Canis',
        'Furnarius',
        'Didelphis',
        'Sylvilagus',
        'Unknown',
    )))

    def __init__(self, weights=None, device="cpu", pretrained=True):
        """
        Initialize the Amazon animal Classifier.

        Args:
            weights (str, optional): Path to the model weights. Defaults to None.
            device (str, optional): Device for model inference. Defaults to "cpu".
            pretrained (bool, optional): Whether to use pretrained weights. Defaults to True.
        """
        # When pretrained, hand the checkpoint URL to the base class; otherwise no URL
        url = (
            "https://zenodo.org/records/10042023/files/AI4GAmazonClassification_v0.0.0.ckpt?download=1"
            if pretrained
            else None
        )

        super().__init__(weights=weights, device=device, num_cls=36, num_layers=50, url=url)

    def results_generation(self, logits, img_ids, id_strip=None):
        """
        Generate results for classification.

        Args:
            logits (torch.Tensor): Output tensor from the model.
            img_ids (iterable): Image identifiers, one per row of `logits`.
            id_strip (str): Characters passed to `str.strip` to trim both ends of each image id.

        Returns:
            list: One dictionary per image containing image ID, prediction,
                class id, and confidence score.
        """
        # Softmax over the class dimension, then take the best class and its probability
        probs = torch.softmax(logits, dim=1)
        preds = probs.argmax(dim=1)
        confs = probs.max(dim=1)[0]

        return [
            {
                "img_id": str(img_id).strip(id_strip),
                "prediction": self.CLASS_NAMES[pred.item()],
                "class_id": pred.item(),
                "confidence": conf.item(),
            }
            for pred, img_id, conf in zip(preds, img_ids, confs)
        ]
|
|
@ -0,0 +1,155 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torchvision.models.resnet import BasicBlock, Bottleneck, ResNet
|
||||
from torch.hub import load_state_dict_from_url
|
||||
from tqdm import tqdm
|
||||
from collections import OrderedDict
|
||||
|
||||
# Making the PlainResNetInference class available for import from this module
|
||||
__all__ = ["PlainResNetInference"]
|
||||
|
||||
|
||||
class ResNetBackbone(ResNet):
    """
    Custom ResNet Backbone that extracts features from input images.
    """

    def _forward_impl(self, x):
        """
        Run the ResNet stem, the four residual stages and the average pool,
        then flatten everything after the batch dimension. No final
        classification layer is applied here.
        """
        # Stem followed by the residual stages, applied in order
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
        )
        for module in pipeline:
            x = module(x)

        pooled = self.avgpool(x)
        return torch.flatten(pooled, 1)
|
||||
|
||||
|
||||
class PlainResNetClassifier(nn.Module):
    """
    Basic ResNet Classifier that uses a custom ResNet backbone.
    """
    name = "PlainResNetClassifier"

    # Source of pretrained backbone weights consumed by feat_init(). Nothing in this
    # class assigns it, so it defaults to None and feat_init() reports that clearly
    # instead of failing with an AttributeError.
    pretrained_weights = None

    def __init__(self, num_cls=1, num_layers=50):
        """
        Build the backbone and the linear classification head.

        Args:
            num_cls (int): Number of outputs of the classification head.
            num_layers (int): ResNet depth; 18 and 50 are supported.

        Raises:
            ValueError: If `num_layers` is not a supported depth.
        """
        super(PlainResNetClassifier, self).__init__()
        self.num_cls = num_cls
        self.num_layers = num_layers
        self.feature = None
        self.classifier = None
        self.criterion_cls = None
        # Initialize the network and weights
        self.setup_net()

    def setup_net(self):
        """
        Set up the ResNet classifier according to the specified number of layers.

        Raises:
            ValueError: If `self.num_layers` is neither 18 nor 50.
        """
        kwargs = {}

        if self.num_layers == 18:
            block = BasicBlock
            layers = [2, 2, 2, 2]
            # ... [Missing weight URL definition for ResNet18]
        elif self.num_layers == 50:
            block = Bottleneck
            layers = [3, 4, 6, 3]
            # ... [Missing weight URL definition for ResNet50]
        else:
            # ValueError is still an Exception, so existing broad handlers keep working
            raise ValueError("ResNet Type not supported.")

        self.feature = ResNetBackbone(block, layers, **kwargs)
        # Head input width is 512 * block.expansion
        self.classifier = nn.Linear(512 * block.expansion, self.num_cls)

    def setup_criteria(self):
        """
        Setup the criterion for classification.
        """
        self.criterion_cls = nn.CrossEntropyLoss()

    def feat_init(self):
        """
        Initialize the features using pretrained weights.

        Raises:
            RuntimeError: If no pretrained weights source has been configured.
        """
        if self.pretrained_weights is None:
            raise RuntimeError(
                "No pretrained weights configured: set `pretrained_weights` before calling feat_init()."
            )

        init_weights = self.pretrained_weights.get_state_dict(progress=True)
        # Drop the "module." / "feature." fragments so keys line up with the backbone's own names
        init_weights = OrderedDict(
            (key.replace("module.", "").replace("feature.", ""), value)
            for key, value in init_weights.items()
        )
        # strict=False: keys that do not match are tolerated and reported below
        self.feature.load_state_dict(init_weights, strict=False)

        # Print missing and unused keys for debugging purposes
        load_keys = set(init_weights.keys())
        self_keys = set(self.feature.state_dict().keys())
        missing_keys = self_keys - load_keys
        unused_keys = load_keys - self_keys
        print("missing keys:", sorted(missing_keys))
        print("unused_keys:", sorted(unused_keys))
|
||||
|
||||
|
||||
class PlainResNetInference(nn.Module):
    """
    Inference module for the PlainResNet Classifier.
    """

    def __init__(self, num_cls=36, num_layers=50, weights=None, device="cpu", url=None):
        """
        Build the classifier and load its weights.

        Args:
            num_cls (int): Number of outputs of the classification head.
            num_layers (int): ResNet depth forwarded to PlainResNetClassifier.
            weights (str, optional): Path to a local checkpoint; takes precedence over `url`.
            device (str, optional): Device for model inference. Defaults to "cpu".
            url (str, optional): URL to fetch the checkpoint from when `weights` is not given.

        Raises:
            ValueError: If neither `weights` nor `url` is provided.
        """
        super(PlainResNetInference, self).__init__()
        self.device = device
        self.net = PlainResNetClassifier(num_cls=num_cls, num_layers=num_layers)
        if weights:
            # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
            clf_weights = torch.load(weights, map_location=torch.device(self.device))
        elif url:
            clf_weights = load_state_dict_from_url(url, map_location=torch.device(self.device))
        else:
            # ValueError is still an Exception, so existing broad handlers keep working
            raise ValueError("Need weights for inference.")
        # The checkpoint is expected to hold the parameters under "state_dict"
        self.load_state_dict(clf_weights["state_dict"], strict=True)
        self.eval()
        self.net.to(self.device)

    def results_generation(self, logits, img_id, id_strip=None):
        """
        Process logits to produce final results. Must be overridden by subclasses.

        Args:
            logits (torch.Tensor): Logits from the network.
            img_id (str): image path.
            id_strip (str): stripping string for better image id saving.

        Returns:
            dict: Dictionary containing the results.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        # Returning None here made the classification helpers fail later with an
        # unrelated TypeError; fail at the real cause instead.
        raise NotImplementedError("Subclasses must implement results_generation().")

    def forward(self, img):
        """
        Compute logits: backbone features followed by the linear classifier.
        """
        feats = self.net.feature(img)
        logits = self.net.classifier(feats)
        return logits

    def single_image_classification(self, img, img_id=None, id_strip=None):
        """
        Classify a single image.

        Args:
            img (torch.Tensor): Image tensor without a batch dimension.
            img_id (str, optional): Image identifier passed on to results_generation.
            id_strip (str, optional): Forwarded to results_generation.

        Returns:
            The first element of what results_generation returns for this image.
        """
        # Inference only: no autograd graph is needed
        with torch.no_grad():
            logits = self.forward(img.unsqueeze(0).to(self.device))
        return self.results_generation(logits.cpu(), [img_id], id_strip=id_strip)[0]

    def batch_image_classification(self, dataloader, id_strip=None):
        """
        Process a batch of images for classification.

        Args:
            dataloader (iterable): Yields `(imgs, paths)` batches and supports `len()`.
            id_strip (str, optional): Forwarded to results_generation.

        Returns:
            Whatever results_generation returns for all images; an empty list
            when the dataloader yields no batches.
        """
        total_logits = []
        total_paths = []

        with tqdm(total=len(dataloader)) as pbar:
            for imgs, paths in dataloader:
                # Without no_grad every stored logit tensor keeps its graph alive
                with torch.no_grad():
                    total_logits.append(self.forward(imgs.to(self.device)))
                total_paths.append(paths)
                pbar.update(1)

        # torch.cat / np.concatenate reject empty lists; nothing to classify
        if not total_logits:
            return []

        total_logits = torch.cat(total_logits, dim=0).cpu()
        total_paths = np.concatenate(total_paths, axis=0)

        return self.results_generation(total_logits, total_paths, id_strip=id_strip)
|
|
@ -0,0 +1,70 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import torch
|
||||
from .base_classifier import PlainResNetInference
|
||||
|
||||
__all__ = [
|
||||
"AI4GOpossum"
|
||||
]
|
||||
|
||||
|
||||
class AI4GOpossum(PlainResNetInference):
    """
    Opossum Classifier that inherits from PlainResNetInference.
    This classifier is specialized for distinguishing between Opossums and Non-opossums.
    """

    # Image size for the Opossum classifier
    IMAGE_SIZE = 224

    # Class names for prediction
    CLASS_NAMES = {
        0: "Non-opossum",
        1: "Opossum"
    }

    def __init__(self, weights=None, device="cpu", pretrained=True):
        """
        Initialize the Opossum Classifier.

        Args:
            weights (str, optional): Path to the model weights. Defaults to None.
            device (str, optional): Device for model inference. Defaults to "cpu".
            pretrained (bool, optional): Whether to use pretrained weights. Defaults to True.
        """

        # If pretrained, use the provided URL to fetch the weights
        if pretrained:
            url = "https://zenodo.org/records/10023414/files/OpossumClassification_v0.0.0.ckpt?download=1"
        else:
            url = None

        super(AI4GOpossum, self).__init__(weights=weights, device=device,
                                          num_cls=1, num_layers=50, url=url)

    def results_generation(self, logits, img_ids, id_strip=None):
        """
        Generate results for classification.

        Args:
            logits (torch.Tensor): Output tensor from the model (one logit per image).
            img_ids (list): List of image identifiers, one per row of `logits`.
            id_strip (str): Characters passed to `str.strip` to trim both ends of each image id.

        Returns:
            list: One dictionary per image containing image ID, prediction,
                class id, and confidence score.
        """

        # Single-logit binary head: sigmoid gives the "Opossum" probability
        probs = torch.sigmoid(logits)
        preds = (probs > 0.5).squeeze(1).numpy().astype(int)

        results = []
        for pred, img_id, prob in zip(preds, img_ids, probs):
            # Convert the numpy integer to a plain int so the result can be
            # JSON-serialised and matches the type used by the other classifiers.
            class_id = int(pred)
            opossum_prob = prob.item()
            results.append({
                "img_id": str(img_id).strip(id_strip),
                "prediction": self.CLASS_NAMES[class_id],
                "class_id": class_id,
                # Confidence is the probability of the predicted class
                "confidence": opossum_prob if class_id == 1 else (1 - opossum_prob),
            })

        return results
|
|
@ -0,0 +1 @@
|
|||
from .yolov5 import *
|
|
@ -0,0 +1,2 @@
|
|||
from .base_detector import *
|
||||
from .megadetector import *
|
|
@ -0,0 +1,158 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
""" YoloV5 base detector class. """
|
||||
|
||||
# Importing basic libraries
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import supervision as sv
|
||||
import torch
|
||||
from torch.hub import load_state_dict_from_url
|
||||
from yolov5.utils.general import non_max_suppression, scale_coords
|
||||
|
||||
class YOLOV5Base:
    """
    Base detector class for YOLO V5. This class provides utility methods for
    loading the model, generating results, and performing single and batch image detections.
    """

    # Placeholder class-level attributes to be defined in derived classes
    IMAGE_SIZE = None
    STRIDE = None
    CLASS_NAMES = None
    TRANSFORM = None

    def __init__(self, weights=None, device="cpu", url=None):
        """
        Initialize the YOLO V5 detector.

        Args:
            weights (str, optional):
                Path to the model weights. Defaults to None.
            device (str, optional):
                Device for model inference. Defaults to "cpu".
            url (str, optional):
                URL to fetch the model weights. Defaults to None.

        Raises:
            ValueError: If neither `weights` nor `url` is provided.
        """
        self.model = None
        self.device = device
        self._load_model(weights, self.device, url)
        self.model.to(self.device)

    def _load_model(self, weights=None, device="cpu", url=None):
        """
        Load the YOLO V5 model weights.

        Args:
            weights (str, optional):
                Path to the model weights; takes precedence over `url`. Defaults to None.
            device (str, optional):
                Device for model inference. Defaults to "cpu".
            url (str, optional):
                URL to fetch the model weights. Defaults to None.

        Raises:
            ValueError: If weights are not provided.
        """
        if weights:
            # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
            checkpoint = torch.load(weights, map_location=torch.device(device))
        elif url:
            # Map to the same `device` argument as the local-file branch
            checkpoint = load_state_dict_from_url(url, map_location=torch.device(device))
        else:
            # ValueError is still an Exception, so existing broad handlers keep working
            raise ValueError("Need weights for inference.")
        self.model = checkpoint["model"].float().fuse().eval()  # Convert to FP32 model

    def results_generation(self, preds, img_id, id_strip=None):
        """
        Generate results for detection based on model predictions.

        Args:
            preds (numpy.ndarray):
                Model predictions; columns 0-3 are read as xyxy, 4 as confidence, 5 as class id.
            img_id (str):
                Image identifier.
            id_strip (str, optional):
                Characters to strip from both ends of img_id (passed to `str.strip`).
                Defaults to None.

        Returns:
            dict: Dictionary containing image ID, detections, and labels.
        """
        results = {"img_id": str(img_id).strip(id_strip)}
        results["detections"] = sv.Detections(
            xyxy=preds[:, :4],
            confidence=preds[:, 4],
            class_id=preds[:, 5].astype(int)
        )
        # One "<class name> <confidence>" label per detection
        results["labels"] = [
            f"{self.CLASS_NAMES[class_id]} {confidence:0.2f}"
            for _, _, confidence, class_id, _ in results["detections"]
        ]
        return results

    def single_image_detection(self, img, img_size, img_path, conf_thres=0.2, id_strip=None):
        """
        Perform detection on a single image.

        Args:
            img (torch.Tensor):
                Input image tensor.
            img_size (tuple):
                Original image size.
            img_path (str):
                Image path or identifier.
            conf_thres (float, optional):
                Confidence threshold for predictions. Defaults to 0.2.
            id_strip (str, optional):
                Characters to strip from img_id. Defaults to None.

        Returns:
            dict: Detection results.
        """
        # Inference only: no autograd graph is needed
        with torch.no_grad():
            preds = self.model(img.unsqueeze(0).to(self.device))[0]
        preds = torch.cat(non_max_suppression(prediction=preds, conf_thres=conf_thres), dim=0)
        # Map boxes from the network input size back to the original image size
        preds[:, :4] = scale_coords([self.IMAGE_SIZE] * 2, preds[:, :4], img_size).round()
        return self.results_generation(preds.cpu().numpy(), img_path, id_strip)

    def batch_image_detection(self, dataloader, conf_thres=0.2, id_strip=None):
        """
        Perform detection on a batch of images.

        Args:
            dataloader (DataLoader):
                DataLoader yielding `(imgs, paths, sizes)` batches.
            conf_thres (float, optional):
                Confidence threshold for predictions. Defaults to 0.2.
            id_strip (str, optional):
                Characters to strip from img_id. Defaults to None.

        Returns:
            list: List of detection results for all images; empty when the
                dataloader has no batches.
        """
        # Nothing to do; the concatenations used previously rejected empty input
        if len(dataloader) == 0:
            return []

        results = []

        with tqdm(total=len(dataloader)) as pbar:
            for imgs, paths, sizes in dataloader:
                # Move each batch's raw predictions to the CPU right away instead of
                # holding the predictions of the whole dataset on the device.
                with torch.no_grad():
                    batch_preds = self.model(imgs.to(self.device))[0].cpu()

                # Images may differ in original size, so NMS and rescaling run per image
                for pred, path, size in zip(batch_preds, paths, sizes):
                    pred = non_max_suppression(
                        prediction=pred.unsqueeze(0), conf_thres=conf_thres
                    )[0].numpy()
                    pred[:, :4] = scale_coords(
                        [self.IMAGE_SIZE] * 2, pred[:, :4], np.asarray(size)
                    ).round()
                    results.append(self.results_generation(pred, path, id_strip))

                pbar.update(1)

        return results
|
|
@ -0,0 +1,47 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
from .base_detector import YOLOV5Base
|
||||
|
||||
__all__ = [
|
||||
'MegaDetectorV5',
|
||||
]
|
||||
|
||||
class MegaDetectorV5(YOLOV5Base):
    """
    MegaDetectorV5 is a YOLOV5Base detector configured for three classes:
    animals, persons, and vehicles.

    Attributes:
        IMAGE_SIZE (int): The standard image size used during training.
        STRIDE (int): Stride value used in the detector.
        CLASS_NAMES (dict): Mapping of class IDs to their respective names.
    """

    IMAGE_SIZE = 1280  # image size used in training
    STRIDE = 64
    CLASS_NAMES = {0: "animal", 1: "person", 2: "vehicle"}

    def __init__(self, weights=None, device="cpu", pretrained=True):
        """
        Initializes the MegaDetectorV5 model with the option to load pretrained weights.

        Args:
            weights (str, optional): Path to the weights file.
            device (str, optional): Device to load the model on (e.g., "cpu" or "cuda"). Default is "cpu".
            pretrained (bool, optional): Whether to load the pretrained model. Default is True.
        """
        # Hand the checkpoint URL to the base class only when pretrained weights are requested
        url = (
            "https://zenodo.org/records/10023414/files/MegaDetector_v5b.0.0.pt?download=1"
            if pretrained
            else None
        )
        super().__init__(weights=weights, device=device, url=url)
|
||||
|
||||
|
||||
# %%
|
|
@ -0,0 +1,2 @@
|
|||
from .misc import *
|
||||
from .post_process import *
|
|
@ -0,0 +1,51 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
""" Miscellaneous functions."""
|
||||
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from typing import Callable
|
||||
from supervision import VideoInfo, VideoSink, get_video_frames_generator
|
||||
|
||||
__all__ = [
|
||||
"process_video"
|
||||
]
|
||||
|
||||
|
||||
def process_video(
    source_path: str,
    target_path: str,
    callback: Callable[[np.ndarray, int], np.ndarray],
    target_fps: int = 1,
    codec: str = "avc1"
) -> None:
    """
    Process a video frame-by-frame, applying a callback function to each frame and saving the results
    to a new video. This version includes a progress bar and allows codec selection.

    Args:
        source_path (str):
            Path to the source video file.
        target_path (str):
            Path to save the processed video.
        callback (Callable[[np.ndarray, int], np.ndarray]):
            A function that takes a video frame and its index as input and returns the processed frame.
            The index counts the sampled frames (0, 1, 2, ...), not source frame numbers.
        target_fps (int, optional):
            Frame rate of the output video. When the source frame rate is higher, only every
            `int(source_fps / target_fps)`-th frame is processed. Default is 1.
        codec (str, optional):
            Codec used to encode the processed video. Default is "avc1".

    Raises:
        ValueError: If `target_fps` is not positive.
    """
    # Validate before touching the video: zero would divide by zero below and a
    # negative value would silently disable frame skipping.
    if target_fps <= 0:
        raise ValueError(f"target_fps must be positive, got {target_fps!r}")

    source_video_info = VideoInfo.from_video_path(video_path=source_path)
    if source_video_info.fps > target_fps:
        # fps > target_fps guarantees a stride of at least 1
        stride = int(source_video_info.fps / target_fps)
        source_video_info.fps = target_fps
    else:
        stride = 1

    # Sampling every `stride`-th frame starting at frame 0 yields ceil(total / stride)
    # frames; rounding down left the progress bar one short for non-multiples.
    total_frames = source_video_info.total_frames
    expected_frames = None if total_frames is None else -(-total_frames // stride)

    with VideoSink(target_path=target_path, video_info=source_video_info, codec=codec) as sink:
        with tqdm(total=expected_frames) as pbar:
            for index, frame in enumerate(
                get_video_frames_generator(source_path=source_path, stride=stride)
            ):
                result_frame = callback(frame, index)
                sink.write_frame(frame=result_frame)
                pbar.update(1)
|
|
@ -0,0 +1,148 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
""" Post-processing functions."""
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import json
|
||||
from PIL import Image
|
||||
import supervision as sv
|
||||
|
||||
# Names exported by a star-import of this module.
__all__ = [
    "save_detection_images",
    "save_crop_images",
    "save_detection_json",
    "save_detection_classification_json",
]
|
||||
|
||||
|
||||
# !!! Output paths need to be optimized !!!
|
||||
def save_detection_images(results, output_dir):
    """
    Save detected images with bounding boxes and labels annotated.

    Each annotated image is written to `output_dir` under the base name of its source
    file (the last component of `img_id`).

    Args:
        results (list or dict):
            Detection results. Either a single dict or a list of dicts, each with the keys
            "img_id" (path of the source image), "detections" and "labels".
        output_dir (str):
            Directory to save the annotated images.
    """
    box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)
    os.makedirs(output_dir, exist_ok=True)

    # Treat a single result dict as a one-element list so both inputs share one code path.
    entries = results if isinstance(results, list) else [results]

    with sv.ImageSink(target_dir_path=output_dir, overwrite=True) as sink:
        for entry in entries:
            img_path = entry["img_id"]
            # Load the pixels eagerly and release the file handle right away.
            with Image.open(img_path) as img:
                scene = np.array(img)
            annotated_img = box_annotator.annotate(
                scene=scene,
                detections=entry["detections"],
                labels=entry["labels"]
            )
            # os.path.basename also handles paths without any directory component and
            # platform-specific separators, unlike splitting on '/'.
            sink.save_image(image=annotated_img, image_name=os.path.basename(img_path))
|
||||
|
||||
|
||||
# !!! Output paths need to be optimized !!!
|
||||
def save_crop_images(results, output_dir):
    """
    Save cropped images based on the detection bounding boxes.

    One crop is written per detection, named "<category>_<index>_<source file name>",
    where <index> is the position of the detection within its image.

    Args:
        results (list):
            Detection results. A list of dicts, each with the keys "img_id" (path of the
            source image) and "detections" (an object exposing `xyxy` and `class_id`).
        output_dir (str):
            Directory to save the cropped images.

    Raises:
        TypeError: If `results` is not a list.
    """
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if not isinstance(results, list):
        raise TypeError("results must be a list, got {}".format(type(results).__name__))

    os.makedirs(output_dir, exist_ok=True)
    with sv.ImageSink(target_dir_path=output_dir, overwrite=True) as sink:
        for entry in results:
            detections = entry["detections"]
            # Nothing to crop: skip without touching the image file.
            if len(detections.xyxy) == 0:
                continue

            img_path = entry["img_id"]
            img_name = os.path.basename(img_path)
            # Decode the image once per entry instead of once per detection.
            with Image.open(img_path) as img:
                image = np.array(img)

            # Read boxes and categories through attributes rather than unpacking the
            # detections iterator, whose tuple length is not fixed by this file.
            for i, (xyxy, cat) in enumerate(zip(detections.xyxy, detections.class_id)):
                cropped_img = sv.crop_image(image=image, xyxy=xyxy)
                sink.save_image(
                    image=cropped_img,
                    image_name="{}_{}_{}".format(int(cat), i, img_name)
                )
|
||||
|
||||
|
||||
def save_detection_json(results, output_dir, categories=None):
    """
    Save detection results to a JSON file.

    The written object has two keys: "annotations" (one entry per result with "img_id",
    "bbox", "category" and "confidence") and "categories".

    Args:
        results (list):
            Detection results. Each item is a dict with the keys "img_id" and "detections";
            the detections object must expose the array attributes `xyxy`, `class_id`
            and `confidence`.
        output_dir (str):
            Path of the output JSON file (despite the name, this is a file path, not a
            directory).
        categories (list, optional):
            List of categories for detected objects. Defaults to None.
    """
    # Build the complete payload before opening the file so that a malformed result
    # cannot leave a truncated or empty JSON file behind.
    annotations = []
    for r in results:
        detections = r["detections"]
        annotations.append({
            "img_id": r["img_id"],
            # Box coordinates are truncated to integers.
            "bbox": detections.xyxy.astype(int).tolist(),
            "category": detections.class_id.tolist(),
            "confidence": detections.confidence.tolist()
        })

    json_results = {
        "annotations": annotations,
        "categories": categories
    }
    with open(output_dir, 'w', encoding="utf-8") as f:
        json.dump(json_results, f)
|
||||
|
||||
|
||||
def save_detection_classification_json(det_results, clf_results, output_path,
                                       det_categories=None, clf_categories=None):
    """
    Save detection and classification results to a JSON file.

    The written object has the keys "annotations", "det_categories" and "clf_categories".
    Each annotation combines one detection result with the classification results that
    belong to the same image.

    Args:
        det_results (list):
            Detection results. Each item is a dict with the keys "img_id" and "detections";
            the detections object must expose the array attributes `xyxy`, `class_id`
            and `confidence`.
        clf_results (list):
            Classification results. Each item is a dict with the keys "img_id", "class_id"
            and "confidence". Entries are consumed sequentially, so they must be grouped
            by image and appear in the same image order as `det_results`.
        output_path (str):
            Path to save the output JSON file.
        det_categories (list, optional):
            List of categories for detected objects. Defaults to None.
        clf_categories (list, optional):
            List of categories for classified objects. Defaults to None.
    """
    # Build the complete payload before opening the file so that a malformed result
    # cannot leave a truncated or empty JSON file behind.
    annotations = []
    clf_index = 0  # position of the next unconsumed classification result
    num_clf = len(clf_results)
    for det_r in det_results:
        img_id = det_r["img_id"]
        # Distinct local names: do not shadow the `clf_categories` parameter.
        img_clf_categories = []
        img_clf_confidence = []
        # Consume the contiguous run of classification results for this image.
        while clf_index < num_clf and clf_results[clf_index]["img_id"] == img_id:
            clf_r = clf_results[clf_index]
            img_clf_categories.append(clf_r["class_id"])
            img_clf_confidence.append(clf_r["confidence"])
            clf_index += 1

        detections = det_r["detections"]
        annotations.append({
            "img_id": img_id,
            # Box coordinates are truncated to integers.
            "bbox": detections.xyxy.astype(int).tolist(),
            "det_category": detections.class_id.tolist(),
            "det_confidence": detections.confidence.tolist(),
            "clf_category": img_clf_categories,
            "clf_confidence": img_clf_confidence
        })

    json_results = {
        "annotations": annotations,
        "det_categories": det_categories,
        "clf_categories": clf_categories
    }
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(json_results, f)
|
||||
|
41
SECURITY.md
|
@ -1,41 +0,0 @@
|
|||
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
|
||||
|
||||
## Security
|
||||
|
||||
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
|
||||
|
||||
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
|
||||
|
||||
## Reporting Security Issues
|
||||
|
||||
**Please do not report security vulnerabilities through public GitHub issues.**
|
||||
|
||||
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
|
||||
|
||||
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
|
||||
|
||||
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
|
||||
|
||||
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
|
||||
|
||||
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
|
||||
* Full paths of source file(s) related to the manifestation of the issue
|
||||
* The location of the affected source code (tag/branch/commit or direct URL)
|
||||
* Any special configuration required to reproduce the issue
|
||||
* Step-by-step instructions to reproduce the issue
|
||||
* Proof-of-concept or exploit code (if possible)
|
||||
* Impact of the issue, including how an attacker might exploit the issue
|
||||
|
||||
This information will help us triage your report more quickly.
|
||||
|
||||
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
|
||||
|
||||
## Preferred Languages
|
||||
|
||||
We prefer all communications to be in English.
|
||||
|
||||
## Policy
|
||||
|
||||
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
|
||||
|
||||
<!-- END MICROSOFT SECURITY.MD BLOCK -->
|
|
@ -1,13 +0,0 @@
|
|||
# Camera Trap Image Processing APIs
|
||||
|
||||
Though most of our users either use the MegaDetector model directly or work with us to run MegaDetector on the cloud, we also package useful components developed in the Camera Traps project into APIs that users can operate (on the cloud or on local computers) to process camera trap images in a variety of scenarios. This folder contains the source code of the APIs and documentation on how to set them up.
|
||||
|
||||
|
||||
### Synchronous API
|
||||
|
||||
This API is intended for real-time scenarios where a small number of images are processed at a time and latency is a priority. See documentation [here](synchronous).
|
||||
|
||||
### Batch processing API
|
||||
|
||||
This API runs the detector on lots of images (typically millions) and distributes the work over potentially many nodes using [Azure Batch](https://azure.microsoft.com/en-us/services/batch/). See documentation [here](batch_processing).
|
||||
|
|
@ -1,313 +0,0 @@
|
|||
# Camera trap batch processing API user guide
|
||||
|
||||
Though most of our users either use the [MegaDetector](https://github.com/ecologize/CameraTraps#megadetector) model directly or work with us to run MegaDetector on the cloud, we also offer an open-source reference implementation for an API that processes a large quantity of camera trap images, to support a variety of online scenarios. The output is most helpful for separating empty from non-empty images based on a detector confidence threshold that you select, and putting bounding boxes around animals, people, and vehicles to help manual review proceed more quickly. If you are interested in setting up an endpoint to process very small numbers of images for real-time applications (e.g. for anti-poaching applications), see the source for our [real-time camera trap image processing API](https://github.com/ecologize/CameraTraps/tree/main/api/synchronous).
|
||||
|
||||
With the batch processing API, you can process a batch of up to a few million images in one request to the API. If in addition you have some images that are labeled, we can evaluate the performance of the MegaDetector on your labeled images (see [Post-processing tools](#post-processing-tools)).
|
||||
|
||||
All references to “container” in this document refer to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) containers.
|
||||
|
||||
We have referred to one submission of images as a "request" in this documentation but as a "job" elsewhere in the source code and emails; confusingly, the endpoint for checking the status of a request/job is called `/task` and the RequestID is called `task_id`. Consider "request" and "job" interchangeable, and the `/task` endpoint a legacy issue. Note that the terms "job" and "task" mean different things in the source code (in the context of Azure Batch).
|
||||
|
||||
|
||||
## API
|
||||
|
||||
### API endpoints
|
||||
|
||||
Once configured to run on a live instance, the endpoints of this API are available at
|
||||
|
||||
```
|
||||
http://URL/v4/camera-trap/detection-batch
|
||||
```
|
||||
|
||||
#### `/request_detections`
|
||||
|
||||
To submit a request for batch processing, make a POST call to this endpoint with a json body containing input fields defined below. The API will return with a json response very quickly to give you a RequestID (UUID4 hex) representing the request you have submitted, for example:
|
||||
```json
|
||||
{
|
||||
"request_id": "f940ecd58c7746b1bde89bd6ba5a5202"
|
||||
}
|
||||
```
|
||||
or an error message, if your inputs are not acceptable:
|
||||
```json
|
||||
{
|
||||
"error": "error message."
|
||||
}
|
||||
```
|
||||
In particular the endpoint will return a 503 error if the queue of requests is full. Please re-try later in that case.
|
||||
|
||||
|
||||
#### `/task`
|
||||
|
||||
Check the status of your request by calling the `/task` endpoint via a GET call, passing in your RequestID:
|
||||
|
||||
```http://URL/v4/camera-trap/detection-batch/task/RequestID```
|
||||
|
||||
This returns a json with the fields `Status`, `TaskId` (which is the `request_id` in this document), and a few others. The `Status` field is a json object with the following fields:
|
||||
|
||||
- `request_status`: one of `running`, `failed`, `problem`, `completed`, and `canceled`.
|
||||
- The status `failed` indicates that the images have not been submitted to the cluster for processing, and so you can go ahead and call the `/request_detections` endpoint again, correcting your inputs according to the error message returned with the status.
|
||||
- The status `problem` indicates that the images have already been submitted for processing but the API encountered an error while monitoring progress; in this case, please contact us to retrieve your results so that no unnecessary processing would occupy the cluster (`message` field will mention "please contact us").
|
||||
- `canceled` if your call to the `/cancel_request` endpoint took effect.
|
||||
|
||||
- `message`: a longer string describing the `request_status` and any errors; when the request is completed, the URLs to the output files will also be here (see the [API outputs](#api-outputs) section below).
|
||||
|
||||
|
||||
#### `/supported_model_versions`
|
||||
Check which versions of the MegaDetector are supported by this API by making a GET call to this endpoint.
|
||||
|
||||
#### `/default_model_version`
|
||||
Check which version of the MegaDetector is used by default by making a GET call to this endpoint.
|
||||
|
||||
#### `/cancel_request`
|
||||
If you have submitted a request by mistake, you can make a POST call to this endpoint to cancel it.
|
||||
|
||||
The body should contain the `caller` (see next section on _API inputs_) and `request_id` fields. You should get back a response immediately with status code 200 if the signal was successfully sent. You can verify that the request has been canceled using the `/task` endpoint.
|
||||
|
||||
|
||||
### API inputs
|
||||
|
||||
| Parameter | Is required | Type | Explanation |
|
||||
|--------------------------|-------------|-------|----------------------------|
|
||||
| input_container_sas | Yes<sup>1</sup> | string | SAS URL with list and read permissions to the Blob Storage container where the images are stored. |
|
||||
| images_requested_json_sas | No<sup>1</sup> | string | SAS URL with list and read permissions to a json file in Blob Storage. See below for explanation of the content of the json to provide. |
|
||||
| image_path_prefix | No | string | Only process images whose full path starts with `image_path_prefix` (case-_sensitive_). Note that any image paths specified in `images_requested_json_sas` will need to be the full path from the root of the container, regardless of whether `image_path_prefix` is provided. |
|
||||
| first_n | No | int | Only process the first `first_n` images. Order of images is not guaranteed, but is likely to be alphabetical. Set this to a small number to avoid taking time to fully list all images in the blob (about 15 minutes for 1 million images) if you just want to try this API. |
|
||||
| sample_n | No | int | Randomly select `sample_n` images to process. |
|
||||
| model_version | No | string | Version of the MegaDetector model to use. Default is the most updated stable version (check using the `/default_model_version` endpoint). Supported versions are available at the `/supported_model_versions` endpoint.|
|
||||
| request_name | No | string | A string (letters, digits, `_`, `-` allowed, max length 92 characters) that will be appended to the output file names to help you identify the resulting files. A timestamp in UTC (`%Y%m%d%H%M%S`) of the time of submission will be appended to the resulting files automatically. |
|
||||
| use_url | No | bool | Set to `true` if you are providing public image URLs. |
|
||||
| caller | Yes | string | An identifier that we use to whitelist users for now. |
|
||||
| country | No (but recommended) | string | Country where the majority of the images in this batch are taken. Preferably use an [ISO 3166-1 alpha-3 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3#Officially_assigned_code_elements), such as "BWA" for Botswana and "USA" for the United States |
|
||||
| organization_name | No (but recommended) | string | Organization conducting the survey. |
|
||||
|
||||
|
||||
<sup>1</sup> There are two ways of giving the API access to your images.
|
||||
|
||||
1 - If you have all your images in a container in Azure Blob Storage, provide the parameter `input_container_sas` as described above. This means that your images do not have to be at publicly accessible URLs. In this case, the json pointed to by `images_requested_json_sas` should look like:
|
||||
```json
|
||||
[
|
||||
"Season1/Location1/Camera1/image1.jpg",
|
||||
"Season1/Location1/Camera1/image2.jpg"
|
||||
]
|
||||
```
|
||||
Only images whose paths are listed here will be processed if you provide this list.
|
||||
|
||||
2 - If your images are stored elsewhere and you can provide a publicly accessible URL to each, you do not need to specify `input_container_sas`. Instead, list the URLs to all the images (instead of their paths) you’d like to process in the json at `images_requested_json_sas`.
|
||||
|
||||
|
||||
#### Attaching metadata
|
||||
|
||||
We can store a (short) string of metadata with each image path or URL. The json at `images_requested_json_sas` should then look like:
|
||||
```json
|
||||
[
|
||||
["Season1/Location1/Camera1/image1.jpg", "metadata_string1"],
|
||||
["Season1/Location1/Camera1/image2.jpg", "metadata_string2"]
|
||||
]
|
||||
```
|
||||
The metadata string will be copied to the `meta` field in the image's entry in the output file (format see below).
|
||||
|
||||
|
||||
#### Other notes and example
|
||||
|
||||
- Only images with file name ending in ".jpg", ".jpeg" or ".png" (case insensitive) will be processed, so please make sure the file names are compliant before you upload them to the container (you cannot rename a blob without copying it entirely once it is in Blob Storage).
|
||||
|
||||
- By default we process all such images in the specified container. You can choose to only process a subset of them by specifying the other input parameters. The images will be filtered out accordingly in this order:
|
||||
- `images_requested_json_sas`
|
||||
- `image_path_prefix`
|
||||
- `first_n`
|
||||
- `sample_n`
|
||||
|
||||
- For example, if you specified both `images_requested_json_sas` and `first_n`, only images that are in your provided list at `images_requested_json_sas` will be considered, and then we process the `first_n` of those.
|
||||
|
||||
Example body of the POST request:
|
||||
```json
|
||||
{
|
||||
"input_container_sas": "https://storageaccountname.blob.core.windows.net/container-name?se=2019-04-23T01%3A30%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=A_LONG_STRING",
|
||||
"images_requested_json_sas": "https://storageaccountname2.blob.core.windows.net/container-name2/possibly_in_a_folder/my_list_of_images.json?se=2019-04-19T20%3A31%3A00Z&sp=rl&sv=2018-03-28&sr=b&sig=ANOTHER_LONG_STRING",
|
||||
"image_path_prefix": "2020/Alberta",
|
||||
"first_n": 100000,
|
||||
"request_name": "Alberta_2020",
|
||||
"model_version": "4.1",
|
||||
"caller": "allowlisted_user_x",
|
||||
"country": "CAN",
|
||||
"organization_name": "Name of Organization"
|
||||
}
|
||||
```
|
||||
|
||||
You can manually call the API using applications such as Postman:
|
||||
|
||||
![Screenshot of Postman used to submit a request to the batch processing API](./images/Postman_screenshot.png)
|
||||
|
||||
|
||||
#### How to obtain a SAS token
|
||||
|
||||
You can easily generate a [SAS token](https://docs.microsoft.com/en-us/azure/storage/common/storage-dotnet-shared-access-signature-part-1) to a container using the desktop app [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) (available on Windows, macOS and Linux). You can also issue SAS tokens programmatically by using the [Azure Storage SDK](https://azure-storage.readthedocs.io/ref/azure.storage.blob.baseblobservice.html#azure.storage.blob.baseblobservice.BaseBlobService.generate_blob_shared_access_signature).
|
||||
|
||||
|
||||
Using Storage Explorer, right click on the container or blob you’d like to grant access for, and choose “Get Shared Access Signature...”. On the dialog window that appears,
|
||||
- cross out the “Start time” field if you will be using the SAS token right away
|
||||
- set the “Expiry time” to a date in the future, about a month ahead is reasonable. The SAS token needs to be valid for the duration of the batch processing request.
|
||||
- make sure “Read” and “List” are checked under “Permissions” (see screenshot)
|
||||
|
||||
Click “Create”, and the “URL” field on the next screen is the value required for `input_container_sas` or `images_requested_json_sas`.
|
||||
|
||||
![Screenshot of Azure Storage Explorer used for generating SAS tokens with read and list permissions](./images/SAS_screenshot.png)
|
||||
|
||||
|
||||
### API outputs
|
||||
|
||||
Once your request is submitted and parameters validated, the API divides all images into shards of about 2000 images each, and sends them to an [Azure Batch](https://azure.microsoft.com/en-us/services/batch/) node pool to be scored by the model. Another process will monitor how many shards have been evaluated, checking every 15 minutes, and update the status of the request, which you can check via the `/task` endpoint.
|
||||
|
||||
When all shards have finished processing, the `status` returned by the `/task` endpoint will have the `request_status` field as `completed`, and the `message` field will contain a URL to the output file. The returned body looks like
|
||||
|
||||
```json
|
||||
{
|
||||
"Status": {
|
||||
"request_status": "completed",
|
||||
"message": {
|
||||
"num_failed_shards": 0,
|
||||
"output_file_urls": {
|
||||
"detections": "https://cameratrap.blob.core.windows.net/async-api-internal/ee26326e-7e0d-4524-a9ea-f57a5799d4ba/ee26326e-7e0d-4524-a9ea-f57a5799d4ba_detections_4_1_on_test_images_20200709211752.json?sv=2019-02-02&sr=b&sig=key1"
|
||||
}
|
||||
},
|
||||
"time": "2020-07-09 21:27:17"
|
||||
},
|
||||
"Timestamp": "2020-07-09 21:27:17",
|
||||
"Endpoint": "/v3/camera-trap/detection-batch/request_detections",
|
||||
"TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
|
||||
}
|
||||
```
|
||||
|
||||
To obtain the URL of the output file:
|
||||
```python
|
||||
task_status = body['Status']
|
||||
assert task_status['request_status'] == 'completed'
|
||||
message = task_status['message']
|
||||
assert message['num_failed_shards'] == 0
|
||||
|
||||
url_to_results_file = message['output_file_urls']['detections']
|
||||
```
|
||||
Note that the field `Status` in the returned body is capitalized (since July 2020).
|
||||
|
||||
The URL to the output file is valid for 180 days from the time the request has finished. If you neglected to retrieve them before the link expired, contact us with the RequestID and we can send the results to you.
|
||||
|
||||
The output file is a JSON in the format described below.
|
||||
|
||||
|
||||
#### Batch processing API output format
|
||||
|
||||
The output of the detector is saved in `requestID_detections_requestName_timestamp.json`. The `classifications` fields will be added if a classifier was trained for your project and applied to the images.
|
||||
|
||||
If an image could not be opened or an error occurred when applying the model to it, it will still have an entry in the output file images list, but it will have a `failure` field indicating the type of error (see last entry in the example below). However, if the API runs into problems processing an entire shard of images (usually 2000 images per shard), they will not have entries in the results file - this should be very rare.
|
||||
|
||||
Example output with both detection and classification results:
|
||||
|
||||
```json
|
||||
{
|
||||
"info": {
|
||||
"format_version": "1.3",
|
||||
"detector": "md_v4.1.0.pb",
|
||||
"detection_completion_time": "2019-05-22 02:12:19",
|
||||
"classifier": "ecosystem1_v2",
|
||||
"classification_completion_time": "2019-05-26 01:52:08",
|
||||
"detector_metadata": {
|
||||
"megadetector_version":"v4.1.0",
|
||||
"typical_detection_threshold":0.8,
|
||||
"conservative_detection_threshold":0.6
|
||||
},
|
||||
"classifier_metadata": {
|
||||
"typical_classification_threshold":0.75
|
||||
}
|
||||
},
|
||||
"detection_categories": {
|
||||
"1": "animal",
|
||||
"2": "person",
|
||||
"3": "vehicle"
|
||||
},
|
||||
"classification_categories": {
|
||||
"0": "fox",
|
||||
"1": "elk",
|
||||
"2": "wolf",
|
||||
"3": "bear",
|
||||
"4": "moose"
|
||||
},
|
||||
"images": [
|
||||
{
|
||||
"file": "path/from/base/dir/image_with_animal.jpg",
|
||||
"meta": "optional free-text metadata",
|
||||
"detections": [
|
||||
{
|
||||
"category": "1",
|
||||
"conf": 0.926,
|
||||
"bbox": [0.0, 0.2762, 0.1539, 0.2825],
|
||||
"classifications": [
|
||||
["3", 0.901],
|
||||
["1", 0.071],
|
||||
["4", 0.025]
|
||||
]
|
||||
},
|
||||
{
|
||||
"category": "1",
|
||||
"conf": 0.061,
|
||||
"bbox": [0.0451, 0.1849, 0.3642, 0.4636]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"file": "/path/from/base/dir/empty_image.jpg",
|
||||
"meta": "",
|
||||
"detections": []
|
||||
},
|
||||
{
|
||||
"file": "/path/from/base/dir2/corrupted_image.jpg",
|
||||
"failure": "Failure image access"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
##### Model metadata
|
||||
|
||||
The 'detector' field (within the 'info' field) specifies the filename of the detector model that produced this results file. It was omitted in old files generated with run_detector_batch.py, so with extremely high probability, if this field is not present, you can assume the file was generated with MegaDetector v4.
|
||||
|
||||
In newer files, this should contain the filename (base name only) of the model file, which typically will be one of:
|
||||
|
||||
* megadetector_v4.1 (MegaDetector v4, run via the batch API)
|
||||
* md_v4.1.0.pb (MegaDetector v4, run locally)
|
||||
* md_v5a.0.0.pt (MegaDetector v5a)
|
||||
* md_v5b.0.0.pt (MegaDetector v5b)
|
||||
|
||||
This string is used by some tools to choose appropriate default confidence values, which depend on the model version. If you change the name of the MegaDetector file, you will break this convention, and YMMV.
|
||||
|
||||
The "detector_metadata" and "classifier_metadata" fields are also optionally added as of format version 1.2. These currently contain useful default confidence values for downstream tools (particularly Timelapse), but we strongly recommend against blindly trusting these defaults; always explore your data before choosing a confidence threshold, as the optimal value can vary widely.
|
||||
|
||||
##### Detector outputs
|
||||
|
||||
The bounding box in the `bbox` field is represented as
|
||||
|
||||
```
|
||||
[x_min, y_min, width_of_box, height_of_box]
|
||||
```
|
||||
|
||||
where `(x_min, y_min)` is the upper-left corner of the detection bounding box, with the origin in the upper-left corner of the image. The coordinates and box width and height are *relative* to the width and height of the image. Note that this is different from the coordinate format used in the [COCO Camera Traps](data_management/README.md) databases, which are in absolute coordinates.
|
||||
|
||||
The detection category `category` can be interpreted using the `detection_categories` dictionary.
|
||||
|
||||
Detection categories not listed here are allowed by this format specification, but should be treated as "no detection".
|
||||
|
||||
When the detector model detects no animal (or person or vehicle), the confidence `conf` is shown as 0.0 (not confident that there is an object of interest) and the `detections` field is an empty list.
|
||||
|
||||
|
||||
##### Classifier outputs
|
||||
|
||||
After a classifier is applied, each tuple in a `classifications` list represents `[species, confidence]`. They are listed in order of confidence. The species categories should be interpreted using the `classification_categories` dictionary. Keys in `classification_categories` will always be nonnegative integers formatted as strings.
|
||||
|
||||
|
||||
## Post-processing tools
|
||||
|
||||
The [postprocessing](postprocessing) folder contains tools for working with the output of our detector API. In particular, [postprocess_batch_results.py](postprocessing/postprocess_batch_results.py) provides visualization and accuracy assessment tools for the output of the batch processing API. A sample output for the Snapshot Serengeti data when using ground-truth annotations can be seen [here](http://dolphinvm.westus2.cloudapp.azure.com/data/snapshot_serengeti/serengeti_val_detections_from_pkl_MDv1_20190528_w_classifications_eval/).
|
||||
|
||||
|
||||
## Integration with other tools
|
||||
|
||||
The [integration](integration) folder contains guidelines and postprocessing scripts for using the output of our API in other applications.
|
|
@ -1,87 +0,0 @@
|
|||
# Camera trap batch processing API developer readme
|
||||
|
||||
|
||||
## Build the Docker image for Batch node pools
|
||||
|
||||
We need to build a Docker image with the necessary packages (mainly TensorFlow) to run the scoring script. Azure Batch will pull this image from a private container registry, which needs to be in the same region as the Batch account.
|
||||
|
||||
Navigate to the subdirectory `batch_service` (otherwise you need to specify the Docker context).
|
||||
|
||||
Build the image from the Dockerfile in this folder:
|
||||
```commandline
|
||||
export IMAGE_NAME=***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3
|
||||
export REGISTRY_NAME=***REMOVED***
|
||||
sudo docker image build --rm --tag $IMAGE_NAME --file ./Dockerfile .
|
||||
```
|
||||
|
||||
Test that TensorFlow can use the GPU in an interactive Python session:
|
||||
```commandline
|
||||
sudo docker run --gpus all -it --rm $IMAGE_NAME /bin/bash
|
||||
|
||||
python
|
||||
import tensorflow as tf
|
||||
print('tensorflow version:', tf.__version__)
|
||||
print('tf.test.is_gpu_available:', tf.test.is_gpu_available())
|
||||
quit()
|
||||
```
|
||||
You can now exit/stop the container.
|
||||
|
||||
Log in to the Azure Container Registry for the batch API project and push the image; you may have to `az login` first:
|
||||
```commandline
|
||||
sudo az acr login --name $REGISTRY_NAME
|
||||
|
||||
sudo docker image push $IMAGE_NAME
|
||||
```
|
||||
|
||||
|
||||
## Create a Batch node pool
|
||||
|
||||
We create a separate node pool for each instance of the API. For example, our `internal` instance of the API has one node pool.
|
||||
|
||||
Follow the notebook [api_support/create_batch_pool.ipynb](../api_support/create_batch_pool.ipynb) to create one. You should only need to do this for new instances of the API.
|
||||
|
||||
|
||||
## Flask app
|
||||
|
||||
The API endpoints are in a Flask web application, which needs to be run in the conda environment `cameratraps-batch-api` specified by [environment-batch-api.yml](environment-batch-api.yml).
|
||||
|
||||
In addition, the API uses the `sas_blob_utils` module from the `ai4eutils` [repo](https://github.com/microsoft/ai4eutils), so that repo folder should be on the PYTHONPATH.
|
||||
|
||||
Make sure to update the `API_INSTANCE_NAME`, `POOL_ID`, `BATCH_ACCOUNT_NAME`, and `BATCH_ACCOUNT_URL` values in [server_api_config.py](./server_api_config.py) to reflect which instance of the API is being deployed.
|
||||
|
||||
To start the Flask app in development mode, first source `start_batch_api.sh` to retrieve secrets required for the various Azure services from KeyVault and export them as environment variables in the current shell:
|
||||
```commandline
|
||||
source start_batch_api.sh
|
||||
```
|
||||
|
||||
You will be prompted to authenticate via AAD (you need to have access to the AI4E engineering subscription).
|
||||
|
||||
Set the logs directory as needed, and the name of the Flask app:
|
||||
```
|
||||
export LOGS_DIR=/home/otter/camtrap/batch_api_logs
|
||||
export FLASK_APP=server
|
||||
```
|
||||
|
||||
To start the app locally in debug mode:
|
||||
```commandline
|
||||
export FLASK_ENV=development
|
||||
flask run -p 5000 --eager-loading --no-reload
|
||||
```
|
||||
|
||||
To start the app on a VM, with external access:
|
||||
```commandline
|
||||
flask run -h 0.0.0.0 -p 6011 --eager-loading --no-reload |& tee -a $LOGS_DIR/log_internal_dev_20210216.txt
|
||||
```
|
||||
|
||||
To start the app using the production server:
|
||||
```commandline
|
||||
gunicorn -w 1 -b 0.0.0.0:6011 --threads 4 --access-logfile $LOGS_DIR/log_internal_dev_20210218_access.txt --log-file $LOGS_DIR/log_internal_dev_20210218_error.txt --capture-output server:app --log-level=info
|
||||
```
|
||||
The logs will only be written to these two log files and will not show in the console.
|
||||
|
||||
The API should work with more than one process/Gunicorn worker, but we have not tested it.
|
||||
|
||||
|
||||
## Send daily activity summary to Teams
|
||||
|
||||
Running [api_support/start_summarize_daily_activities.sh](../api_support/start_summarize_daily_activities.sh) will retrieve credentials from the KeyVault (you need to authenticate again) and run a script to send a summary of images processed on *all* instances of the API in the past day to a Teams webhook.
|
|
@ -1,443 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.core.interactiveshell import InteractiveShell\n",
|
||||
"InteractiveShell.ast_node_interactivity = 'all' # default is ‘last_expr’\n",
|
||||
"\n",
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'10.0.0'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import azure.batch\n",
|
||||
"azure.batch.__version__"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import json\n",
|
||||
"\n",
|
||||
"from azure.batch import BatchServiceClient\n",
|
||||
"from azure.batch.batch_auth import SharedKeyCredentials\n",
|
||||
"from azure.batch.models import *\n",
|
||||
"from azure.common.credentials import ServicePrincipalCredentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Azure Batch\n",
|
||||
"\n",
|
||||
"Documentation\n",
|
||||
"- https://github.com/Azure-Samples/batch-python-quickstart/blob/master/src/python_quickstart_client.py\n",
|
||||
"- https://docs.microsoft.com/en-us/azure/batch/batch-docker-container-workloads#prefetch-images-for-container-configuration\n",
|
||||
"\n",
|
||||
"TODO\n",
|
||||
"\n",
|
||||
"- Turn `enable_auto_scale` on and set the appropriate `auto_scale_formula`. This way we can cap the maximum available nodes. https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling\n",
|
||||
"\n",
|
||||
"## Create a pool for each instance of the API\n",
|
||||
"\n",
|
||||
"List all Batch supported images with their \"capabilities\" (e.g. \"DockerCompatible\", \"NvidiaTeslaDriverInstalled\"):\n",
|
||||
"```\n",
|
||||
"az batch pool supported-images list\n",
|
||||
"```\n",
|
||||
"with the pool information provided in additional parameters.\n",
|
||||
"\n",
|
||||
"Listing all versions of a SKU of image:\n",
|
||||
"```\n",
|
||||
"az vm image list --all --publisher microsoft-dsvm\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You may need to accept the terms of an image:\n",
|
||||
"```\n",
|
||||
"az vm image list --all --publisher <publisher>\n",
|
||||
"```\n",
|
||||
"to find the URN for the image you want to use, followed by:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"az vm image terms accept --urn <corresponding-urn>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"os.environ['BATCH_ACCOUNT_NAME'] = ''\n",
|
||||
"os.environ['BATCH_ACCOUNT_URL'] = ''\n",
|
||||
"\n",
|
||||
"os.environ['APP_CLIENT_ID'] = ''\n",
|
||||
"os.environ['APP_CLIENT_SECRET'] = ''\n",
|
||||
"os.environ['APP_TENANT_ID'] = ''\n",
|
||||
"\n",
|
||||
"os.environ['REGISTRY_SERVER'] = '.azurecr.io' # e.g. registryname.azurecr.io\n",
|
||||
"os.environ['REGISTRY_USERNAME'] = ''\n",
|
||||
"os.environ['REGISTRY_PASSWORD'] = ''\n",
|
||||
"os.environ['REGISTRY_IMAGE_NAME'] = '.azurecr.io/tensorflow:1.14.0-gpu-py3' # login server/repository:tag"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"POOL_ID = 'internal_2'\n",
|
||||
"assert len(POOL_ID) <= 64, 'pool_id has more than 64 characters'\n",
|
||||
"\n",
|
||||
"POOL_NODE_COUNT = 1\n",
|
||||
"\n",
|
||||
"POOL_VM_SIZE = 'Standard_NC6s_v3' # https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series\n",
|
||||
"\n",
|
||||
"registry_server = os.environ['REGISTRY_SERVER']\n",
|
||||
"registry_username = os.environ['REGISTRY_USERNAME']\n",
|
||||
"registry_password = os.environ['REGISTRY_PASSWORD']\n",
|
||||
"docker_image = os.environ['REGISTRY_IMAGE_NAME']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 21,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def print_batch_exception(batch_exception):\n",
|
||||
" \"\"\"\n",
|
||||
" Prints the contents of the specified Batch exception.\n",
|
||||
" \"\"\"\n",
|
||||
" print('-------------------------------------------')\n",
|
||||
" print('Exception encountered:')\n",
|
||||
" if batch_exception.error and \\\n",
|
||||
" batch_exception.error.message and \\\n",
|
||||
" batch_exception.error.message.value:\n",
|
||||
" print(batch_exception.error.message.value)\n",
|
||||
" if batch_exception.error.values:\n",
|
||||
" print()\n",
|
||||
" for mesg in batch_exception.error.values:\n",
|
||||
" print(f'{mesg.key}:\\t{mesg.value}')\n",
|
||||
" print('-------------------------------------------')\n",
|
||||
" \n",
|
||||
"def create_pool(batch_service_client, pool_id):\n",
|
||||
" \"\"\"\n",
|
||||
" Create a pool with pool_id and the Docker image specified in the env variables.\n",
|
||||
" \"\"\"\n",
|
||||
" image_ref = ImageReference(\n",
|
||||
" publisher=\"microsoft-azure-batch\",\n",
|
||||
" offer=\"ubuntu-server-container\",\n",
|
||||
" sku=\"16-04-lts\",\n",
|
||||
" version=\"latest\" # URN: microsoft-azure-batch:ubuntu-server-container:16-04-lts:1.1.0\n",
|
||||
" # The Azure Batch container image only accepts 'latest' version\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" # Specify a container registry\n",
|
||||
" container_registry = ContainerRegistry(\n",
|
||||
" registry_server=registry_server,\n",
|
||||
" user_name=registry_username,\n",
|
||||
" password=registry_password\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" container_conf = ContainerConfiguration(\n",
|
||||
" container_image_names = [docker_image],\n",
|
||||
" container_registries =[container_registry]\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" vm_config = VirtualMachineConfiguration(\n",
|
||||
" image_reference=image_ref,\n",
|
||||
" container_configuration=container_conf,\n",
|
||||
" node_agent_sku_id=\"batch.node.ubuntu 16.04\"\n",
|
||||
" )\n",
|
||||
" \n",
|
||||
" new_pool = PoolAddParameter(\n",
|
||||
" id=POOL_ID,\n",
|
||||
" display_name=POOL_ID,\n",
|
||||
" \n",
|
||||
" vm_size=POOL_VM_SIZE,\n",
|
||||
" target_dedicated_nodes=POOL_NODE_COUNT, # we only used dedicated nodes\n",
|
||||
" \n",
|
||||
" virtual_machine_configuration=vm_config\n",
|
||||
" )\n",
|
||||
" batch_service_client.pool.add(new_pool)\n",
|
||||
"\n",
|
||||
"def create_job():\n",
|
||||
" pass\n",
|
||||
"\n",
|
||||
"def create_task():\n",
|
||||
" \"\"\"\n",
|
||||
" All Tasks should be idempotent as they may need to be retried due to a recovery operation.\n",
|
||||
" \"\"\"\n",
|
||||
" pass"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"account_url = os.environ['BATCH_ACCOUNT_URL']\n",
|
||||
"\n",
|
||||
"app_client_id = os.environ['APP_CLIENT_ID']\n",
|
||||
"app_client_secret = os.environ['APP_CLIENT_SECRET']\n",
|
||||
"app_tenant_id = os.environ['APP_TENANT_ID']\n",
|
||||
"\n",
|
||||
"credentials = ServicePrincipalCredentials(\n",
|
||||
" client_id=app_client_id,\n",
|
||||
" secret=app_client_secret,\n",
|
||||
" tenant=app_tenant_id,\n",
|
||||
" resource=\"https://batch.core.windows.net/\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# if using the Batch quota system, use https://docs.microsoft.com/en-us/python/api/azure-batch/azure.batch.batch_auth.sharedkeycredentials?view=azure-python\n",
|
||||
"# to authenticate instead of the service principal is also okay.\n",
|
||||
"\n",
|
||||
"batch_client = BatchServiceClient(credentials=credentials, batch_url=account_url)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"try:\n",
|
||||
" create_pool(batch_client, POOL_ID)\n",
|
||||
"except BatchErrorException as e:\n",
|
||||
" print_batch_exception(e)\n",
|
||||
" raise"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submitting a job\n",
|
||||
"\n",
|
||||
"Job is what we have been referring to as Requests. Each shard corresponds to a Task.\n",
|
||||
"\n",
|
||||
"The Azure Batch service sets these environment variables on the compute nodes:\n",
|
||||
"\n",
|
||||
"- AZ_BATCH_JOB_ID\n",
|
||||
"\n",
|
||||
"- AZ_BATCH_TASK_ID\n",
|
||||
"- AZ_BATCH_TASK_DIR\n",
|
||||
"- AZ_BATCH_TASK_WORKING_DIR - currently running task has read/write access to this directory"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"job_id = 'test_docker0'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# job id is the request id in the old API context\n",
|
||||
"\n",
|
||||
"job = JobAddParameter(\n",
|
||||
" id=job_id,\n",
|
||||
" pool_info=PoolInformation(pool_id=POOL_ID),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"batch_client.job.add(job)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Submit tasks to the job (the shards)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"command = \"\"\"/bin/sh -c \"python /app/score.py\" \"\"\"\n",
|
||||
"\n",
|
||||
"task = TaskAddParameter(\n",
|
||||
" id='task_{}'.format(0),\n",
|
||||
" command_line=command,\n",
|
||||
" container_settings=TaskContainerSettings(\n",
|
||||
" image_name=docker_image,\n",
|
||||
" working_directory='taskWorkingDirectory'\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"azure.batch.models._models_py3.TaskAddParameter"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type(task)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch_client.task.add(job_id, task)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Monitoring a job\n",
|
||||
"\n",
|
||||
"Optimization: remember which tasks have already Completed so that we do not repeatedly query for their status.\n",
|
||||
"\n",
|
||||
"Documentation: https://docs.microsoft.com/en-us/azure/batch/batch-efficient-list-queries"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tasks = batch_client.task.list(job_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tasks = [task for task in tasks]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"azure.batch.models._models_py3.CloudTask"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"type(tasks[0])"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"incomplete_tasks = [task for task in tasks if\n",
|
||||
" task.state != TaskState.completed]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"incomplete_tasks"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda env:cameratraps-batch-api]",
|
||||
"language": "python",
|
||||
"name": "conda-env-cameratraps-batch-api-py"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
|
@ -1,5 +0,0 @@
|
|||
FROM tensorflow/tensorflow:1.14.0-gpu-py3
|
||||
# Python version is 3.6.8
|
||||
|
||||
RUN pip install --upgrade pip
|
||||
RUN pip install azure-storage-blob==12.7.1 pillow numpy requests
|
|
@ -1,439 +0,0 @@
|
|||
import io
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from typing import Union
|
||||
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import requests
|
||||
import tensorflow as tf
|
||||
from azure.storage.blob import ContainerClient
|
||||
|
||||
# Report the TensorFlow version and GPU visibility as soon as the script is loaded,
# so that this information shows up at the top of the Task's output
print('score.py, tensorflow version:', tf.__version__)
print('score.py, tf.test.is_gpu_available:', tf.test.is_gpu_available())

# A progress message is printed each time this many images have been scored
PRINT_EVERY = 500


#%% Helper functions *copied* from ct_utils.py and visualization/visualization_utils.py

# Maps the value of the EXIF Orientation tag to the angle passed to PIL's
# Image.rotate() in open_image(); other orientation values are left untouched
IMAGE_ROTATIONS = {
    3: 180,
    6: 270,
    8: 90
}
|
||||
|
||||
def truncate_float(x, precision=3):
    """
    Truncate a float scalar to a fixed number of significant digits.

    For example: truncate_float(0.0003214884) --> 0.000321

    Mainly used to obtain a compact float representation before exporting
    values to JSON.

    Args:
        x (float) Scalar to truncate
        precision (int) Number of significant digits to keep, must be at least 1

    Returns:
        0 if x is close to zero (as judged by np.isclose), otherwise x floored
        to `precision` significant digits
    """

    assert precision > 0

    if not np.isclose(x, 0):
        # Power of ten of the leading significant digit of x
        magnitude = math.floor(math.log10(abs(x)))

        # Scale that moves the decimal point of x just behind the last
        # significant digit we want to keep
        scale = math.pow(10, precision - 1 - magnitude)

        # Scale up, floor away the unwanted digits, then scale back down
        return math.floor(x * scale) / scale

    return 0
|
||||
|
||||
|
||||
def open_image(input_file: Union[str, BytesIO]) -> Image.Image:
    """Opens an image in binary format using PIL.Image and converts to RGB mode.

    This operation is lazy; image will not be actually loaded until the first
    operation that needs to load it (for example, resizing), so file opening
    errors can show up later.

    Args:
        input_file: str or BytesIO, either a path to an image file (anything
            that PIL can open), an http(s) URL to download the image from,
            or an image as a stream of bytes

    Returns:
        a PIL image object in RGB mode

    Raises:
        AttributeError: if the image mode is not one of 'RGBA', 'RGB' or 'L'
        Exception: whatever the download or Image.open() raised; for URLs the
            error is printed before being re-raised
    """
    if (isinstance(input_file, str)
            and input_file.startswith(('http://', 'https://'))):
        # Download exactly once, inside the try block so that every failure
        # is reported together with the offending URL
        try:
            response = requests.get(input_file)
            image = Image.open(BytesIO(response.content))
        except Exception as e:
            print(f'Error opening image {input_file}: {e}')
            raise
    else:
        image = Image.open(input_file)

    if image.mode not in ('RGBA', 'RGB', 'L'):
        raise AttributeError(f'Image {input_file} uses unsupported mode {image.mode}')
    if image.mode == 'RGBA' or image.mode == 'L':
        # PIL.Image.convert() returns a converted copy of this image
        image = image.convert(mode='RGB')

    # alter orientation as needed according to EXIF tag 0x112 (274) for Orientation
    # https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
    # https://www.media.mit.edu/pia/Research/deepview/exif.html
    # NOTE(review): an image converted above may no longer expose _getexif(), in
    # which case no rotation is applied - confirm whether that is intended.
    try:
        exif = image._getexif()
        # _getexif() can return None when the image carries no EXIF data
        orientation = exif.get(274, None) if exif is not None else None  # 274 is the key for the Orientation field
        if orientation is not None and orientation in IMAGE_ROTATIONS:
            image = image.rotate(IMAGE_ROTATIONS[orientation], expand=True)  # returns a rotated copy
    except Exception:
        # Orientation correction is deliberately best-effort: an image without
        # usable EXIF information is returned unrotated
        pass

    return image
|
||||
|
||||
|
||||
def load_image(input_file: Union[str, BytesIO]) -> Image.Image:
    """Opens the image at input_file and forces its pixel data into memory.

    open_image() relies on the lazy Image.open(), so problems with the file
    would otherwise only surface in a later operation; calling load() here
    makes them surface immediately.

    Args:
        input_file: str or BytesIO, either a path to an image file (anything
            that PIL can open), or an image as a stream of bytes

    Returns: PIL.Image.Image, in RGB mode
    """
    loaded = open_image(input_file)
    loaded.load()
    return loaded
|
||||
|
||||
|
||||
#%% TFDetector class, an unmodified *copy* of the class in detection/tf_detector.py,
|
||||
# so we do not have to import the packages required by run_detector.py
|
||||
|
||||
class TFDetector:
    """
    A detector model loaded at the time of initialization. It is intended to be used with
    the MegaDetector (TF). The inference batch size is set to 1; code needs to be modified
    to support larger batch sizes, including resizing appropriately.
    """

    # Precision passed to truncate_float() for confidence and bbox coordinates
    # (truncate_float() interprets it as a number of significant digits)
    CONF_DIGITS = 3
    COORD_DIGITS = 4

    # MegaDetector was trained with batch size of 1, and the resizing function is a part
    # of the inference graph
    BATCH_SIZE = 1

    # An enumeration of failure reasons, used as the value of the 'failure' field of a result
    FAILURE_TF_INFER = 'Failure TF inference'
    FAILURE_IMAGE_OPEN = 'Failure image access'

    DEFAULT_RENDERING_CONFIDENCE_THRESHOLD = 0.85  # to render bounding boxes
    DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD = 0.1  # to include in the output json file

    # Category IDs are strings, matching the 'category' field of each detection
    DEFAULT_DETECTOR_LABEL_MAP = {
        '1': 'animal',
        '2': 'person',
        '3': 'vehicle'  # available in megadetector v4+
    }

    NUM_DETECTOR_CATEGORIES = 4  # animal, person, group, vehicle - for color assignment

    def __init__(self, model_path):
        """Loads model from model_path and starts a tf.Session with this graph. Obtains
        input and output tensor handles."""
        detection_graph = TFDetector.__load_model(model_path)
        self.tf_session = tf.Session(graph=detection_graph)

        # Handles used to feed the image and to fetch boxes, scores and classes
        self.image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        self.box_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
        self.score_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
        self.class_tensor = detection_graph.get_tensor_by_name('detection_classes:0')

    @staticmethod
    def round_and_make_float(d, precision=4):
        """Casts d to a Python float and truncates it with truncate_float().

        Args:
            d: a number, e.g. a numpy float
            precision: passed through to truncate_float()
        Returns: the value returned by truncate_float()
        """
        return truncate_float(float(d), precision=precision)

    @staticmethod
    def __convert_coords(tf_coords):
        """Converts coordinates from the model's output format [y1, x1, y2, x2] to the
        format used by our API and MegaDB: [x1, y1, width, height]. All coordinates
        (including model outputs) are normalized in the range [0, 1].
        Args:
            tf_coords: np.array of predicted bounding box coordinates from the TF detector,
                has format [y1, x1, y2, x2]
        Returns: list of Python float, predicted bounding box coordinates [x1, y1, width, height]
        """
        # change from [y1, x1, y2, x2] to [x1, y1, width, height]
        width = tf_coords[3] - tf_coords[1]
        height = tf_coords[2] - tf_coords[0]

        new = [tf_coords[1], tf_coords[0], width, height]  # must be a list instead of np.array

        # convert numpy floats to Python floats
        for i, d in enumerate(new):
            new[i] = TFDetector.round_and_make_float(d, precision=TFDetector.COORD_DIGITS)
        return new

    @staticmethod
    def convert_to_tf_coords(array):
        """From [x1, y1, width, height] to [y1, x1, y2, x2], where x1 is x_min, x2 is x_max
        This is an extraneous step as the model outputs [y1, x1, y2, x2] but were converted to the API
        output format - only to keep the interface of the sync API.

        Args:
            array: indexable of four numbers, [x1, y1, width, height]
        Returns: list [y1, x1, y2, x2]; the values are not rounded or truncated here
        """
        x1 = array[0]
        y1 = array[1]
        width = array[2]
        height = array[3]
        x2 = x1 + width
        y2 = y1 + height
        return [y1, x1, y2, x2]

    @staticmethod
    def __load_model(model_path):
        """Loads a detection model (i.e., create a graph) from a .pb file.
        Args:
            model_path: .pb file of the model.
        Returns: the loaded graph.
        """
        print('TFDetector: Loading graph...')
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(model_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                # name='' imports the nodes without a prefix, so the tensors can be
                # looked up by their original names (e.g. 'image_tensor:0')
                tf.import_graph_def(od_graph_def, name='')
        print('TFDetector: Detection graph loaded.')

        return detection_graph

    def _generate_detections_one_image(self, image):
        """Runs the detection graph on a single image.

        Args:
            image: the image to score; it is converted to a uint8 numpy array
        Returns: tuple (boxes, scores, classes) as returned by the session run,
            each still carrying the leading batch dimension of size 1
        """
        np_im = np.asarray(image, np.uint8)
        im_w_batch_dim = np.expand_dims(np_im, axis=0)

        # need to change the above line to the following if supporting a batch size > 1 and resizing to the same size
        # np_images = [np.asarray(image, np.uint8) for image in images]
        # images_stacked = np.stack(np_images, axis=0) if len(images) > 1 else np.expand_dims(np_images[0], axis=0)

        # performs inference
        (box_tensor_out, score_tensor_out, class_tensor_out) = self.tf_session.run(
            [self.box_tensor, self.score_tensor, self.class_tensor],
            feed_dict={self.image_tensor: im_w_batch_dim})

        return box_tensor_out, score_tensor_out, class_tensor_out

    def generate_detections_one_image(self, image, image_id,
                                      detection_threshold=DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD):
        """Apply the detector to an image.
        Args:
            image: the PIL Image object
            image_id: a path to identify the image; will be in the "file" field of the output object
            detection_threshold: confidence above which to include the detection proposal
        Returns:
            A dict with the following fields, see the 'images' key in https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format
            - 'file' (always present)
            - 'max_detection_conf'
            - 'detections', which is a list of detection objects containing keys 'category', 'conf' and 'bbox'
            - 'failure'

            Any exception raised while scoring is caught and printed; the result then
            carries 'failure' set to FAILURE_TF_INFER instead of the detection fields.
        """
        result = {
            'file': image_id
        }
        try:
            b_box, b_score, b_class = self._generate_detections_one_image(image)

            # our batch size is 1; need to loop the batch dim if supporting batch size > 1
            boxes, scores, classes = b_box[0], b_score[0], b_class[0]

            detections_cur_image = []  # will be empty for an image with no confident detections
            max_detection_conf = 0.0
            for b, s, c in zip(boxes, scores, classes):
                if s > detection_threshold:
                    detection_entry = {
                        'category': str(int(c)),  # use string type for the numerical class label, not int
                        'conf': truncate_float(float(s),  # cast to float for json serialization
                                               precision=TFDetector.CONF_DIGITS),
                        'bbox': TFDetector.__convert_coords(b)
                    }
                    detections_cur_image.append(detection_entry)
                    if s > max_detection_conf:
                        max_detection_conf = s

            result['max_detection_conf'] = truncate_float(float(max_detection_conf),
                                                          precision=TFDetector.CONF_DIGITS)
            result['detections'] = detections_cur_image

        except Exception as e:
            result['failure'] = TFDetector.FAILURE_TF_INFER
            print('TFDetector: image {} failed during inference: {}'.format(image_id, str(e)))

        return result
|
||||
|
||||
|
||||
#%% Scoring script
|
||||
|
||||
class BatchScorer:
    """
    Coordinates scoring images in this Task.

    Images are downloaded and scored one at a time, in the order they are listed.

    TODO: have a synchronized queue that download tasks enqueue and scoring function
    dequeues - but need to be able to limit the size of the queue. We do not want to
    write the image to disk and then load it in the scoring func.
    """

    def __init__(self, **kwargs):
        """
        Keyword Args:
            detector_path: path to the detector model file, passed to TFDetector
            use_url: if truthy, image IDs are URLs handed directly to open_image();
                otherwise they are blob paths inside the input container
            input_container_sas: container URL used to create the ContainerClient;
                only read when use_url is falsy
            detection_threshold: confidence threshold forwarded to the detector
            image_ids_to_score: list whose items are either image IDs or
                [image_id, metadata] lists; a missing or empty list scores nothing
        """
        print('score.py BatchScorer, __init__()')

        detector_path = kwargs.get('detector_path')
        self.detector = TFDetector(detector_path)

        self.use_url = kwargs.get('use_url')
        if not self.use_url:
            input_container_sas = kwargs.get('input_container_sas')
            self.input_container_client = ContainerClient.from_container_url(input_container_sas)

        self.detection_threshold = kwargs.get('detection_threshold')

        # Fall back to an empty list so that a missing or empty input does not crash
        # on the [0] lookup below and score_images() simply returns []
        self.image_ids_to_score = kwargs.get('image_ids_to_score') or []

        # determine if there is metadata attached to each image_id; only the first
        # item is inspected, all items are assumed to share its format
        self.metadata_available = (len(self.image_ids_to_score) > 0
                                   and isinstance(self.image_ids_to_score[0], list))

    def _download_image(self, image_file) -> Image.Image:
        """
        Args:
            image_file: Public URL if use_url, else the full path from container root

        Returns:
            PIL image as returned by open_image()
        """
        if not self.use_url:
            # Stream the blob into memory instead of writing it to disk
            downloader = self.input_container_client.download_blob(image_file)
            image_file = io.BytesIO()
            downloader.download_to_stream(image_file)

        image = open_image(image_file)
        return image

    def score_images(self) -> list:
        """
        Downloads and scores every image in self.image_ids_to_score.

        Returns:
            list of result dicts, one per image and in input order. An image that
            could not be downloaded or opened gets a result with 'failure' set to
            TFDetector.FAILURE_IMAGE_OPEN. If metadata was supplied, it is attached
            to the result under the 'meta' key.
        """
        detections = []

        for item in self.image_ids_to_score:

            if self.metadata_available:
                image_id, image_metadata = item[0], item[1]
            else:
                image_id, image_metadata = item, None

            try:
                image = self._download_image(image_id)
            except Exception as e:
                # A single bad image must not abort the whole Task; record the failure
                print(f'score.py BatchScorer, score_images, download_image exception: {e}')
                result = {
                    'file': image_id,
                    'failure': TFDetector.FAILURE_IMAGE_OPEN
                }
            else:
                result = self.detector.generate_detections_one_image(
                    image, image_id, detection_threshold=self.detection_threshold)

            if self.metadata_available:
                result['meta'] = image_metadata

            detections.append(result)
            if len(detections) % PRINT_EVERY == 0:
                print(f'scored {len(detections)} images')

        return detections
|
||||
|
||||
|
||||
def main():
    """
    Entry point of the scoring Task.

    Reads the Task configuration from environment variables, loads this job's list of
    images from the mounted container, scores the [TASK_BEGIN_INDEX, TASK_END_INDEX)
    slice of that list with the detector, and writes the results as JSON to the job's
    task_outputs folder.

    Exits with status 0 (leaving an empty JSON list as output) when there is nothing
    to score.

    Raises:
        KeyError: if a required environment variable is missing
        AssertionError: if the detector model file does not exist
        RuntimeError: if scoring the images fails
    """
    print('score.py, main()')

    # information to determine input and output locations
    api_instance_name = os.environ['API_INSTANCE_NAME']
    job_id = os.environ['AZ_BATCH_JOB_ID']
    task_id = os.environ['AZ_BATCH_TASK_ID']
    mount_point = os.environ['AZ_BATCH_NODE_MOUNTS_DIR']

    # other parameters for the task
    begin_index = int(os.environ['TASK_BEGIN_INDEX'])
    end_index = int(os.environ['TASK_END_INDEX'])

    input_container_sas = os.environ.get('JOB_CONTAINER_SAS', None)  # could be None if use_url
    use_url = os.environ.get('JOB_USE_URL', None)

    # bool of any non-empty string is True, so compare against the literal 'true'
    use_url = bool(use_url) and use_url.lower() == 'true'

    detection_threshold = float(os.environ['DETECTION_CONF_THRESHOLD'])

    # NOTE(review): this writes the container SAS to the Task output - confirm that
    # exposing it in the logs is acceptable.
    print(f'score.py, main(), api_instance_name: {api_instance_name}, job_id: {job_id}, task_id: {task_id}, '
          f'mount_point: {mount_point}, begin_index: {begin_index}, end_index: {end_index}, '
          f'input_container_sas: {input_container_sas}, use_url (parsed): {use_url}, '
          f'detection_threshold: {detection_threshold}')

    job_folder_mounted = os.path.join(mount_point, 'batch-api', f'api_{api_instance_name}', f'job_{job_id}')
    task_out_dir = os.path.join(job_folder_mounted, 'task_outputs')
    os.makedirs(task_out_dir, exist_ok=True)
    task_output_path = os.path.join(task_out_dir, f'job_{job_id}_task_{task_id}.json')

    # test that we can write to output path; also in case there is no image to process
    with open(task_output_path, 'w') as f:
        json.dump([], f)

    # list images to process
    list_images_path = os.path.join(job_folder_mounted, f'{job_id}_images.json')
    with open(list_images_path) as f:
        list_images = json.load(f)
    print(f'score.py, main(), length of list_images: {len(list_images)}')

    if (not isinstance(list_images, list)) or len(list_images) == 0:
        print('score.py, main(), zero images in specified overall list, exiting...')
        sys.exit(0)

    # items in this list can be strings or [image_id, metadata]
    list_images = list_images[begin_index: end_index]
    if len(list_images) == 0:
        print('score.py, main(), zero images in the shard, exiting')
        sys.exit(0)

    print(f'score.py, main(), processing {len(list_images)} images in this Task')

    # model path
    # Path to .pb TensorFlow detector model file, relative to the
    # models/megadetector_copies folder in mounted container
    detector_model_rel_path = os.environ['DETECTOR_REL_PATH']
    detector_path = os.path.join(mount_point, 'models', 'megadetector_copies', detector_model_rel_path)
    assert os.path.exists(detector_path), f'detector is not found at the specified path: {detector_path}'

    # score the images
    scorer = BatchScorer(
        detector_path=detector_path,
        use_url=use_url,
        input_container_sas=input_container_sas,
        detection_threshold=detection_threshold,
        image_ids_to_score=list_images
    )

    try:
        tick = datetime.now()
        detections = scorer.score_images()
        duration = datetime.now() - tick
        print(f'score.py, main(), score_images() duration: {duration}')
    except Exception as e:
        # Chain explicitly so the original traceback is reported as the cause
        raise RuntimeError(f'score.py, main(), exception in score_images(): {e}') from e

    # Overwrites the empty placeholder written above with the actual results
    with open(task_output_path, 'w', encoding='utf-8') as f:
        json.dump(detections, f, ensure_ascii=False)


if __name__ == '__main__':
    main()
|
|
@ -1,22 +0,0 @@
|
|||
# Conda environment file for running the batch API.
|
||||
#
|
||||
# conda activate cameratraps-batch-api
|
||||
#
|
||||
# conda env update --name cameratraps-batch-api --file environment-batch-api.yml --prune
|
||||
|
||||
name: cameratraps-batch-api
|
||||
|
||||
dependencies:
|
||||
- python>=3.6
|
||||
- nb_conda_kernels
|
||||
- humanfriendly
|
||||
- requests
|
||||
- flask>=1.1.0 # known-compatible Flask 1.1.2, Werkzeug 1.0.1
|
||||
- gunicorn
|
||||
- tqdm # required by sas_blob_utils.py
|
||||
- pip
|
||||
- pip:
|
||||
- azure-storage-blob>=12.3.0 # known-compatible 12.7.1
|
||||
- azure-batch>=4.0.0 # known-compatible 10.0.0
|
||||
- azure-cosmos # known-compatible 4.2.0
|
||||
- azure-appconfiguration # known-compatible 1.1.1
|
|
@ -1,294 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
import string
|
||||
import uuid
|
||||
import threading
|
||||
from datetime import timedelta
|
||||
|
||||
import sas_blob_utils # from ai4eutils
|
||||
from flask import Flask, request, jsonify
|
||||
|
||||
import server_api_config as api_config
|
||||
from server_app_config import AppConfig
|
||||
from server_batch_job_manager import BatchJobManager
|
||||
from server_orchestration import create_batch_job, monitor_batch_job
|
||||
from server_job_status_table import JobStatusTable
|
||||
from server_utils import *
|
||||
|
||||
# %% Flask app
|
||||
app = Flask(__name__)

# When this module is imported by Gunicorn (rather than run directly), reuse
# Gunicorn's error-log handlers and level so Flask's log lines end up in the
# same place.
# reference: https://trstringer.com/logging-flask-gunicorn-the-manageable-way/
if __name__ != '__main__':
    gunicorn_logger = logging.getLogger('gunicorn.error')
    app.logger.handlers = gunicorn_logger.handlers
    app.logger.setLevel(gunicorn_logger.level)


# every route below is registered under this prefix
API_PREFIX = api_config.API_PREFIX
app.logger.info('server, created Flask application...')


# %% Helper classes

# module-level singletons shared by all endpoints
app_config = AppConfig()
job_status_table = JobStatusTable()
batch_job_manager = BatchJobManager()
app.logger.info('server, finished instantiating helper classes')
|
||||
|
||||
|
||||
# %% Flask endpoints
|
||||
|
||||
@app.route(f'{API_PREFIX}/')
def hello():
    """Root endpoint: return a one-line banner naming this API instance."""
    instance_name = api_config.API_INSTANCE_NAME
    return f'Camera traps batch processing API. Instance: {instance_name}'
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/request_detections', methods=['POST'])
def request_detections():
    """
    Validate a detection request, record it, and launch the batch processing job.

    Checks that the parameters in the JSON body of the POST request are valid,
    creates an entry for the new job in the job status table, and starts a thread
    running create_batch_job(). The thread is not waited on.

    Returns:
        {'request_id': job_id} once the thread has been started; otherwise the
        result of make_error() called with an HTTP status code and a message
        describing why the request was rejected.
    """
    if not request.is_json:
        msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
        return make_error(415, msg)

    try:
        post_body = request.get_json()
    except Exception as e:
        return make_error(415, f'Error occurred reading POST request body: {e}.')

    # A JSON array / string / number / null parses without error but has no
    # .get(); reject it here instead of failing with an AttributeError below.
    if not isinstance(post_body, dict):
        return make_error(400, 'Body needs to be a JSON object.')

    app.logger.info(f'server, request_detections, post_body: {post_body}')

    # required params

    caller_id = post_body.get('caller', None)
    if caller_id is None or caller_id not in app_config.get_allowlist():
        msg = ('Parameter caller is not supplied or is not on our allowlist. '
               'Please email cameratraps@lila.science to request access.')
        return make_error(401, msg)

    use_url = post_body.get('use_url', False)
    if use_url and isinstance(use_url, str):  # in case it is included but is intended to be False
        if use_url.lower() in ['false', 'f', 'no', 'n']:
            use_url = False

    input_container_sas = post_body.get('input_container_sas', None)
    if not input_container_sas and not use_url:
        msg = ('input_container_sas with read and list access is a required '
               'field when not using image URLs.')
        return make_error(400, msg)

    if input_container_sas is not None:
        # the isinstance check keeps a non-string value from reaching the URI parser
        if not isinstance(input_container_sas, str) or \
                not sas_blob_utils.is_container_uri(input_container_sas):
            return make_error(400, 'input_container_sas provided is not for a container.')

        result = check_data_container_sas(input_container_sas)
        if result is not None:
            return make_error(result[0], result[1])

    # can be an URL to a file not hosted in an Azure blob storage container
    images_requested_json_sas = post_body.get('images_requested_json_sas', None)

    if images_requested_json_sas is not None:
        if not isinstance(images_requested_json_sas, str) or \
                not images_requested_json_sas.startswith(('http://', 'https://')):
            return make_error(400, 'images_requested_json_sas needs to be an URL.')

    # if use_url, then images_requested_json_sas is required
    if use_url and images_requested_json_sas is None:
        return make_error(400, 'images_requested_json_sas is required since use_url is true.')

    # optional params

    # check model_version is among the available model versions
    model_version = post_body.get('model_version', '')
    if model_version != '':
        model_version = str(model_version)  # in case user used an int
        if model_version not in api_config.MD_VERSIONS_TO_REL_PATH:  # TODO use AppConfig to store model version info
            return make_error(400, f'model_version {model_version} is not supported.')

    # check request_name has only allowed characters
    request_name = post_body.get('request_name', '')
    if request_name != '':
        if not isinstance(request_name, str):
            return make_error(400, 'request_name needs to be a string.')
        if len(request_name) > 92:
            return make_error(400, 'request_name is longer than 92 characters.')
        allowed = set(string.ascii_letters + string.digits + '_' + '-')
        if not set(request_name) <= allowed:
            msg = ('request_name contains invalid characters (only letters, '
                   'digits, - and _ are allowed).')
            return make_error(400, msg)

    # The optional telemetry params 'country' and 'organization_name' are not
    # read here; they are stored in the status table as part of call_params,
    # which is the whole post_body.

    # All API instances / node pools share a quota on total number of active Jobs;
    # we cannot accept new Job submissions if we are at the quota
    try:
        num_active_jobs = batch_job_manager.get_num_active_jobs()
        if num_active_jobs >= api_config.MAX_BATCH_ACCOUNT_ACTIVE_JOBS:
            return make_error(503, 'Too many active jobs, please try again later')
    except Exception as e:
        return make_error(500, f'Error checking number of active jobs: {e}')

    try:
        job_id = uuid.uuid4().hex
        job_status_table.create_job_status(
            job_id=job_id,
            status=get_job_status('created', 'Request received. Listing images next...'),
            call_params=post_body
        )
    except Exception as e:
        return make_error(500, f'Error creating a job status entry: {e}')

    try:
        # the thread name is what get_thread_names() uses to find jobs being worked on
        thread = threading.Thread(
            target=create_batch_job,
            name=f'job_{job_id}',
            kwargs={'job_id': job_id, 'body': post_body}
        )
        thread.start()
    except Exception as e:
        return make_error(500, f'Error creating or starting the batch processing thread: {e}')

    return {'request_id': job_id}
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/cancel_request', methods=['POST'])
def cancel_request():
    """
    Cancel a request/job given the job_id ('request_id') and caller_id ('caller').

    Only jobs whose recorded request_status is 'running' or 'problem' are
    canceled: the Batch Job is terminated and, if that succeeds, the entry in
    the job status table is set to 'canceled'.

    Returns:
        A confirmation string when the cancel signal was sent; otherwise the
        result of make_error() with an HTTP status code and an explanation.
    """
    if not request.is_json:
        msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
        return make_error(415, msg)
    try:
        post_body = request.get_json()
    except Exception as e:
        return make_error(415, f'Error occurred reading POST request body: {e}.')

    # A JSON array / string / number / null has no .get(); reject it explicitly
    # rather than failing with an AttributeError below.
    if not isinstance(post_body, dict):
        return make_error(400, 'Body needs to be a JSON object.')

    app.logger.info(f'server, cancel_request received, body: {post_body}')

    # required fields
    job_id = post_body.get('request_id', None)
    if job_id is None:
        return make_error(400, 'request_id is a required field.')

    caller_id = post_body.get('caller', None)
    if caller_id is None or caller_id not in app_config.get_allowlist():
        return make_error(401, 'Parameter caller is not supplied or is not on our allowlist.')

    item_read = job_status_table.read_job_status(job_id)
    if item_read is None:
        return make_error(404, 'Task is not found.')
    if 'status' not in item_read:
        return make_error(404, 'Something went wrong. This task does not have a status field.')

    # guard against a status entry that is not the expected dict, which would
    # otherwise surface as an unhandled KeyError/TypeError
    status = item_read['status']
    if not isinstance(status, dict) or 'request_status' not in status:
        return make_error(404, 'Something went wrong. This task does not have a status field.')

    request_status = status['request_status']
    if request_status not in ['running', 'problem']:
        # any other recorded status (e.g. completed or failed) cannot be canceled
        return make_error(400, f'Task has {request_status} and cannot be canceled')

    try:
        batch_job_manager.cancel_batch_job(job_id)
        # the create_batch_job thread will stop when it wakes up the next time
    except Exception as e:
        return make_error(500, f'Error when canceling the request: {e}')
    else:
        # only record the cancellation once Batch accepted the terminate call
        job_status_table.update_job_status(job_id, {
            'request_status': 'canceled',
            'message': 'Request has been canceled by the user.'
        })
    return 'Canceling signal has been sent. You can verify the status at the /task endpoint'
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/task/<job_id>')
def retrieve_job_status(job_id: str):
    """
    Return the recorded status of a job, restarting its monitoring thread if needed.

    Does not require the "caller" field to avoid checking the allowlist in App Configurations.
    Retains the /task endpoint name to be compatible with previous versions.

    The status returned is whatever is stored in the job status table. If the
    entry says the job is still running (or has a problem), has not been
    updated for longer than one monitoring period plus a minute, and no thread
    named after this job exists in this process, a new monitor_batch_job thread
    is started before responding.
    """
    # Fix for Zooniverse - deleting any "-" characters in the job_id
    job_id = job_id.replace('-', '')

    item_read = job_status_table.read_job_status(job_id)  # just what the monitoring thread wrote to the DB
    if item_read is None:
        return make_error(404, 'Task is not found.')

    required_fields = ('status', 'last_updated', 'call_params')
    if any(field not in item_read for field in required_fields):
        return make_error(404, 'Something went wrong. This task does not have a valid status.')

    # If the status is running, it could be a Job submitted before the last restart of this
    # API instance. If that is the case, we should start to monitor its progress again.
    status = item_read['status']

    last_updated = datetime.fromisoformat(item_read['last_updated'][:-1])  # get rid of "Z" (required by Cosmos DB)
    time_passed = datetime.utcnow() - last_updated
    job_is_unmonitored = time_passed > timedelta(minutes=(api_config.MONITOR_PERIOD_MINUTES + 1))

    should_restart_monitoring = (
        isinstance(status, dict)
        and 'request_status' in status
        and status['request_status'] in ['running', 'problem']
        and 'num_tasks' in status
        and job_id not in get_thread_names()
        and job_is_unmonitored
    )

    if should_restart_monitoring:
        call_params = item_read['call_params']

        # WARNING model_version could be wrong (a newer version number gets written to the output file) around
        # the time that the model is updated, if this request was submitted before the model update
        # and the API restart; this should be quite rare
        monitor_kwargs = {
            'job_id': job_id,
            'num_tasks': status['num_tasks'],
            'model_version': call_params.get('model_version', api_config.DEFAULT_MD_VERSION),
            'job_name': call_params.get('request_name', ''),
            'job_submission_timestamp': item_read.get('job_submission_time', '')
        }

        thread = threading.Thread(
            target=monitor_batch_job,
            name=f'job_{job_id}',
            kwargs=monitor_kwargs
        )
        thread.start()
        app.logger.info(f'server, started a new thread to monitor job {job_id}')

    # conform to previous schemes: num_tasks is internal bookkeeping and is not returned
    if 'num_tasks' in status:
        del status['num_tasks']

    return {
        'Status': status,
        'Endpoint': f'{API_PREFIX}/request_detections',
        'TaskId': job_id,
        'Timestamp': item_read['last_updated']
    }
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/default_model_version')
def get_default_model_version() -> str:
    """Return the configured default MegaDetector version string."""
    default_version = api_config.DEFAULT_MD_VERSION
    return default_version
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/supported_model_versions')
def get_supported_model_versions() -> str:
    """Return, as JSON, the sorted keys of api_config.MD_VERSIONS_TO_REL_PATH."""
    # iterating a dict yields its keys, so this sorts the version strings
    versions = sorted(api_config.MD_VERSIONS_TO_REL_PATH)
    return jsonify(versions)
|
||||
|
||||
|
||||
# %% undocumented endpoints
|
||||
|
||||
def get_thread_names() -> list:
    """
    Return the sorted job IDs of all live threads that are named after a job.

    Threads working on a job are named 'job_<job_id>'. The 'job_' prefix is
    stripped (rather than splitting on every underscore) so that the full
    job ID is recovered even if it contains an underscore itself.
    """
    prefix = 'job_'
    return sorted(
        thread.name[len(prefix):]
        for thread in threading.enumerate()
        if thread.name.startswith(prefix)
    )
|
||||
|
||||
|
||||
@app.route(f'{API_PREFIX}/all_jobs')
def get_all_jobs():
    """Return, as JSON, the job IDs that currently have a live 'job_*' thread in this process."""
    return jsonify(get_thread_names())
|
|
@ -1,98 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
A module to hold the configurations specific to an instance of the API.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
|
||||
#%% instance-specific API settings
# you likely need to modify these when deploying a new instance of the API

API_INSTANCE_NAME = 'cm'  # 'internal', 'cm', 'camelot', 'zooniverse'
POOL_ID = 'cm_1'  # name of the Batch pool created for this API instance

MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB = 4 * 1000 * 1000  # inclusive

# Azure Batch for batch processing
BATCH_ACCOUNT_NAME = 'cameratrapssc'
BATCH_ACCOUNT_URL = 'https://cameratrapssc.southcentralus.batch.azure.com'


#%% general API settings

# URL to root is http://127.0.0.1:5000/v4/camera-trap/detection-batch/
API_PREFIX = '/v4/camera-trap/detection-batch'

MONITOR_PERIOD_MINUTES = 10

# if this number of times the thread wakes up to check is exceeded, stop the monitoring thread
MAX_MONITOR_CYCLES = 4 * 7 * int((60 * 24) / MONITOR_PERIOD_MINUTES)  # 4 weeks

# case-insensitive; kept as a tuple (checked below)
IMAGE_SUFFIXES_ACCEPTED = ('.jpg', '.jpeg', '.png')
assert isinstance(IMAGE_SUFFIXES_ACCEPTED, tuple)

OUTPUT_FORMAT_VERSION = '1.1'

NUM_IMAGES_PER_TASK = 2000

OUTPUT_SAS_EXPIRATION_DAYS = 180

# quota of active Jobs in our Batch account, which all node pools i.e. API instances share;
# cannot accept job submissions if there are this many active Jobs already
MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300


#%% MegaDetector info

DETECTION_CONF_THRESHOLD = 0.1

# relative to the `megadetector_copies` folder in the container `models`
# TODO add MD versions info to AppConfig
MD_VERSIONS_TO_REL_PATH = {
    '4.1': 'megadetector_v4_1/md_v4.1.0.pb',
    '3': 'megadetector_v3/megadetector_v3_tf19.pb',
    '2': 'megadetector_v2/frozen_inference_graph.pb'
}
DEFAULT_MD_VERSION = '4.1'
assert DEFAULT_MD_VERSION in MD_VERSIONS_TO_REL_PATH

# copied from TFDetector class in detection/run_detector.py
DETECTOR_LABEL_MAP = {
    '1': 'animal',
    '2': 'person',
    '3': 'vehicle'
}


#%% Azure Batch settings

NUM_TASKS_PER_SUBMISSION = 20  # max for the Python SDK without extension is 100

NUM_TASKS_PER_RESUBMISSION = 5
|
||||
|
||||
|
||||
#%% env variables for service credentials, and info related to these services
# Each os.environ[...] lookup raises KeyError when this module is imported
# if the variable is not set.

# Cosmos DB `batch-api-jobs` table for job status
COSMOS_ENDPOINT = os.environ['COSMOS_ENDPOINT']
COSMOS_WRITE_KEY = os.environ['COSMOS_WRITE_KEY']

# Service principal of this "application", authorized to use Azure Batch
APP_TENANT_ID = os.environ['APP_TENANT_ID']
APP_CLIENT_ID = os.environ['APP_CLIENT_ID']
APP_CLIENT_SECRET = os.environ['APP_CLIENT_SECRET']

# Blob storage account for storing Batch tasks' outputs and scoring script
STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']

# names of the two containers supporting Batch
# STORAGE_CONTAINER_MODELS = 'models'
STORAGE_CONTAINER_API = 'batch-api'

# Azure Container Registry for Docker image used by our Batch node pools
REGISTRY_SERVER = os.environ['REGISTRY_SERVER']
REGISTRY_PASSWORD = os.environ['REGISTRY_PASSWORD']
CONTAINER_IMAGE_NAME = '***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3'

# Azure App Configuration instance to get configurations specific to
# this instance of the API
APP_CONFIG_CONNECTION_STR = os.environ['APP_CONFIG_CONNECTION_STR']
|
|
@ -1,55 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
A class wrapping the Azure App Configuration client to get configurations
|
||||
for each instance of the API.
|
||||
"""
|
||||
import logging
|
||||
import os
|
||||
|
||||
from server_api_config import APP_CONFIG_CONNECTION_STR, API_INSTANCE_NAME
|
||||
|
||||
from azure.appconfiguration import AzureAppConfigurationClient
|
||||
|
||||
|
||||
log = logging.getLogger(os.environ['FLASK_APP'])
|
||||
|
||||
|
||||
class AppConfig:
    """Wrapper around the Azure App Configuration client.

    Caches the allowlist of callers for this API instance and re-reads it only
    when the value of the 'batch_api:sentinel' setting changes.
    """

    def __init__(self):
        self.client = AzureAppConfigurationClient.from_connection_string(APP_CONFIG_CONNECTION_STR)

        self.api_instance = API_INSTANCE_NAME

        # sentinel should change if new configurations are available
        self.sentinel = self._get_sentinel()  # get initial sentinel and allowlist values
        self.allowlist = self._get_allowlist()

    def _get_sentinel(self):
        """Return the current value of the 'batch_api:sentinel' setting."""
        return self.client.get_configuration_setting(key='batch_api:sentinel').value

    def _get_allowlist(self):
        """Return the caller names whose 'batch_api_allow:<name>' setting equals this API instance."""
        prefix = 'batch_api_allow:'
        settings = self.client.list_configuration_settings(key_filter='batch_api_allow:*')
        return [
            setting.key.split(prefix)[1]
            for setting in settings
            if setting.value == self.api_instance
        ]

    def get_allowlist(self):
        """
        Return the allowlist, refreshing it first if the sentinel has changed.

        If reading the sentinel or the new allowlist fails, the error is logged
        and the previously cached allowlist is returned. The cached sentinel is
        only advanced after the new allowlist has been fetched successfully, so
        a failed refresh is retried on the next call instead of leaving a stale
        allowlist paired with the new sentinel.
        """
        try:
            cur_sentinel = self._get_sentinel()
            if cur_sentinel != self.sentinel:
                # fetch first, commit afterwards: nothing is updated if this raises
                refreshed_allowlist = self._get_allowlist()
                self.allowlist = refreshed_allowlist
                self.sentinel = cur_sentinel
        except Exception as e:
            log.error(f'AppConfig, get_allowlist, exception so using old allowlist: {e}')
        return self.allowlist
|
|
@ -1,220 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
A class wrapping the Azure Batch client.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import math
|
||||
from typing import Tuple
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
import sas_blob_utils # from ai4eutils
|
||||
from azure.storage.blob import ContainerClient, ContainerSasPermissions, generate_container_sas
|
||||
from azure.batch import BatchServiceClient
|
||||
from azure.batch.models import *
|
||||
from azure.common.credentials import ServicePrincipalCredentials
|
||||
|
||||
import server_api_config as api_config
|
||||
|
||||
|
||||
# Gunicorn logger handler will get attached if needed in server.py
|
||||
log = logging.getLogger(os.environ['FLASK_APP'])
|
||||
|
||||
|
||||
class BatchJobManager:
    """Wrapper around the Azure Batch service client.

    Creates Batch Jobs in the pool configured for this API instance, shards the
    images of a request into Tasks, and counts or cancels Jobs.
    """

    def __init__(self):
        credentials = ServicePrincipalCredentials(
            client_id=api_config.APP_CLIENT_ID,
            secret=api_config.APP_CLIENT_SECRET,
            tenant=api_config.APP_TENANT_ID,
            resource='https://batch.core.windows.net/'
        )
        self.batch_client = BatchServiceClient(credentials=credentials,
                                               batch_url=api_config.BATCH_ACCOUNT_URL)

    def create_job(self, job_id: str, detector_model_rel_path: str,
                   input_container_sas: str, use_url: bool):
        """
        Add a new Batch Job with ID job_id to the pool api_config.POOL_ID.

        The remaining arguments and a few api_config values are passed to all
        Tasks of the Job as environment variables.
        """
        log.info(f'BatchJobManager, create_job, job_id: {job_id}')
        job = JobAddParameter(
            id=job_id,
            pool_info=PoolInformation(pool_id=api_config.POOL_ID),

            # set for all tasks in the job
            common_environment_settings=[
                EnvironmentSetting(name='DETECTOR_REL_PATH', value=detector_model_rel_path),
                EnvironmentSetting(name='API_INSTANCE_NAME', value=api_config.API_INSTANCE_NAME),
                EnvironmentSetting(name='JOB_CONTAINER_SAS', value=input_container_sas),
                EnvironmentSetting(name='JOB_USE_URL', value=str(use_url)),
                # environment variable values are strings; convert explicitly
                EnvironmentSetting(name='DETECTION_CONF_THRESHOLD',
                                   value=str(api_config.DETECTION_CONF_THRESHOLD))
            ]
        )
        self.batch_client.job.add(job)

    def submit_tasks(self, job_id: str, num_images: int) -> Tuple[int, list]:
        """
        Shard the images and submit each shard as a Task under the Job pointed to by this job_id

        Args:
            job_id: ID of the Batch Job to submit the tasks to
            num_images: total number of images to be processed in this Job

        Returns:
            num_task: total number of Tasks that should be in this Job
            task_ids_failed_to_submit: which Tasks from the above failed to be submitted
        """
        log.info('BatchJobManager, submit_tasks')

        # cannot execute the scoring script that is in the mounted directory; has to be copied to cwd
        # no luck giving the commandline arguments via formatted string - set as env vars instead
        score_command = '/bin/bash -c \"cp $AZ_BATCH_NODE_MOUNTS_DIR/batch-api/scripts/score.py . && python score.py\" '

        num_images_per_task = api_config.NUM_IMAGES_PER_TASK

        # form shards of images and assign each shard to a Task
        num_tasks = math.ceil(num_images / num_images_per_task)

        # for persisting stdout and stderr
        permissions = ContainerSasPermissions(read=True, write=True, list=True)
        # the SAS stays valid for as long as a monitoring thread may keep checking the Job
        access_duration_hrs = api_config.MONITOR_PERIOD_MINUTES * api_config.MAX_MONITOR_CYCLES / 60
        container_sas_token = generate_container_sas(
            account_name=api_config.STORAGE_ACCOUNT_NAME,
            container_name=api_config.STORAGE_CONTAINER_API,
            account_key=api_config.STORAGE_ACCOUNT_KEY,
            permission=permissions,
            expiry=datetime.utcnow() + timedelta(hours=access_duration_hrs))
        container_sas_url = sas_blob_utils.build_azure_storage_uri(
            account=api_config.STORAGE_ACCOUNT_NAME,
            container=api_config.STORAGE_CONTAINER_API,
            sas_token=container_sas_token)

        tasks = []
        for task_id in range(num_tasks):
            # the last shard's end_index may exceed num_images
            begin_index = task_id * num_images_per_task
            end_index = begin_index + num_images_per_task

            # persist stdout and stderr (will be removed when node removed)
            # paths are relative to the Task working directory
            stderr_destination = OutputFileDestination(
                container=OutputFileBlobContainerDestination(
                    container_url=container_sas_url,
                    path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stderr.txt'
                )
            )
            stdout_destination = OutputFileDestination(
                container=OutputFileBlobContainerDestination(
                    container_url=container_sas_url,
                    path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stdout.txt'
                )
            )
            std_err_and_out = [
                OutputFile(
                    file_pattern='../stderr.txt',  # stderr.txt is at the same level as wd
                    destination=stderr_destination,
                    upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
                    # can also just upload on failure
                ),
                OutputFile(
                    file_pattern='../stdout.txt',
                    destination=stdout_destination,
                    upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
                )
            ]

            task = TaskAddParameter(
                id=str(task_id),
                command_line=score_command,
                container_settings=TaskContainerSettings(
                    image_name=api_config.CONTAINER_IMAGE_NAME,
                    working_directory='taskWorkingDirectory'
                ),
                environment_settings=[
                    # environment variable values are strings; convert explicitly
                    EnvironmentSetting(name='TASK_BEGIN_INDEX', value=str(begin_index)),
                    EnvironmentSetting(name='TASK_END_INDEX', value=str(end_index)),
                ],
                output_files=std_err_and_out
            )
            tasks.append(task)

        # first try submitting Tasks
        task_ids_failed_to_submit = self._create_tasks(job_id, tasks, api_config.NUM_TASKS_PER_SUBMISSION, 1)

        # retry submitting Tasks
        if len(task_ids_failed_to_submit) > 0:
            task_ids_failed_to_submit_set = set(task_ids_failed_to_submit)
            tasks_to_retry = [t for t in tasks if t.id in task_ids_failed_to_submit_set]
            task_ids_failed_to_submit = self._create_tasks(job_id,
                                                           tasks_to_retry,
                                                           api_config.NUM_TASKS_PER_RESUBMISSION,
                                                           2)

            if len(task_ids_failed_to_submit) > 0:
                log.info('BatchJobManager, submit_tasks, after retry, '
                         f'len of task_ids_failed_to_submit: {len(task_ids_failed_to_submit)}')
            else:
                log.info('BatchJobManager, submit_tasks, after retry, all Tasks submitted')
        else:
            log.info('BatchJobManager, submit_tasks, all Tasks submitted after first try')

        # Change the Job's on_all_tasks_complete option to 'terminateJob' so the Job's status changes automatically
        # after all submitted tasks are done
        # This is so that we do not take up the quota for active Jobs in the Batch account.
        job_patch_params = JobPatchParameter(
            on_all_tasks_complete=OnAllTasksComplete.terminate_job
        )
        self.batch_client.job.patch(job_id, job_patch_params)

        return num_tasks, task_ids_failed_to_submit

    def _create_tasks(self, job_id, tasks, num_tasks_per_submission, n_th_try) -> list:
        """
        Submit tasks to the Job in groups of num_tasks_per_submission.

        Returns the IDs of the Tasks whose add result was not 'success'.
        n_th_try is only used in the log message.
        """
        task_ids_failed_to_submit = []

        for i in range(0, len(tasks), num_tasks_per_submission):
            tasks_to_submit = tasks[i: i + num_tasks_per_submission]

            # return type: TaskAddCollectionResult
            collection_results = self.batch_client.task.add_collection(job_id, tasks_to_submit, threads=10)

            for task_result in collection_results.value:
                # compare by value, not identity, so that a status that arrives
                # as a plain string is still recognized as success
                if task_result.status != TaskAddStatus.success:
                    # actually we should probably only re-submit if it's a server_error
                    task_ids_failed_to_submit.append(task_result.task_id)
                    log.info(f'task {task_result.task_id} failed to submitted after {n_th_try} try/tries, '
                             f'status: {task_result.status}, error: {task_result.error}')

        return task_ids_failed_to_submit

    def get_num_completed_tasks(self, job_id: str) -> Tuple[int, int]:
        """
        Returns the number of completed tasks for the job of job_id, as a tuple:
        (number of succeeded tasks, number of failed tasks) - both are considered "completed".

        A completed task counts as succeeded only if its exit code is 0.
        """
        # docs: # https://docs.microsoft.com/en-us/rest/api/batchservice/odata-filters-in-batch#list-tasks
        tasks = self.batch_client.task.list(job_id,
                                            task_list_options=TaskListOptions(
                                                filter='state eq \'completed\'',
                                                select='id, executionInfo'  # only these two fields are requested
                                            ))
        num_succeeded, num_failed = 0, 0
        for task in tasks:
            exit_code: int = task.execution_info.exit_code
            if exit_code == 0:
                num_succeeded += 1
            else:
                num_failed += 1
        return num_succeeded, num_failed

    def cancel_batch_job(self, job_id: str):
        """Terminate the Batch Job with ID job_id, giving 'APIUserCanceled' as the reason."""
        self.batch_client.job.terminate(job_id, terminate_reason='APIUserCanceled')

    def get_num_active_jobs(self) -> int:
        """Return the number of Jobs in the Batch account whose state is 'active'."""
        jobs_generator = self.batch_client.job.list(
            job_list_options=JobListOptions(
                filter='state eq \'active\'',
                select='id'
            ))
        # count without materializing the listing
        return sum(1 for _ in jobs_generator)
|
|
@ -1,153 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
A class to manage updating the status of an API request / Azure Batch Job using
|
||||
the Cosmos DB table "batch_api_jobs".
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import unittest
|
||||
import uuid
|
||||
from typing import Union, Optional
|
||||
|
||||
from azure.cosmos.cosmos_client import CosmosClient
|
||||
from azure.cosmos.exceptions import CosmosResourceNotFoundError
|
||||
|
||||
from server_api_config import API_INSTANCE_NAME, COSMOS_ENDPOINT, COSMOS_WRITE_KEY
|
||||
from server_utils import get_utc_time
|
||||
|
||||
|
||||
log = logging.getLogger(os.environ['FLASK_APP'])
|
||||
|
||||
|
||||
class JobStatusTable:
|
||||
"""
|
||||
A wrapper around the Cosmos DB client. Each item in the table "batch_api_jobs" represents
|
||||
a request/Batch Job, and should have the following fields:
|
||||
- id: this is the job_id
|
||||
- api_instance
|
||||
- status
|
||||
- last_updated
|
||||
- call_params: the dict representing the body of the POST request from the user
|
||||
The 'status' field is a dict with the following fields:
|
||||
- request_status
|
||||
- message
|
||||
- num_tasks (present after Batch Job created)
|
||||
- num_images (present after Batch Job created)
|
||||
"""
|
||||
# a job moves from created to running/problem after the Batch Job has been submitted
|
||||
allowed_statuses = ['created', 'running', 'failed', 'problem', 'completed', 'canceled']
|
||||
|
||||
def __init__(self, api_instance=None):
    """
    Connect to the 'batch_api_jobs' container of the 'camera-trap' Cosmos DB database.

    Args:
        api_instance: name of the API instance that entries are created for and
            checked against; defaults to API_INSTANCE_NAME when None
    """
    if api_instance is None:
        api_instance = API_INSTANCE_NAME
    self.api_instance = api_instance

    cosmos_client = CosmosClient(COSMOS_ENDPOINT, credential=COSMOS_WRITE_KEY)
    database_client = cosmos_client.get_database_client('camera-trap')
    self.db_jobs_client = database_client.get_container_client('batch_api_jobs')
|
||||
|
||||
def create_job_status(self, job_id: str, status: Union[dict, str], call_params: dict) -> dict:
    """
    Create the entry for a new job in the job status table.

    Args:
        job_id: ID of the job; used as the item's 'id'
        status: must contain 'request_status' (one of allowed_statuses) and 'message'
        call_params: the body of the POST request from the user

    Returns:
        the item as returned by the Cosmos DB client's create_item()
    """
    assert 'request_status' in status and 'message' in status
    assert status['request_status'] in JobStatusTable.allowed_statuses

    # job_id should be unique across all instances, and is also the partition key
    # submission time and last-updated time start out identical
    timestamp = get_utc_time()
    new_item = {
        'id': job_id,
        'api_instance': self.api_instance,
        'status': status,
        'job_submission_time': timestamp,
        'last_updated': timestamp,
        'call_params': call_params
    }
    return self.db_jobs_client.create_item(new_item)
|
||||
|
||||
def update_job_status(self, job_id: str, status: Union[dict, str]) -> dict:
|
||||
assert 'request_status' in status and 'message' in status
|
||||
assert status['request_status'] in JobStatusTable.allowed_statuses
|
||||
|
||||
# TODO do not read the entry first to get the call_params when the Cosmos SDK add a
|
||||
# patching functionality:
|
||||
# https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
|
||||
item_old = self.read_job_status(job_id)
|
||||
if item_old is None:
|
||||
raise ValueError
|
||||
|
||||
# need to retain other fields in 'status' to be able to restart monitoring thread
|
||||
if 'status' in item_old and isinstance(item_old['status'], dict):
|
||||
# retain existing fields; update as needed
|
||||
for k, v in item_old['status'].items():
|
||||
if k not in status:
|
||||
status[k] = v
|
||||
item = {
|
||||
'id': job_id,
|
||||
'api_instance': self.api_instance,
|
||||
'status': status,
|
||||
'job_submission_time': item_old['job_submission_time'],
|
||||
'last_updated': get_utc_time(),
|
||||
'call_params': item_old['call_params']
|
||||
}
|
||||
replaced_item = self.db_jobs_client.replace_item(job_id, item)
|
||||
return replaced_item
|
||||
|
||||
def read_job_status(self, job_id) -> Optional[dict]:
|
||||
"""
|
||||
Read the status of the job from the Cosmos DB table of job status.
|
||||
Note that it does not check the actual status of the job on Batch, and just returns what
|
||||
the monitoring thread wrote to the database.
|
||||
job_id is also the partition key
|
||||
"""
|
||||
try:
|
||||
read_item = self.db_jobs_client.read_item(job_id, partition_key=job_id)
|
||||
assert read_item['api_instance'] == self.api_instance, 'Job does not belong to this API instance'
|
||||
except CosmosResourceNotFoundError:
|
||||
return None # job_id not a key
|
||||
except Exception as e:
|
||||
logging.error(f'server_job_status_table, read_job_status, exception: {e}')
|
||||
raise
|
||||
else:
|
||||
item = {k: v for k, v in read_item.items() if not k.startswith('_')}
|
||||
return item
|
||||
|
||||
|
||||
class TestJobStatusTable(unittest.TestCase):
    """
    Tests for JobStatusTable. Each test constructs a JobStatusTable for the 'api_test'
    instance, so running them requires the Cosmos DB settings that JobStatusTable reads.
    """
    api_instance = 'api_test'

    def test_insert(self):
        # a freshly created item should echo back its id and status
        job_table = JobStatusTable(TestJobStatusTable.api_instance)
        initial_status = {'request_status': 'running', 'message': 'this is a test'}
        job_id = uuid.uuid4().hex
        call_params = {'container_sas': 'random_string'}

        item = job_table.create_job_status(job_id, initial_status, call_params)

        self.assertTrue(job_id == item['id'], 'Expect job_id to be the id of the item')
        self.assertTrue(item['status']['request_status'] == 'running', 'Expect fields to be inserted correctly')

    def test_update_and_read(self):
        # create a job, move it to 'completed', then read it back
        job_table = JobStatusTable(TestJobStatusTable.api_instance)
        job_id = uuid.uuid4().hex
        job_table.create_job_status(
            job_id,
            {'request_status': 'running', 'message': 'this is a test'},
            {'container_sas': 'random_string'})

        job_table.update_job_status(
            job_id,
            {'request_status': 'completed', 'message': 'this is a test again'})

        item_read = job_table.read_job_status(job_id)
        self.assertTrue(item_read['status']['request_status'] == 'completed', 'Expect field to have updated')

    def test_read_invalid_id(self):
        # a random id should not be in the database, so the read yields None
        job_table = JobStatusTable(TestJobStatusTable.api_instance)
        unknown_job_id = uuid.uuid4().hex
        self.assertIsNone(job_table.read_job_status(unknown_job_id))
|
||||
|
||||
|
||||
# Run the unit tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()
|
|
@ -1,360 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
Functions to submit images to the Azure Batch node pool for processing, monitor
|
||||
the Job and fetch results when completed.
|
||||
"""
|
||||
|
||||
import io
|
||||
import json
|
||||
import threading
|
||||
import time
|
||||
import logging
|
||||
import os
|
||||
import urllib.parse
|
||||
from datetime import timedelta
|
||||
from random import shuffle
|
||||
|
||||
import sas_blob_utils # from ai4eutils
|
||||
import requests
|
||||
from azure.storage.blob import ContainerClient, BlobSasPermissions, generate_blob_sas
|
||||
from tqdm import tqdm
|
||||
|
||||
from server_utils import *
|
||||
import server_api_config as api_config
|
||||
from server_batch_job_manager import BatchJobManager
|
||||
from server_job_status_table import JobStatusTable
|
||||
|
||||
|
||||
# Gunicorn logger handler will get attached if needed in server.py
|
||||
log = logging.getLogger(os.environ['FLASK_APP'])
|
||||
|
||||
|
||||
def create_batch_job(job_id: str, body: dict):
    """
    This is the target to be run in a thread to submit a batch processing job and monitor progress.

    Three stages, each with its own error handling; every failure is recorded in the job
    status table and ends the function without raising:
    1. Build the list of images (listing the input container, or using the user-provided
       JSON list), apply the prefix/suffix, first_n and sample_n filters, and upload the
       list to the API's storage container. Failures here set request_status 'failed'.
    2. Create the Batch job and submit its tasks. Failures here set 'problem'.
    3. Start the monitoring thread (monitor_batch_job). Failures here set 'problem'.

    Args:
        job_id: id of the job; its item must already exist in the job status table
        body: body of the user's request. Keys read here: input_container_sas, use_url,
            images_requested_json_sas, image_path_prefix, first_n, sample_n,
            model_version, request_name.
    """
    job_status_table = JobStatusTable()
    try:
        log.info(f'server_job, create_batch_job, job_id {job_id}, {body}')

        input_container_sas = body.get('input_container_sas', None)

        use_url = body.get('use_url', False)

        images_requested_json_sas = body.get('images_requested_json_sas', None)

        image_path_prefix = body.get('image_path_prefix', None)

        first_n = body.get('first_n', None)
        first_n = int(first_n) if first_n else None

        sample_n = body.get('sample_n', None)
        sample_n = int(sample_n) if sample_n else None

        model_version = body.get('model_version', '')
        if model_version == '':
            model_version = api_config.DEFAULT_MD_VERSION

        # request_name and request_submission_timestamp are for appending to
        # output file names
        job_name = body.get('request_name', '')  # in earlier versions we used "request" to mean a "job"
        job_submission_timestamp = get_utc_time()

        # image_paths can be a list of strings (Azure blob names or public URLs)
        # or a list of length-2 lists where each is a [image_id, metadata] pair

        # Case 1: listing all images in the container
        # - not possible to have attached metadata if listing images in a blob
        if images_requested_json_sas is None:
            log.info('server_job, create_batch_job, listing all images to process.')

            # list all images to process
            image_paths = sas_blob_utils.list_blobs_in_container(
                container_uri=input_container_sas,
                blob_prefix=image_path_prefix,  # check will be case-sensitive
                blob_suffix=api_config.IMAGE_SUFFIXES_ACCEPTED,  # check will be case-insensitive
                limit=api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB + 1
                # + 1 so if the number of images listed > MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB
                # we will know and not proceed
            )

        # Case 2: user supplied a list of images to process; can include metadata
        else:
            log.info('server_job, create_batch_job, using provided list of images.')

            response = requests.get(images_requested_json_sas)  # could be a file hosted anywhere
            image_paths = response.json()

            log.info('server_job, create_batch_job, length of image_paths provided by the user: {}'.format(
                len(image_paths)))
            if len(image_paths) == 0:
                job_status = get_job_status(
                    'completed', '0 images found in provided list of images.')
                job_status_table.update_job_status(job_id, job_status)
                return

            error, metadata_available = validate_provided_image_paths(image_paths)
            if error is not None:
                msg = 'image paths provided in the json are not valid: {}'.format(error)
                raise ValueError(msg)

            # filter down to those conforming to the provided prefix and accepted suffixes (image file types)
            valid_image_paths = []
            for p in image_paths:
                locator = p[0] if metadata_available else p

                # prefix is case-sensitive; suffix is not
                if image_path_prefix is not None and not locator.startswith(image_path_prefix):
                    continue

                # Although urlparse(p).path preserves the extension on local paths, it will not work for
                # blob file names that contains "#", which will be treated as indication of a query.
                # If the URL is generated via Azure Blob Storage, the "#" char will be properly encoded
                path = urllib.parse.urlparse(locator).path if use_url else locator

                if path.lower().endswith(api_config.IMAGE_SUFFIXES_ACCEPTED):
                    valid_image_paths.append(p)
            image_paths = valid_image_paths
            log.info(('server_job, create_batch_job, length of image_paths provided by user, '
                      f'after filtering to jpg: {len(image_paths)}'))

        # apply the first_n and sample_n filters
        if first_n:
            assert first_n > 0, 'parameter first_n is 0.'
            # OK if first_n > total number of images
            image_paths = image_paths[:first_n]

        if sample_n:
            assert sample_n > 0, 'parameter sample_n is 0.'
            if sample_n > len(image_paths):
                msg = ('parameter sample_n specifies more images than '
                       'available (after filtering by other provided params).')
                raise ValueError(msg)

            # sample by shuffling image paths and take the first sample_n images
            # (the path is passed as a %-style argument, so the message needs a placeholder;
            # without one the log record fails to format and the message is lost)
            log.info('First path before shuffling: %s', image_paths[0])
            shuffle(image_paths)
            log.info('First path after shuffling: %s', image_paths[0])
            image_paths = image_paths[:sample_n]

        num_images = len(image_paths)
        log.info(f'server_job, create_batch_job, num_images after applying all filters: {num_images}')

        if num_images < 1:
            job_status = get_job_status('completed', (
                'Zero images found in container or in provided list of images '
                'after filtering with the provided parameters.'))
            job_status_table.update_job_status(job_id, job_status)
            return
        if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB:
            job_status = get_job_status(
                'failed',
                (f'The number of images ({num_images}) requested for processing exceeds the maximum '
                 f'accepted {api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB} in one call'))
            job_status_table.update_job_status(job_id, job_status)
            return

        # upload the image list to the container, which is also mounted on all nodes
        # all sharding and scoring use the uploaded list
        images_list_str_as_bytes = bytes(json.dumps(image_paths, ensure_ascii=False), encoding='utf-8')

        container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
                                                               container=api_config.STORAGE_CONTAINER_API)
        with ContainerClient.from_container_url(container_url,
                                                credential=api_config.STORAGE_ACCOUNT_KEY) as api_container_client:
            _ = api_container_client.upload_blob(
                name=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_images.json',
                data=images_list_str_as_bytes)

        job_status = get_job_status('created', f'{num_images} images listed; submitting the job...')
        job_status_table.update_job_status(job_id, job_status)

    except Exception as e:
        job_status = get_job_status('failed', f'Error occurred while preparing the Batch job: {e}')
        job_status_table.update_job_status(job_id, job_status)
        log.error(f'server_job, create_batch_job, Error occurred while preparing the Batch job: {e}')
        return  # do not start monitoring

    try:
        batch_job_manager = BatchJobManager()

        model_rel_path = api_config.MD_VERSIONS_TO_REL_PATH[model_version]
        batch_job_manager.create_job(job_id,
                                     model_rel_path,
                                     input_container_sas,
                                     use_url)

        num_tasks, task_ids_failed_to_submit = batch_job_manager.submit_tasks(job_id, num_images)

        # now request_status moves from created to running
        job_status = get_job_status('running',
                                    (f'Submitted {num_images} images to cluster in {num_tasks} shards. '
                                     f'Number of shards failed to be submitted: {len(task_ids_failed_to_submit)}'))

        # an extra field to allow the monitoring thread to restart after an API restart: total number of tasks
        job_status['num_tasks'] = num_tasks
        # also record the number of images to process for reporting
        job_status['num_images'] = num_images

        job_status_table.update_job_status(job_id, job_status)
    except Exception as e:
        job_status = get_job_status('problem', f'Please contact us. Error occurred while submitting the Batch job: {e}')
        job_status_table.update_job_status(job_id, job_status)
        log.error(f'server_job, create_batch_job, Error occurred while submitting the Batch job: {e}')
        return

    # start the monitor thread with the same name
    try:
        thread = threading.Thread(
            target=monitor_batch_job,
            name=f'job_{job_id}',
            kwargs={
                'job_id': job_id,
                'num_tasks': num_tasks,
                'model_version': model_version,
                'job_name': job_name,
                'job_submission_timestamp': job_submission_timestamp
            }
        )
        thread.start()
    except Exception as e:
        job_status = get_job_status('problem', f'Error occurred while starting the monitoring thread: {e}')
        job_status_table.update_job_status(job_id, job_status)
        log.error(f'server_job, create_batch_job, Error occurred while starting the monitoring thread: {e}')
        return
|
||||
|
||||
|
||||
def monitor_batch_job(job_id: str,
                      num_tasks: int,
                      model_version: str,
                      job_name: str,
                      job_submission_timestamp: str):
    """
    Poll the Batch job until all its tasks have finished, then aggregate the results.

    Every api_config.MONITOR_PERIOD_MINUTES the number of succeeded and failed tasks is
    written to the job status table. Polling ends when succeeded + failed reaches
    num_tasks, or after more than api_config.MAX_MONITOR_CYCLES checks; in both cases
    aggregate_results is then called and, if it succeeds, the status becomes 'completed'
    with the output URL in the message. Errors are recorded as request_status 'problem'
    and end the function without raising.

    Args:
        job_id: id of the job to monitor
        num_tasks: total number of tasks submitted for this job
        model_version, job_name, job_submission_timestamp: passed through to
            aggregate_results
    """
    job_status_table = JobStatusTable()
    batch_job_manager = BatchJobManager()

    try:
        num_checks = 0

        while True:
            time.sleep(api_config.MONITOR_PERIOD_MINUTES * 60)
            num_checks += 1

            # both succeeded and failed tasks are marked "completed" on Batch
            num_tasks_succeeded, num_tasks_failed = batch_job_manager.get_num_completed_tasks(job_id)
            job_status = get_job_status('running',
                                        (f'Check number {num_checks}, '
                                         f'{num_tasks_succeeded} out of {num_tasks} shards have completed '
                                         f'successfully, {num_tasks_failed} shards have failed.'))
            job_status_table.update_job_status(job_id, job_status)
            log.info(f'job_id {job_id}. '
                     f'Check number {num_checks}, {num_tasks_succeeded} out of {num_tasks} shards completed, '
                     f'{num_tasks_failed} shards failed.')

            if (num_tasks_succeeded + num_tasks_failed) >= num_tasks:
                break

            if num_checks > api_config.MAX_MONITOR_CYCLES:
                job_status = get_job_status('problem',
                                            (
                                                f'Job unfinished after {num_checks} x {api_config.MONITOR_PERIOD_MINUTES} minutes, '
                                                f'please contact us to retrieve the results. Number of succeeded shards: {num_tasks_succeeded}')
                                            )
                job_status_table.update_job_status(job_id, job_status)
                # log messages in this function name monitor_batch_job so they are not
                # mistaken for failures of create_batch_job
                log.warning('server_job, monitor_batch_job, MAX_MONITOR_CYCLES reached, ending thread')
                break  # still aggregate the Tasks' outputs

    except Exception as e:
        job_status = get_job_status('problem', f'Error occurred while monitoring the Batch job: {e}')
        job_status_table.update_job_status(job_id, job_status)
        log.error(f'server_job, monitor_batch_job, Error occurred while monitoring the Batch job: {e}')
        return

    try:
        output_sas_url = aggregate_results(job_id, model_version, job_name, job_submission_timestamp)
        # preserving format from before, but SAS URL to 'failed_images' and 'images' are no longer provided
        # failures should be contained in the output entries, indicated by an 'error' field
        msg = {
            'num_failed_shards': num_tasks_failed,
            'output_file_urls': {
                'detections': output_sas_url
            }
        }
        job_status = get_job_status('completed', msg)
        job_status_table.update_job_status(job_id, job_status)

    except Exception as e:
        job_status = get_job_status('problem',
                                    f'Please contact us to retrieve the results. Error occurred while aggregating results: {e}')
        job_status_table.update_job_status(job_id, job_status)
        log.error(f'server_job, monitor_batch_job, Error occurred while aggregating results: {e}')
        return
|
||||
|
||||
|
||||
def aggregate_results(job_id: str,
                      model_version: str,
                      job_name: str,
                      job_submission_timestamp: str) -> str:
    """
    Combine the per-task JSON outputs of a job into one output file and return a SAS URL to it.

    If the output blob already exists, aggregation and upload are skipped and only the
    SAS URL is generated.

    Args:
        job_id: id of the job whose task outputs are aggregated
        model_version: written into the 'detector' field of the output's 'info' section
        job_name: appended to the output file name
        job_submission_timestamp: appended to the output file name

    Returns:
        URL of the output blob with a read-only SAS token that expires after
        api_config.OUTPUT_SAS_EXPIRATION_DAYS days.
    """
    log.info(f'server_job, aggregate_results starting, job_id: {job_id}')

    job_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}'
    container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
                                                           container=api_config.STORAGE_CONTAINER_API)
    # when people download this, the timestamp will have : replaced by _
    output_file_path = f'{job_dir}/{job_id}_detections_{job_name}_{job_submission_timestamp}.json'

    with ContainerClient.from_container_url(container_url,
                                            credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
        # The result blob may already have been written (could be another instance of the API /
        # worker thread). In that case only the SAS URL is generated below, which could be
        # needed still if the previous request_status was `problem`.
        output_blob_client = container_client.get_blob_client(output_file_path)
        if not output_blob_client.exists():
            task_outputs_dir = f'{job_dir}/task_outputs/'
            listed_blobs = container_client.list_blobs(name_starts_with=task_outputs_dir)
            json_blobs = [props for props in listed_blobs if props.name.endswith('.json')]

            # download every task output and concatenate their entries
            all_results = []
            for blob_props in tqdm(json_blobs):
                with container_client.get_blob_client(blob_props) as task_blob_client:
                    buffer = io.BytesIO()
                    task_blob_client.download_blob().readinto(buffer)
                    buffer.seek(0)
                    all_results.extend(json.load(buffer))

            info = {
                'detector': f'megadetector_v{model_version}',
                'detection_completion_time': get_utc_time(),
                'format_version': api_config.OUTPUT_FORMAT_VERSION
            }
            api_output = {
                'info': info,
                'detection_categories': api_config.DETECTOR_LABEL_MAP,
                'images': all_results
            }

            # upload the output JSON to the Job folder
            serialized_output = json.dumps(api_output, ensure_ascii=False, indent=1)
            _ = container_client.upload_blob(name=output_file_path,
                                             data=bytes(serialized_output, encoding='utf-8'))
        else:
            log.warning('The output file already exists, likely because another monitoring thread already wrote it.')

    sas_expiry = datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS)
    output_sas = generate_blob_sas(
        account_name=api_config.STORAGE_ACCOUNT_NAME,
        container_name=api_config.STORAGE_CONTAINER_API,
        blob_name=output_file_path,
        account_key=api_config.STORAGE_ACCOUNT_KEY,
        permission=BlobSasPermissions(read=True, write=False),
        expiry=sas_expiry
    )
    output_sas_url = sas_blob_utils.build_azure_storage_uri(
        account=api_config.STORAGE_ACCOUNT_NAME,
        container=api_config.STORAGE_CONTAINER_API,
        blob=output_file_path,
        sas_token=output_sas
    )
    log.info(f'server_job, aggregate_results done, job_id: {job_id}')
    # NOTE(review): this logs the SAS token as part of the URL - confirm that is intended
    log.info(f'output_sas_url: {output_sas_url}')
    return output_sas_url
|
|
@ -1,92 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
Helper functions for the batch processing API.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from typing import Tuple, Any, Sequence, Optional
|
||||
|
||||
import sas_blob_utils # from ai4eutils
|
||||
|
||||
|
||||
log = logging.getLogger(os.environ['FLASK_APP'])
|
||||
|
||||
|
||||
#%% helper classes and functions
|
||||
|
||||
def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
    """
    Log the error and return an ({'error': error_message}, error_code) pair.
    """
    # TODO log exception when we have more telemetry
    log.error(f'Error {error_code} - {error_message}')
    error_body = dict(error=error_message)
    return error_body, error_code
|
||||
|
||||
|
||||
def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, str]]:
    """
    Check that the SAS URL of the input container can be used for reading and listing.

    Returns None when the SAS is acceptable, otherwise a tuple (error_code, msg).
    """
    # TODO check that the expiry date of input_container_sas is at least a month
    # into the future
    permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
    query_parts = sas_blob_utils.get_all_query_parts(input_container_sas)

    if 'read' in permissions and 'list' in permissions:
        return None

    if 'si' in query_parts:
        # if no permission specified explicitly but has an access policy, assumes okay
        # TODO - check based on access policy as well
        return None

    return 400, ('input_container_sas provided does not have both read and list '
                 'permissions.')
|
||||
|
||||
|
||||
def get_utc_time() -> str:
    """
    Return the current UTC time as a string in the ISO 8601 format, with microseconds and a
    trailing 'Z' (so we can query by timestamp in the Cosmos DB job status table).

    Example: '2021-02-08T20:02:05.699689Z'
    """
    # local import: the module only imports `datetime` from the datetime package
    from datetime import timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take the aware UTC time and drop
    # the tzinfo so isoformat() does not append '+00:00' before the 'Z' suffix.
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    return now_utc.isoformat(timespec='microseconds') + 'Z'
|
||||
|
||||
|
||||
def get_job_status(request_status: str, message: Any) -> dict:
    """
    Build a job status dict with the two fields 'request_status' and 'message'.
    """
    job_status = dict(request_status=request_status, message=message)
    return job_status
|
||||
|
||||
|
||||
def validate_provided_image_paths(image_paths: Sequence[Any]) -> Tuple[Optional[str], bool]:
    """Given a list of image_paths, validate them and determine if metadata is available.

    The type of the first item decides which form is expected: all strings, or all
    2-item lists. Every item is then checked against that form.

    Args:
        image_paths: a list of string (image_id) or a list of 2-item lists
            ([image_id, image_metadata])

    Returns:
        error: None if checks passed, otherwise a string error message
        metadata_available: bool, True if available
    """
    metadata_available = False

    # callers are expected to pass at least one item, but report an empty list as an
    # error rather than failing with an IndexError
    if len(image_paths) == 0:
        return 'image_paths is empty.', metadata_available

    first_item = image_paths[0]
    if isinstance(first_item, str):
        if not all(isinstance(i, str) for i in image_paths):
            error = 'Not all items in image_paths are of type string.'
            return error, metadata_available
        return None, metadata_available

    if isinstance(first_item, list):
        metadata_available = True
        for i in image_paths:
            # check the type before the length: a 2-character string also has len() == 2
            # and items such as numbers have no len() at all
            if not isinstance(i, list):
                error = 'Items in image_paths are lists, but not all items are lists.'
                return error, metadata_available
            if len(i) != 2:  # i should be [image_id, metadata_string]
                error = ('Items in image_paths are lists, but not all lists '
                         'are of length 2 [image locator, metadata].')
                return error, metadata_available
        return None, metadata_available

    error = 'image_paths contain items that are not strings nor lists.'
    return error, metadata_available
|
|
@ -1,63 +0,0 @@
|
|||
#!/bin/sh

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Get the credentials from KeyVault and export them as environment variables.

# run `source start_batch_api.sh` to persist the credentials as env variables in the
# current shell for easy debugging by launching the Flask app separately.

# need "-o tsv" for the Azure CLI queries to get rid of quote marks

# Variable expansions and echo messages are double-quoted so that their values are
# passed through as single words regardless of their content.

SUBSCRIPTION=74d91980-e5b4-4fd9-adb6-263b8f90ec5b
KEY_VAULT_NAME=cameratraps


# A URL and a code to use for logging in on the browser will be displayed
echo "Log in to your Azure account via the CLI. You should be prompted to authenticate shortly..."
az login


# service principal to authenticate with Azure Batch
APP_TENANT_ID=$(az keyvault secret show --name batch-api-tenant-id --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "APP_TENANT_ID read from KeyVault"
export APP_TENANT_ID

APP_CLIENT_ID=$(az keyvault secret show --name batch-api-client-id --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "APP_CLIENT_ID read from KeyVault"
export APP_CLIENT_ID

APP_CLIENT_SECRET=$(az keyvault secret show --name batch-api-client-secret --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "APP_CLIENT_SECRET read from KeyVault"
export APP_CLIENT_SECRET


# blob storage account with containers for scripts and job outputs
export STORAGE_ACCOUNT_NAME=cameratrap

STORAGE_ACCOUNT_KEY=$(az keyvault secret show --name cameratrap-storage-account-key --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "STORAGE_ACCOUNT_KEY read from KeyVault"
export STORAGE_ACCOUNT_KEY


# Azure Container Registry - Azure Batch gets the Docker image from here
export REGISTRY_SERVER=***REMOVED***.azurecr.io

REGISTRY_PASSWORD=$(az keyvault secret show --name registry-password --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "REGISTRY_PASSWORD read from KeyVault"
export REGISTRY_PASSWORD


# App Configuration
APP_CONFIG_CONNECTION_STR=$(az keyvault secret show --name camera-trap-app-config-connection-str --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
echo "APP_CONFIG_CONNECTION_STR read from KeyVault"
export APP_CONFIG_CONNECTION_STR


# Cosmos DB for job status tracking
COSMOS_ENDPOINT=$(az keyvault secret show --name cosmos-db-endpoint --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
export COSMOS_ENDPOINT
COSMOS_WRITE_KEY=$(az keyvault secret show --name cosmos-db-read-write-key --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv)
export COSMOS_WRITE_KEY
echo "COSMOS_ENDPOINT and COSMOS_WRITE_KEY read from KeyVault"
|
|
@ -1,47 +0,0 @@
|
|||
#
|
||||
# If a request has been sent to AML for batch scoring but the monitoring thread of the API was
|
||||
# interrupted (uncaught exception or having to re-start the API container), we could manually
|
||||
# aggregate results from each shard using this script, assuming all jobs submitted to AML have finished.
|
||||
#
|
||||
# Need to have set environment variables STORAGE_ACCOUNT_NAME and STORAGE_ACCOUNT_KEY to those of the
|
||||
# storage account backing the API. Also need to adjust the INTERNAL_CONTAINER, AML_CONTAINER and
|
||||
# AML_CONFIG fields in api_core/orchestrator_api/api_config.py to match the instance of the API that this
|
||||
# request was submitted to.
|
||||
#
|
||||
# May need to change the import statement in api_core/orchestrator_api/orchestrator.py
|
||||
# "from sas_blob_utils import SasBlob" to
|
||||
# "from .sas_blob_utils import SasBlob" to not confuse with the module in AI4Eutils;
|
||||
# and change "import api_config" to
|
||||
# "from api.batch_processing.api_core.orchestrator_api import api_config"
|
||||
|
||||
# Execute this script from the root of the repository. You may need to add the repository to PYTHONPATH.
|
||||
|
||||
import argparse
|
||||
import json
|
||||
|
||||
from api.batch_processing.api_core.orchestrator_api.orchestrator import AMLMonitor
|
||||
|
||||
|
||||
def main():
    """
    Aggregate the results of a request from the command line and print the output file URLs.

    Positional arguments: shortened_request_id, model_version, and optionally request_name
    (defaults to an empty string). The URLs returned by AMLMonitor.aggregate_results() are
    printed as a JSON string.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('shortened_request_id', type=str,
                        help='the request ID to restart monitoring')
    parser.add_argument('model_version', type=str, help='version of megadetector used; this is used to fill in the meta info section of the output file')
    # nargs='?' is what makes a positional argument optional; without it the default is never used
    parser.add_argument('request_name', type=str, nargs='?', default='',
                        help='easy to remember name for that job, optional')
    args = parser.parse_args()

    # list_jobs_submitted cannot be serialized ("can't pickle _thread.RLock objects "), but
    # do not need it for aggregating results
    # the parsed attribute is named after the positional argument: shortened_request_id
    aml_monitor = AMLMonitor(request_id=args.shortened_request_id,
                             list_jobs_submitted=None,
                             request_name=args.request_name,
                             request_submission_timestamp='',
                             model_version=args.model_version)
    output_file_urls = aml_monitor.aggregate_results()
    output_file_urls_str = json.dumps(output_file_urls)
    print(output_file_urls_str)
|
||||
|
||||
|
||||
# Run main() when this file is executed as a script.
if __name__ == '__main__':
    main()
|
|
@ -1,383 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from IPython.core.interactiveshell import InteractiveShell\n",
|
||||
"InteractiveShell.ast_node_interactivity = 'all' # default is ‘last_expr’\n",
|
||||
"\n",
|
||||
"%load_ext autoreload\n",
|
||||
"%autoreload 2"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'10.0.0'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import azure.batch\n",
|
||||
"azure.batch.__version__"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from azure.batch import BatchServiceClient\n",
|
||||
"from azure.batch.models import *\n",
|
||||
"from azure.common.credentials import ServicePrincipalCredentials"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Set up an instance of the batch processing API\n",
|
||||
"\n",
|
||||
"We create one Azure Batch Pool for each instance of the batch processing API.\n",
|
||||
"\n",
|
||||
"The limit for the number of Pools in our Batch account is 100."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 1: Create an Azure Batch Pool"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 23,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# MODIFY THIS CELL\n",
|
||||
"\n",
|
||||
"# POOL_ID should start with the name of the API instance this pool will be used for\n",
|
||||
"\n",
|
||||
"POOL_ID = 'internal_1'\n",
|
||||
"assert len(POOL_ID) <= 64, 'pool_id has more than 64 characters'\n",
|
||||
"\n",
|
||||
"# choose the account in East US or South Central US\n",
|
||||
"BATCH_ACCOUNT_URL = 'https://cameratrapssc.southcentralus.batch.azure.com' "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# secrets read from environment variables\n",
|
||||
"REGISTRY_PASSWORD = os.environ['REGISTRY_PASSWORD']\n",
|
||||
"STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']\n",
|
||||
"\n",
|
||||
"# authenticate with Batch account using the service principle \"camera-trap-async-api\" in our AAD\n",
|
||||
"APP_CLIENT_ID = os.environ['APP_CLIENT_ID']\n",
|
||||
"APP_CLIENT_SECRET = os.environ['APP_CLIENT_SECRET']\n",
|
||||
"APP_TENANT_ID = os.environ['APP_TENANT_ID']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# other configuration info\n",
|
||||
"\n",
|
||||
"# Docker image\n",
|
||||
"REGISTRY_SERVER = '***REMOVED***.azurecr.io'\n",
|
||||
"REGISTRY_USERNAME = REGISTRY_SERVER.split('.')[0]\n",
|
||||
"\n",
|
||||
"CONTAINER_IMAGE_NAME = '***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3' # login server/repository:tag\n",
|
||||
"\n",
|
||||
"# storage\n",
|
||||
"STORAGE_ACCOUNT_NAME = 'cameratrap' # in the engineering subscription\n",
|
||||
"\n",
|
||||
"# names of two containers supporting the API instances in the above storage account\n",
|
||||
"STORAGE_CONTAINER_MODELS = 'models'\n",
|
||||
"STORAGE_CONTAINER_API = 'batch-api'\n",
|
||||
"\n",
|
||||
"# Azure Batch node pool VM type\n",
|
||||
"POOL_VM_SIZE = 'Standard_NC6s_v3' # https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series\n",
|
||||
"\n",
|
||||
"# auto-scale formula - can be set manually in Azure portal\n",
|
||||
"# last statement makes sure that nodes aren't removed until their tasks are finished\n",
|
||||
"# docs: https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling\n",
|
||||
"\n",
|
||||
"# MODIFY the \"cappedPoolSize\" if it should be other than 16 dedicated nodes\n",
|
||||
"POOL_AUTO_SCALE_FORMULA = \"\"\"\n",
|
||||
"// In this formula, the pool size is adjusted based on the number of tasks in the queue. \n",
|
||||
"// Note that both comments and line breaks are acceptable in formula strings.\n",
|
||||
"\n",
|
||||
"// Get pending tasks for the past 15 minutes.\n",
|
||||
"$samples = $ActiveTasks.GetSamplePercent(TimeInterval_Minute * 15);\n",
|
||||
"\n",
|
||||
"// If we have fewer than 70 percent data points, we use the last sample point, otherwise we use the maximum of last sample point and the history average.\n",
|
||||
"$tasks = $samples < 70 ? max(0, $ActiveTasks.GetSample(1)) : \n",
|
||||
"max( $ActiveTasks.GetSample(1), avg($ActiveTasks.GetSample(TimeInterval_Minute * 15)));\n",
|
||||
"\n",
|
||||
"// If number of pending tasks is not 0, set targetVM to pending tasks, otherwise set to 0, since there is usually long intervals between job submissions.\n",
|
||||
"$targetVMs = $tasks > 0 ? $tasks : 0;\n",
|
||||
"\n",
|
||||
"// The pool size is capped at 16, if target VM value is more than that, set it to 16.\n",
|
||||
"cappedPoolSize = 16;\n",
|
||||
"$TargetDedicatedNodes = max(0, min($targetVMs, cappedPoolSize));\n",
|
||||
"\n",
|
||||
"// Set node deallocation mode - keep nodes active only until tasks finish\n",
|
||||
"$NodeDeallocationOption = taskcompletion;\n",
|
||||
"\"\"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def print_batch_exception(batch_exception):\n",
|
||||
" \"\"\"\n",
|
||||
" Prints the contents of the specified Batch exception.\n",
|
||||
" \"\"\"\n",
|
||||
" print('-------------------------------------------')\n",
|
||||
" print('Exception encountered:')\n",
|
||||
" if batch_exception.error and \\\n",
|
||||
" batch_exception.error.message and \\\n",
|
||||
" batch_exception.error.message.value:\n",
|
||||
" print(batch_exception.error.message.value)\n",
|
||||
" if batch_exception.error.values:\n",
|
||||
" print()\n",
|
||||
" for msg in batch_exception.error.values:\n",
|
||||
" print(f'{msg.key}:\\t{msg.value}')\n",
|
||||
" print('-------------------------------------------')\n",
|
||||
"\n",
|
||||
"def create_pool(batch_service_client, pool_id):\n",
|
||||
" \"\"\"\n",
|
||||
" Create a pool with pool_id and the Docker image specified by constants in above cells\n",
|
||||
" \"\"\"\n",
|
||||
" # we have to use VM images supporting GPU access *and* Docker\n",
|
||||
" # this VM image will run our custom container\n",
|
||||
" image_ref = ImageReference(\n",
|
||||
" publisher='microsoft-azure-batch',\n",
|
||||
" offer='ubuntu-server-container',\n",
|
||||
" sku='20-04-lts',\n",
|
||||
" version='latest' # URN: microsoft-azure-batch:ubuntu-server-container:16-04-lts:1.1.0\n",
|
||||
" # The Azure Batch container image only accepts 'latest' version\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # specify a container registry from which to pull the custom container\n",
|
||||
" # see the `batch_service` folder on instructions for building the container image\n",
|
||||
" container_registry = ContainerRegistry(\n",
|
||||
" registry_server=REGISTRY_SERVER,\n",
|
||||
" user_name=REGISTRY_USERNAME,\n",
|
||||
" password=REGISTRY_PASSWORD\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" container_conf = ContainerConfiguration(\n",
|
||||
" container_image_names = [CONTAINER_IMAGE_NAME],\n",
|
||||
" container_registries =[container_registry]\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" vm_config = VirtualMachineConfiguration(\n",
|
||||
" image_reference=image_ref,\n",
|
||||
" container_configuration=container_conf,\n",
|
||||
" node_agent_sku_id='batch.node.ubuntu 20.04'\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # mount the `models` and the `batch-api` blob containers\n",
|
||||
" container_models = MountConfiguration(\n",
|
||||
" azure_blob_file_system_configuration=AzureBlobFileSystemConfiguration(\n",
|
||||
" account_name=STORAGE_ACCOUNT_NAME,\n",
|
||||
" container_name=STORAGE_CONTAINER_MODELS,\n",
|
||||
" relative_mount_path=STORAGE_CONTAINER_MODELS, # use container name as relative path\n",
|
||||
" account_key=STORAGE_ACCOUNT_KEY,\n",
|
||||
" blobfuse_options='-o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 -o allow_other'\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" container_batch_api = MountConfiguration(\n",
|
||||
" azure_blob_file_system_configuration=AzureBlobFileSystemConfiguration(\n",
|
||||
" account_name=STORAGE_ACCOUNT_NAME,\n",
|
||||
" container_name=STORAGE_CONTAINER_API,\n",
|
||||
" relative_mount_path=STORAGE_CONTAINER_API, # use container name as relative path\n",
|
||||
" account_key=STORAGE_ACCOUNT_KEY,\n",
|
||||
" # allow_other needs to be flagged - task running inside container needs to access this blob container\n",
|
||||
" blobfuse_options='-o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 -o allow_other'\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" new_pool = PoolAddParameter(\n",
|
||||
" id=POOL_ID,\n",
|
||||
" display_name=POOL_ID,\n",
|
||||
"\n",
|
||||
" vm_size=POOL_VM_SIZE,\n",
|
||||
" \n",
|
||||
" enable_auto_scale=True,\n",
|
||||
" auto_scale_formula=POOL_AUTO_SCALE_FORMULA,\n",
|
||||
"\n",
|
||||
" virtual_machine_configuration=vm_config,\n",
|
||||
"\n",
|
||||
" # default is 1; each task occupies the entire GPU so we can only run one task at a time on a node\n",
|
||||
" task_slots_per_node=1,\n",
|
||||
"\n",
|
||||
" mount_configuration=[container_models, container_batch_api],\n",
|
||||
" )\n",
|
||||
" batch_service_client.pool.add(new_pool)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"credentials = ServicePrincipalCredentials(\n",
|
||||
" client_id=APP_CLIENT_ID,\n",
|
||||
" secret=APP_CLIENT_SECRET,\n",
|
||||
" tenant=APP_TENANT_ID,\n",
|
||||
" resource='https://batch.core.windows.net/'\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# if using the Batch quota system, use https://docs.microsoft.com/en-us/python/api/azure-batch/azure.batch.batch_auth.sharedkeycredentials?view=azure-python\n",
|
||||
"# to authenticate instead of the service principal is also okay.\n",
|
||||
"\n",
|
||||
"batch_client = BatchServiceClient(credentials=credentials, batch_url=BATCH_ACCOUNT_URL)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"CPU times: user 29 ms, sys: 3.54 ms, total: 32.5 ms\n",
|
||||
"Wall time: 1.01 s\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%%time\n",
|
||||
"# pool creation should finish in about a minute\n",
|
||||
"\n",
|
||||
"try:\n",
|
||||
" create_pool(batch_client, POOL_ID)\n",
|
||||
"except BatchErrorException as e:\n",
|
||||
" print_batch_exception(e)\n",
|
||||
" raise"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Step 2: Upload the scoring script\n",
|
||||
"\n",
|
||||
"Note that all instances share this scoring script!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# MODIFY THIS CELL\n",
|
||||
"\n",
|
||||
"# path to the scoring script; modify if cwd is not `api_core`\n",
|
||||
"path_scoring_script = 'batch_service/score.py'\n",
|
||||
"\n",
|
||||
"# SAS with write permission for uploading output JSONs\n",
|
||||
"sas_query_str = '' # get a write-enabled SAS for the container below\n",
|
||||
"\n",
|
||||
"output_container_url = f'https://cameratrap.blob.core.windows.net/batch-api{sas_query_str}'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# upload the scoring script to the container above; Batch Tasks will retrieve the script from there\n",
|
||||
"\n",
|
||||
"output_container_client = ContainerClient.from_container_url(output_container_url)\n",
|
||||
"\n",
|
||||
"with open(path_scoring_script, 'rb') as f:\n",
|
||||
" script_blob_client = output_container_client.upload_blob(name='scripts/score.py', data=f, overwrite=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Useful CLI commands for using Docker images with Batch\n",
|
||||
"\n",
|
||||
"List all Batch supported images with their \"capabilities\" (e.g. \"DockerCompatible\", \"NvidiaTeslaDriverInstalled\"):\n",
|
||||
"```\n",
|
||||
"az batch pool supported-images list\n",
|
||||
"```\n",
|
||||
"with the pool information provided in additional parameters.\n",
|
||||
"\n",
|
||||
"Listing all versions of a SKU of image:\n",
|
||||
"```\n",
|
||||
"az vm image list --all --publisher microsoft-dsvm\n",
|
||||
"```\n",
|
||||
"\n",
|
||||
"You may need to accept the terms of an image:\n",
|
||||
"```\n",
|
||||
"az vm image list --all --publisher <publisher>\n",
|
||||
"```\n",
|
||||
"to find the URN for the image you want to use, followed by:\n",
|
||||
"\n",
|
||||
"```\n",
|
||||
"az vm image terms accept --urn <corresponding-urn>\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python [conda env:cameratraps-batch-api]",
|
||||
"language": "python",
|
||||
"name": "conda-env-cameratraps-batch-api-py"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
#!/bin/sh

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

# Get the credentials from KeyVault and run summarize_daily_activity.py

# Stop at the first failing command or unset variable, so that a failed login
# or secret lookup does not start the monitor with empty credentials.
set -eu

SUBSCRIPTION=74d91980-e5b4-4fd9-adb6-263b8f90ec5b
KEY_VAULT_NAME=cameratraps

# Print the value of the KeyVault secret named by the first argument.
get_secret() {
    az keyvault secret show --name "$1" --subscription "$SUBSCRIPTION" --vault-name "$KEY_VAULT_NAME" --query value -o tsv
}

# A URL and a code to use for logging in on the browser will be displayed
echo "Log in to your Azure account via the CLI. You should be prompted to authenticate shortly..."
az login

# Cosmos DB for job status checking
COSMOS_ENDPOINT=$(get_secret cosmos-db-endpoint)
export COSMOS_ENDPOINT
COSMOS_READ_KEY=$(get_secret cosmos-db-read-only-key)
export COSMOS_READ_KEY
echo "COSMOS_ENDPOINT and COSMOS_READ_KEY read from KeyVault"

# Teams webhook
TEAMS_WEBHOOK=$(get_secret teams-webhook-cicd)
export TEAMS_WEBHOOK
echo "TEAMS_WEBHOOK read from KeyVault"

python summarize_daily_activity.py
|
|
@ -1,153 +0,0 @@
|
|||
# Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
# Licensed under the MIT License.
|
||||
|
||||
"""
|
||||
This script can be run in a separate process to monitor all instances of the batch API.
|
||||
It sends a digest of submissions within the past day to a Teams channel webhook.
|
||||
|
||||
It requires the environment variables TEAMS_WEBHOOK, COSMOS_ENDPOINT and COSMOS_READ_KEY to be set.
|
||||
"""
|
||||
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from collections import defaultdict
|
||||
|
||||
import requests
|
||||
from azure.cosmos.cosmos_client import CosmosClient
|
||||
|
||||
|
||||
# Cosmos DB `batch-api-jobs` table for job status
|
||||
COSMOS_ENDPOINT = os.environ['COSMOS_ENDPOINT']
|
||||
COSMOS_READ_KEY = os.environ['COSMOS_READ_KEY']
|
||||
|
||||
TEAMS_WEBHOOK = os.environ['TEAMS_WEBHOOK']
|
||||
|
||||
|
||||
def send_message():
    """Post a digest of yesterday's (UTC) batch API submissions to the Teams webhook.

    Queries the `batch_api_jobs` container of the `camera-trap` Cosmos DB
    database for jobs whose `job_submission_time` falls within yesterday (UTC),
    aggregates image counts, countries and organization names per API instance,
    and posts the result as a MessageCard to TEAMS_WEBHOOK.

    Nothing is posted when no images were received. A failure to reach the
    webhook is reported on stdout instead of being raised.
    """
    cosmos_client = CosmosClient(COSMOS_ENDPOINT, credential=COSMOS_READ_KEY)
    db_client = cosmos_client.get_database_client('camera-trap')
    db_jobs_client = db_client.get_container_client('batch_api_jobs')

    today = datetime.now(timezone.utc).date()
    yesterday = today - timedelta(days=1)

    # The upper bound keeps jobs submitted after midnight UTC out of this
    # digest; they belong to the next day's digest and would otherwise be
    # counted twice.
    query = f'''
    SELECT *
    FROM job
    WHERE job.job_submission_time >= "{yesterday.isoformat()}T00:00:00Z"
    AND job.job_submission_time < "{today.isoformat()}T00:00:00Z"
    '''

    result_iterable = db_jobs_client.query_items(query=query,
                                                 enable_cross_partition_query=True)

    # aggregate the number of images, country and organization names info from each job
    # submitted during yesterday (UTC time)
    instance_num_images = defaultdict(lambda: defaultdict(int))
    instance_countries = defaultdict(set)
    instance_orgs = defaultdict(set)

    total_images_received = 0

    for job in result_iterable:
        api_instance = job['api_instance']
        status = job['status']
        call_params = job['call_params']
        num_images = status.get('num_images', 0)

        # every job counts toward the total; only completed jobs count as completed
        if status['request_status'] == 'completed':
            instance_num_images[api_instance]['num_images_completed'] += num_images
        instance_num_images[api_instance]['num_images_total'] += num_images
        total_images_received += num_images

        instance_countries[api_instance].add(call_params.get('country', 'unknown'))
        instance_orgs[api_instance].add(call_params.get('organization_name', 'unknown'))

    print(f'send_message, number of images received yesterday: {total_images_received}')

    if total_images_received < 1:
        print('send_message, no images submitted yesterday, not sending a summary')
        print('')
        return

    # create the card: one section per API instance
    sections = [
        {
            'activityTitle': f'API instance: {instance_name}',
            'facts': [
                {
                    'name': 'Total images',
                    'value': f"{counts['num_images_total']:,}"
                },
                {
                    'name': 'Images completed',
                    'value': f"{counts['num_images_completed']:,}"
                },
                {
                    'name': 'Countries',
                    'value': ', '.join(sorted(instance_countries[instance_name]))
                },
                {
                    'name': 'Organizations',
                    'value': ', '.join(sorted(instance_orgs[instance_name]))
                }
            ]
        }
        for instance_name, counts in instance_num_images.items()
    ]

    card = {
        '@type': 'MessageCard',
        '@context': 'http://schema.org/extensions',
        'themeColor': 'ffcdb2',
        'summary': 'Digest of batch API activities over the past 24 hours',
        'title': f'Camera traps batch API activities on {yesterday.strftime("%b %d, %Y")}',
        'sections': sections,
        'potentialAction': [
            {
                '@type': 'OpenUri',
                'name': 'View Batch account in Azure Portal',
                'targets': [
                    {
                        'os': 'default',
                        'uri': 'https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/74d91980-e5b4-4fd9-adb6-263b8f90ec5b/resourcegroups/camera_trap_api_rg/providers/Microsoft.Batch/batchAccounts/cameratrapssc/accountOverview'
                    }
                ]
            }
        ]
    }

    print('send_message, card to send:')
    print(json.dumps(card, indent=4))

    try:
        # without a timeout a stalled connection would block this process forever
        response = requests.post(TEAMS_WEBHOOK, data=json.dumps(card), timeout=60)
    except requests.RequestException as e:
        print(f'send_message, failed to send summary to webhook: {e}')
        print('')
        return

    print(f'send_message, sent summary to webhook, response code: {response.status_code}')
    print('')
|
||||
|
||||
|
||||
def _next_wake_up(now):
    """Return the first 00:05 strictly after `now`, on the same clock as `now`."""
    wake_up = now.replace(hour=0, minute=5, second=0, microsecond=0)
    if wake_up <= now:
        wake_up += timedelta(days=1)
    return wake_up


def main():
    """
    Wake up at 5 minutes past midnight UTC to send a summary of yesterday's activities if there were any.
    Then goes in a loop to wake up and send a summary every 24 hours.

    The wake-up time is recomputed before every sleep, so the schedule does not
    drift by the time spent in send_message().
    """
    while True:
        current = datetime.now(timezone.utc)
        future = _next_wake_up(current)

        # total_seconds() rather than .seconds: the latter silently drops the
        # days component of a timedelta
        duration_seconds = (future - current).total_seconds()
        duration_hours = duration_seconds / (60 * 60)

        print(f'Current time: {current}')
        print(f'Will wake up at {future}, in {duration_hours} hours')
        print('')

        time.sleep(duration_seconds)

        print(f'Woke up at {datetime.now(timezone.utc)}')
        send_message()


if __name__ == '__main__':
    main()
|
|
@ -1,159 +0,0 @@
|
|||
# Managing camera trap API tasks
|
||||
|
||||
## Overview
|
||||
|
||||
This document describes the process for running partner data through our <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/">MegaDetector Batch Processing API</a>. It assumes that the target data is in a single blob container to which we have a read-write SAS token.
|
||||
|
||||
The requirement for write permissions is only used to write intermediate files and API output, so it would be a very small set of code changes to handle the case where we have read-only access to the source container, but for now, we're assuming r/w access.
|
||||
|
||||
The major steps covered here are:
|
||||
|
||||
* Enumerating the files that need processing and generating API input
|
||||
* Calling the API, including any necessary resubmissions due to failed shards
|
||||
* Postprocessing to generate preview files
|
||||
* Repeat detection elimination
|
||||
|
||||
Repeat detection elimination is a manual step that we do in ~30% of cases, and we typically tune the process so this step takes around 20 minutes of hands-on time. Without this, this whole process should take around five minutes of hands-on time, plus the time required to run the task (which can be anywhere from minutes to days). I know this looks like a lot of steps, but once you get the hang of it, it's really fast. If it's your third time doing this and you find that it's taking more than five minutes of human intervention time – including generating SAS tokens and uploading results for preview – email cameratraps@lila.science to let us know!
|
||||
|
||||
This document is written 98% for internal use, so you will see some instructions that only make sense internally (like "ask Dan to create a password for blah blah"). But if you find it useful externally, let us know!
|
||||
|
||||
|
||||
## Magic strings you need before following this guide
|
||||
|
||||
* Storage account and container name for the data container
|
||||
* API endpoint URL and required "caller" token... for this document, we'll use "blah.endpoint.com" and "caller", respectively.
|
||||
* Read-only and read-write SAS tokens for the data container... for this document, we'll use "?st=sas_token"
|
||||
* Credentials for the VM where we host previews and output data... for this document, we'll use "datavm.com".
|
||||
* A password for the specific folder you will post the results to on that VM
|
||||
* Possibly a set of specific folders to process as separate tasks within the target container
|
||||
|
||||
|
||||
## Setting up your environment (one time only)
|
||||
|
||||
* Unless otherwise stated, you will want to work on a VM in South Central US. You will not be moving substantial volumes of images, so it's OK to work outside of Azure, but a few steps will be slightly faster with low-latency access. These instructions will also assume you have a graphical/interactive IDE (Spyder, PyCharm, or VS Code) and that you can run a browser on the same machine where you're running Python.
|
||||
|
||||
* Probably install <a href="https://www.postman.com/">Postman</a> for task submission
|
||||
|
||||
* If you're working on Windows, probably install <a href="https://www.irfanview.com/">IrfanView</a> for repeat detection elimination (the semi-automated step that will require you to look at lots of images).
|
||||
|
||||
* If you're working on Windows, probably install <a href="https://www.bitvise.com/">Bitvise</a> for SCP'ing the results to our Web server VM
|
||||
|
||||
* Clone the following repos, and be on master/latest on both:
|
||||
* <a href="https://github.com/ecologize/CameraTraps">github.com/ecologize/CameraTraps</a>
|
||||
* <a href="https://github.com/microsoft/ai4eutils">github.com/microsoft/ai4eutils</a>
|
||||
|
||||
* Put the roots of both of the above repos on your PYTHONPATH; see <a href="https://github.com/ecologize/CameraTraps/#other-notes">instructions on the CameraTraps repo</a> re: setting your PYTHONPATH.
|
||||
|
||||
* If you're into using conda environments, cd to the root of the CameraTraps repo and run:
|
||||
|
||||
`conda env create --file environment-api-task-management.yml`
|
||||
|
||||
|
||||
## Stuff you do for each task
|
||||
|
||||
### Forking the template script
|
||||
|
||||
* Make a copy of <a href="https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/data_preparation/manage_api_submission.py">manage_api_submission.py</a>, <i>outside</i> of the CameraTraps repo. You may or may not end up with credentials in this file, so your working copy should <i>not be on GitHub</i>. Name this file as `organization-YYYYMMDD.py`.
|
||||
|
||||
* Fill in all the constants in the "constants I set per task" cell. Specifically:
|
||||
|
||||
* storage_account_name
|
||||
* container_name
|
||||
* task_set_name, formatted as `organization-YYYYMMDD` (same as the file name)
|
||||
* base_output_folder_name (this is a local folder... I recommend maintaining a local folder like c:\camera_trap_tasks and putting all task data in subfolders named according to the organization, e.g. c:\camera_trap_tasks\university_of_arendelle, but this isn't critical)
|
||||
* read_only_sas_token
|
||||
* read_write_sas_token
|
||||
* caller
|
||||
* endpoint_base
|
||||
|
||||
If applicable (but usually not applicable):
|
||||
|
||||
* container_prefix (restricts image enumeration to specific prefixes in the source container)
|
||||
* folder_names (splits the overall task up into multiple sub-tasks, typically corresponding to folders that are meaningful to the organization, e.g. "Summer_2018")
|
||||
* additional_task_args (typically used to specify a model version)
|
||||
|
||||
|
||||
### Preparing the task(s)
|
||||
|
||||
I use this file like a notebook, typically running all cells interactively. The cell notation in this file is friendly to Spyder, VS Code, and PyCharm (professional). To prepare the task, run all the cells through "generate API calls for each task".
|
||||
|
||||
At this point, the json-formatted API string has been generated for each task (typically just one, unless you used the "folder_names" feature to create multiple tasks), and you're ready to submit.
|
||||
|
||||
|
||||
### Submitting the task(s)
|
||||
|
||||
The next cell is called "run the tasks", and though it doesn't actually work, I don't recommend programmatic submission anyway. You are about to spin up sixteen expensive and power-hungry GPUs, and IMO it's better to do this manually so you can triple-quadruple check that you really want to start a task. I do this through Postman; see <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#other-notes-and-example">here</a> for an example. If you are running multiple tasks, you should run them separately in Postman.
|
||||
|
||||
You will get back a task ID for each task, enter these in the "manually define task groups" cell in the format indicated in the template code. A "task group" is a logical task; the reason we use a <i>list</i> of task IDs for each task group is that (1) we split tasks over 1M images into multiple tasks, and (2) sometimes shards fail and we resubmit some images later as part of the same task, so we will extend those lists as necessary.
|
||||
|
||||
I then typically run the "estimate total time" cell. For very small tasks, this isn't meaningful, since it doesn't include spin-up time. This tells me when I should check back again. I then typically run the "status check" cell to confirm the task is in progress.
|
||||
|
||||
|
||||
### Time passes...
|
||||
|
||||
Do other work, watch Netflix (Last Kingdom Season 4 just came out!), go to bed, wake up...
|
||||
|
||||
When you're back, run the "status check" cell again, and if it doesn't show "completed", wait longer. If it's been suspiciously long, check in with us.
|
||||
|
||||
|
||||
### Check for failures
|
||||
|
||||
Run the "look for failed shards" cell. Most of the time it will say "no resubmissions necessary". If it shows some required resubmissions, look carefully at the "missing images" printout. If it's actually just a small number (but still slightly larger than the `max_tolerable_missing_images` constant, otherwise you wouldn't get this printout), consider just raising the `max_tolerable_missing_images` constant. This is subjective and project-specific.
|
||||
|
||||
If you do have to resubmit tasks, the API calls will be in your console. Run them, and see the "Resubmit tasks for failed shards" cell, where you need to add the task IDs for the resubmissions to the appropriate task groups.
|
||||
|
||||
Theoretically you could have to do all this again if your resubmissions fail; thinking through this is outside the scope of this README. I've never had this happen.
|
||||
|
||||
|
||||
### Post-processing
|
||||
|
||||
Run the next two cells, which should uneventfully pull results and combine results from resubmitted tasks into single .json files.
|
||||
|
||||
Now the excitement starts again with the "post-processing" cell: running this will take a minute or two, and browser tabs should open with previews for each task. I typically decide two things here, both subjective:
|
||||
|
||||
1. Do we need to adjust the confidence threshold from the 80% default?
|
||||
|
||||
2. Do we need to do the repeat detection elimination step?
|
||||
|
||||
The latter isn't just about the results; it's about the priority of the task, the time available, the degree to which the collaborator can do this on their own, etc. Guidance for these two decisions is beyond the scope of this document.
|
||||
|
||||
|
||||
### Repeat detection elimination (not typically necessary)
|
||||
|
||||
Before reading this, I recommend skimming the <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination">public documentation on the repeat detection elimination (RDE) process</a>, to get a sense of what it's about.
|
||||
|
||||
OK, you're back... I run RDE in the following steps:
|
||||
|
||||
1. Run the "repeat detection elimination, phase 1" cell.
|
||||
|
||||
2. Before actually starting the manual step, get a rough sense of how many images you have in the target folder. If it's more than you have time to deal with (I typically aim for no more than ~2k), adjust parameters and re-run this cell. Also if you see quickly that there are lots of actual true positives with boxes in the output folder (i.e., lots of animals that were just sitting really still), you'll also want to adjust parameters and re-run this cell. Parameter adjustment is also beyond the scope of this document; we'll update in the future with examples for when you might adjust each parameter.
|
||||
|
||||
3. Now you're ready to do the manual step, i.e. deleting all the images in the RDE folder with boxes that contain animals. Reminder: it's fine if the <i>image</i> contains an animal, we're deleting images where you see <i>boxes</i> that contain animals. <i>Not</i> deleting an image is equivalent to marking it as a false positive in this process, so if you're unsure, it's always safer to delete the image from the RDE folder, which will leave the image in the final output set. There's rarely harm in deleting a few too many from the RDE folder.
|
||||
|
||||
4. For this step, I strongly recommend <a href="https://www.irfanview.com/">IrfanView</a>. I keep one hand on the "page-down" key and one hand on the "delete" key, and I can blast through several images a second this way.
|
||||
|
||||
5. OK, you're back, and you just looked at a lot of images with boxes on trees and other annoying stuff. Now run the "post-processing (post-RDE)" cell to generate a new HTML preview. You should see that the number of detections is lower than in the preview you generated earlier, since you just got rid of a bunch of detections.
|
||||
|
||||
|
||||
### Uploading previews to our Web server
|
||||
|
||||
For now, ask Dan to create a login and associated folder on our Web server. If the organization associated with this task is called "university_of_arendelle", Dan will create a folder at `/datadrive/html/data/university_of_arendelle`. You should copy (with SCP) (I use <a href="https://www.bitvise.com/">Bitvise</a>) the postprocessing folder(s) there, e.g. if your output base was:
|
||||
|
||||
`g:\university_of_arendelle`
|
||||
|
||||
...and your task set name was:
|
||||
|
||||
`university_of_arendelle-20200409`
|
||||
|
||||
You will copy a folder that looks like:
|
||||
|
||||
`g:\university_of_arendelle\university_of_arendelle-20200409\postprocessing\university_of_arendelle-20200409_0.800`
|
||||
|
||||
This will be externally visible (though password-protected) at:
|
||||
|
||||
`http://datavm.com/data/university_of_arendelle/university_of_arendelle-20200409_0.800`
|
||||
|
||||
|
||||
### Uploading results to our file share
|
||||
|
||||
The .json results files - including the results before and after repeat detection elimination, if applicable - are generally uploaded to our AI for Earth file share when anything somewhat stable is uploaded to the Web server. This is just a placeholder to add instructions later. Note to self: we generally zip .json files if they're larger than ~50MB.
|
|
@ -1,167 +0,0 @@
|
|||
#
# manage_video_batch.py
#
# Notebook-esque script to manage the process of running a local batch of videos
# through MD. Defers most of the heavy lifting to manage_local_batch.py .
#
# The script is organized as "#%%" cells that are meant to be run interactively,
# in order.  Paths below are placeholders to be edited per job.
#

#%% Imports and constants

import path_utils
import os
from detection import video_utils

input_folder = '/datadrive/data'
output_folder_base = '/datadrive/frames'

assert os.path.isdir(input_folder)
os.makedirs(output_folder_base,exist_ok=True)


#%% Split videos into frames

# Repeated from the cell above so this cell can be run on its own
assert os.path.isdir(input_folder)
os.makedirs(output_folder_base,exist_ok=True)

recursive = True
overwrite = True
n_threads = 5
every_n_frames = 10

frame_filenames_by_video,fs_by_video,video_filenames = \
    video_utils.video_folder_to_frames(input_folder=input_folder,
                                       output_folder_base=output_folder_base,
                                       recursive=recursive,
                                       overwrite=overwrite,
                                       n_threads=n_threads,
                                       every_n_frames=every_n_frames)


#%% List frame files, break into folders

from collections import defaultdict

frame_files = path_utils.find_images(output_folder_base,True)
frame_files = [s.replace('\\','/') for s in frame_files]
print('Enumerated {} total frames'.format(len(frame_files)))

# Find unique (relative) folders
folder_to_frame_files = defaultdict(list)

# fn = frame_files[0]
for fn in frame_files:
    folder_name = os.path.dirname(fn)
    folder_name = os.path.relpath(folder_name,output_folder_base)
    folder_to_frame_files[folder_name].append(fn)

print('Found {} folders for {} files'.format(len(folder_to_frame_files),len(frame_files)))


#%% List videos

# Note that this replaces the (absolute) list returned by video_folder_to_frames()
# with paths relative to input_folder.
video_filenames = video_utils.find_videos(input_folder,recursive=True)
video_filenames = [os.path.relpath(fn,input_folder) for fn in video_filenames]
print('Input folder contains {} videos'.format(len(video_filenames)))


#%% Check for videos that are missing entirely

# list(folder_to_frame_files.keys())[0]
# video_filenames[0]

missing_videos = []

# A video is considered missing if no frame folder matches its relative path
# fn = video_filenames[0]
for relative_fn in video_filenames:
    if relative_fn not in folder_to_frame_files:
        missing_videos.append(relative_fn)

print('{} of {} folders are missing frames entirely'.format(len(missing_videos),
                                                            len(video_filenames)))


#%% Check for videos with very few frames

min_frames_for_valid_video = 10

low_frame_videos = []

# Use a dedicated loop variable here: rebinding the module-level "frame_files"
# list would silently break re-running the earlier cells in this session.
for folder_name, folder_frame_files in folder_to_frame_files.items():
    if len(folder_frame_files) < min_frames_for_valid_video:
        low_frame_videos.append(folder_name)

print('{} of {} folders have fewer than {} frames'.format(
    len(low_frame_videos),len(video_filenames),min_frames_for_valid_video))


#%% Print the list of videos that are problematic

print('Videos that could not be decoded:\n')

for fn in missing_videos:
    print(fn)

print('\nVideos with fewer than {} decoded frames:\n'.format(min_frames_for_valid_video))

for fn in low_frame_videos:
    print(fn)


#%% Process images like we would for any other camera trap job

# ...typically using manage_local_batch.py, but do this however you like, as long
# as you get a results file at the end.
#
# If you do RDE, remember to use the second folder from the bottom, rather than the
# bottom-most folder.


#%% Convert frame results to video results

from detection.video_utils import frame_results_to_video_results

filtered_output_filename = '/results/organization/stuff.json'
video_output_filename = filtered_output_filename.replace('.json','_aggregated.json')
frame_results_to_video_results(filtered_output_filename,video_output_filename)


#%% Confirm that the videos in the .json file are what we expect them to be

import json

with open(video_output_filename,'r') as f:
    video_results = json.load(f)

video_filenames_set = set(video_filenames)

filenames_in_video_results_set = {im['file'] for im in video_results['images']}

for fn in filenames_in_video_results_set:
    assert fn in video_filenames_set, \
        'Video {} is in the results file but not in the input folder'.format(fn)


#%% Scrap

# Everything below is ad hoc debugging code that is never executed as part of a
# normal run; individual cells are run manually when needed.
if False:

    pass

    #%% Test a possibly-broken video

    fn = '/datadrive/tmp/video.AVI'

    fs = video_utils.get_video_fs(fn)
    print(fs)

    tmpfolder = '/home/user/tmp/frametmp'
    os.makedirs(tmpfolder,exist_ok=True)

    video_utils.video_to_frames(fn, tmpfolder, verbose=True, every_n_frames=10)


    #%% List videos in a folder

    input_folder = '/datadrive/tmp/organization/data'
    video_filenames = video_utils.find_videos(input_folder,recursive=True)
|
|
@ -1,565 +0,0 @@
|
|||
"""

prepare_api_submission.py

This module defines the Task class and helper methods that are useful for
submitting tasks to the AI for Earth Camera Trap Batch Detection API.

Here's the stuff we usually do before submitting a task:

1) Upload images to Azure Blob Storage... we do this with azcopy, not addressed
   in this script.

2) List the files you want the API to process.
   ai4eutils.ai4e_azure_utils.enumerate_blobs_to_file()

3) Divide that list into chunks that will become individual API submissions.
   divide_files_into_tasks()

4) Put each .json file in a blob container and get a read-only SAS URL for it.
   Task.upload_images_list()

5) Generate the API query(ies) you'll submit to the API.
   Task.generate_api_request()

6) Submit the API query.  This can be done manually with Postman as well.
   Task.submit()

7) Monitor task status
   Task.check_status()

8) Combine multiple API outputs

9) We're now into what we really call "postprocessing", rather than
   "data_preparation", but... possibly do some amount of partner-specific
   renaming, folder manipulation, etc. This is very partner-specific, but
   generally done via:

   find_repeat_detections.py
   subset_json_detector_output.py
   postprocess_batch_results.py

"""
|
||||
|
||||
|
||||
#%% Imports
|
||||
|
||||
from enum import Enum
|
||||
import json
|
||||
import os
|
||||
import posixpath
|
||||
import string
|
||||
from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple
|
||||
import urllib
|
||||
|
||||
import requests
|
||||
|
||||
import ai4e_azure_utils # from ai4eutils
|
||||
import path_utils # from ai4eutils
|
||||
|
||||
|
||||
#%% Constants
|
||||
|
||||
MAX_FILES_PER_API_TASK = 1_000_000
|
||||
IMAGES_PER_SHARD = 2000
|
||||
|
||||
VALID_REQUEST_NAME_CHARS = f'-_{string.ascii_letters}{string.digits}'
|
||||
REQUEST_NAME_CHAR_LIMIT = 92
|
||||
|
||||
|
||||
#%% Classes
|
||||
|
||||
class BatchAPISubmissionError(Exception):
    """Raised when the Batch Detection API rejects or mishandles a task submission."""
|
||||
|
||||
|
||||
class BatchAPIResponseError(Exception):
    """Raised when a Batch Detection API response is inconsistent with the task."""
|
||||
|
||||
|
||||
class TaskStatus(str, Enum):
    """
    Request states reported by the Batch Detection API.

    Members are also plain strings, so they compare equal to the raw
    'request_status' values found in API responses.
    """

    RUNNING = 'running'
    FAILED = 'failed'
    PROBLEM = 'problem'
    COMPLETED = 'completed'
|
||||
|
||||
|
||||
class Task:
    """
    Represents a Batch Detection API task.

    Given the Batch Detection API URL, assumes that the endpoints are:
        /request_detections
            for submitting tasks
        /task/<task.id>
            for checking on task status
    """

    # class variables
    request_endpoint: ClassVar[str] = 'request_detections'  # submit tasks
    task_status_endpoint: ClassVar[str] = 'task'  # check task status

    # instance variables, in order of when they are typically set
    name: str
    api_url: str
    local_images_list_path: str
    remote_images_list_url: str  # includes SAS token if uploaded with one
    api_request: Dict[str, Any]  # request object before JSON serialization
    id: str
    response: Dict[str, Any]  # decoded response JSON
    status: TaskStatus
    bypass_status_check: bool  # set when we manually complete a task

    def __init__(self, name: str, task_id: Optional[str] = None,
                 images_list_path: Optional[str] = None,
                 validate: bool = True, api_url: Optional[str] = None):
        """
        Initializes a Task.

        If desired, validates that the images list does not exceed the maximum
        length.

        Args:
            name: str, name of the request; invalid characters are removed via
                clean_request_name() and a warning is printed if that changes
                the name
            task_id: optional str, ID of submitted task
            images_list_path: optional str, path or URL to a JSON file
                containing a list of image paths, must start with 'http' if a
                URL (this prefix is how a URL is told apart from a local path)
            validate: bool, whether to validate the given images list,
                only used if images_list_path is not None
            api_url: optional str, Batch Detection API URL,
                defaults to environment variable BATCH_DETECTION_API_URL

        Raises:
            KeyError: if api_url is not given and the environment variable
                BATCH_DETECTION_API_URL is not set
            requests.HTTPError: if images_list_path is a URL but an error
                occurred trying to fetch it
            ValueError: if images_list_path is given, but the file contains more
                than MAX_FILES_PER_API_TASK entries
        """

        self.bypass_status_check = False

        clean_name = clean_request_name(name)
        if name != clean_name:
            print('Warning: renamed {} to {}'.format(name,clean_name))
        self.name = clean_name

        if api_url is None:
            api_url = os.environ['BATCH_DETECTION_API_URL']
        assert api_url is not None and api_url != ''
        self.api_url = api_url

        if task_id is not None:
            self.id = task_id

        if images_list_path is not None:

            is_url = images_list_path.startswith('http')

            if is_url:
                self.remote_images_list_url = images_list_path
            else:
                self.local_images_list_path = images_list_path

            if validate:

                if is_url:
                    r = requests.get(images_list_path)
                    # Surface HTTP failures as requests.HTTPError (as documented)
                    # rather than as a confusing JSON decoding error.
                    r.raise_for_status()
                    images_list = r.json()
                else:
                    with open(images_list_path, 'r') as f:
                        images_list = json.load(f)

                if len(images_list) > MAX_FILES_PER_API_TASK:
                    raise ValueError('Images list has too many files')

                # Leaving this commented out to remind us that we don't want this check here; let
                # the API fail on these images.  It's a huge hassle to remove non-image
                # files.
                #
                # for path_or_url in images_list:
                #     if not is_image_file_or_url(path_or_url):
                #         raise ValueError('{} is not an image'.format(path_or_url))


    def __repr__(self) -> str:
        """
        Returns a short description containing the task name and, if set, its ID.
        """
        return 'Task(name={name}, id={id})'.format(
            name=self.name,
            id=getattr(self, 'id', None))
        # Commented out as a reminder: don't check task status (which is a rest API call)
        # in __repr__; require the caller to explicitly request status
        # status=getattr(self, 'status', None))


    def upload_images_list(self, account: str, container: str, sas_token: str,
                           blob_name: Optional[str] = None, overwrite: bool=False) -> None:
        """
        Uploads the local images list to an Azure Blob Storage container.

        Sets self.remote_images_list_url to the blob URL of the uploaded file.

        Args:
            account: str, Azure Storage account name
            container: str, Azure Blob Storage container name
            sas_token: str, Shared Access Signature (SAS) with write permission,
                does not start with '?'
            blob_name: optional str, defaults to basename of
                self.local_images_list_path if blob_name is not given
            overwrite: bool, passed through to
                ai4e_azure_utils.upload_file_to_blob()
        """

        if blob_name is None:
            blob_name = os.path.basename(self.local_images_list_path)
        self.remote_images_list_url = ai4e_azure_utils.upload_file_to_blob(
            account_name=account, container_name=container,
            local_path=self.local_images_list_path, blob_name=blob_name,
            sas_token=sas_token, overwrite=overwrite)


    def generate_api_request(self,
                             caller: str,
                             input_container_url: Optional[str] = None,
                             image_path_prefix: Optional[str] = None,
                             **kwargs: Any
                             ) -> Dict[str, Any]:
        """
        Generate API request JSON.

        Sets self.api_request to the request JSON. For complete list of API
        input parameters, see:
        https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#api-inputs

        Args:
            caller: str
            input_container_url: optional str, URL to Azure Blob Storage
                container where images are stored. URL must include SAS token
                with read and list permissions if the container is not public.
                Only provide this parameter when the image paths in
                self.remote_images_list_url are relative to a container.
                When omitted, the request sets 'use_url' to True instead.
            image_path_prefix: optional str, if given, sent to the API as the
                'image_path_prefix' request parameter
            kwargs: additional API input parameters; the fields set by this
                method take precedence over same-named entries

        Returns: dict, represents the JSON request to be submitted
        """

        request: Dict[str, Any] = {
            **kwargs,
            'request_name': self.name,
            'caller': caller,
            'images_requested_json_sas': self.remote_images_list_url,
        }
        if input_container_url is None:
            request['use_url'] = True
        else:
            request['input_container_sas'] = input_container_url
        if image_path_prefix is not None:
            request['image_path_prefix'] = image_path_prefix
        self.api_request = request
        return request


    def submit(self) -> str:
        """
        Submit this task to the Batch Detection API.

        Sets self.id to the returned request ID. Only run this method after
        generate_api_request().

        Returns: str, task ID

        Raises:
            requests.HTTPError, if an HTTP error occurred
            BatchAPISubmissionError, if request returns an error
        """

        url = posixpath.join(self.api_url, self.request_endpoint)
        r = requests.post(url, json=self.api_request)
        r.raise_for_status()
        assert r.status_code == requests.codes.ok

        response = r.json()
        if 'error' in response:
            raise BatchAPISubmissionError(response['error'])
        if 'request_id' not in response:
            raise BatchAPISubmissionError(
                '"request_id" not in API response: {}'.format(response))
        self.id = response['request_id']
        return self.id


    def check_status(self) -> Dict[str, Any]:
        """
        Checks the task status.

        Sets self.response and self.status.  If the task was completed manually
        via force_completion(), no request is made and the stored response is
        returned as-is.

        Returns: dict, contains fields ['Status', 'TaskId'] and possibly others

        Raises:
            requests.HTTPError, if an HTTP error occurred
            BatchAPIResponseError, if response task ID does not match self.id
        """

        if self.bypass_status_check:
            return self.response

        url = posixpath.join(self.api_url, self.task_status_endpoint, self.id)
        r = requests.get(url)

        r.raise_for_status()
        assert r.status_code == requests.codes.ok

        self.response = r.json()
        if self.response['TaskId'] != self.id:
            raise BatchAPIResponseError(
                f'Response task ID {self.response["TaskId"]} does not match '
                f'expected task ID {self.id}.')

        # Deliberately best-effort: a malformed or unrecognized status is recorded
        # as a descriptive string rather than raised, so polling loops keep going.
        try:
            self.status = TaskStatus(self.response['Status']['request_status'])
        except Exception as e:
            self.status = 'Exception error: {}'.format(str(e))
        return self.response


    def force_completion(self, response: Dict[str, Any]) -> None:
        """
        Simulate completion of a task by passing a manually-created response
        (see create_response_message()).

        Sets self.response and self.status from the given response, and makes
        subsequent check_status() calls return it without contacting the API.
        """
        self.response = response
        self.status = TaskStatus(self.response['Status']['request_status'])
        self.bypass_status_check = True


    def get_output_file_urls(self, verbose: bool = False) -> Dict[str, str]:
        """
        Retrieves the dictionary of output file URLs for this task.

        Only run this method when task.status == TaskStatus.COMPLETED.  The
        [verbose] argument is currently unused.
        """

        assert self.status == TaskStatus.COMPLETED
        return self.response['Status']['message']['output_file_urls']


    def get_missing_images(self, submitted_images: Sequence[str],
                           verbose: bool = False) -> List[str]:
        """
        Compares the submitted and processed images lists to find missing
        images.

        "missing": an image from the submitted list that was not processed,
            for whatever reason
        "failed": a missing image explicitly marked as 'failed' by the
            batch detection API

        Only run this method when task.status == TaskStatus.COMPLETED.

        Args:
            submitted_images: list of str, the image paths that were submitted
            verbose: bool, whether to print summary counts

        Returns: list of str, sorted list of missing image paths

        Raises:
            requests.HTTPError, if the detections file could not be downloaded
            BatchAPIResponseError, if an output URL does not contain self.id

        Ignores non-image filenames.
        """

        assert self.status == TaskStatus.COMPLETED
        message = self.response['Status']['message']

        # estimate # of failed images from failed shards
        n_failed_shards = message.get('num_failed_shards', 0)

        # Sanity-check every output URL, then download the detections file to memory
        output_file_urls = message['output_file_urls']
        for url in output_file_urls.values():
            if self.id not in url:
                raise BatchAPIResponseError(
                    'Task ID missing from output URL: {}'.format(url))

        r = requests.get(output_file_urls['detections'])
        r.raise_for_status()
        detections = r.json()

        return get_missing_images_from_json(submitted_images,detections,n_failed_shards,verbose)
|
||||
|
||||
|
||||
def create_response_message(n_failed_shards,detections_url,task_id):
    """
    Manually create a response message in the format of the batch API.  Used when tasks hang or fail
    and we need to simulate their completion by directly pulling the results from the AML output.

    Args:
        n_failed_shards: number of failed shards to report in the message
        detections_url: URL of the detections output file
        task_id: ID of the task this response is for

    Returns: dict, a response with a 'Status' entry (holding 'message' and a
    completed 'request_status') plus the task ID under both 'request_id' and
    'TaskId'; 'TaskId' is the field Task.check_status() reads from real
    responses.
    """
    output_file_urls = {
        'detections':detections_url
    }
    message = {'num_failed_shards':n_failed_shards,'output_file_urls':output_file_urls}
    status = {'message':message,'request_status':str(TaskStatus.COMPLETED.value)}
    response = {
        'Status': status,
        'request_id': task_id,
        'TaskId': task_id,
    }
    return response
|
||||
|
||||
|
||||
def get_missing_images_from_json(submitted_images,detections,n_failed_shards,verbose=False):
    """
    Given the json-encoded results for the lists of submitted images and detections,
    find and return the list of images missing in the list of detections.  Ignores
    non-image filenames.

    Args:
        submitted_images: iterable of str, paths/URLs that were submitted
        detections: dict with an 'images' list whose entries have a 'file' field
        n_failed_shards: int, number of failed shards, used only for the
            verbose estimate of failed images
        verbose: bool, whether to print summary counts

    Returns: sorted list of str, submitted image paths with no result

    Raises:
        AssertionError: if results were returned for files that were not
            submitted (as images)
    """

    # Split submitted files into images and non-images in a single pass
    image_files_submitted = []
    non_image_files_submitted = []
    for s in submitted_images:
        if is_image_file_or_url(s):
            image_files_submitted.append(s)
        else:
            non_image_files_submitted.append(s)

    # Warn about (at most the first ten) files that don't appear to be images
    if len(non_image_files_submitted) > 0:
        print('Warning, {} non-image files submitted:\n'.format(len(non_image_files_submitted)))
        for fn in non_image_files_submitted[:10]:
            print(fn)
        print('...\n')

    submitted_images = image_files_submitted

    # Diff submitted and processed images
    processed_images = [d['file'] for d in detections['images']]
    submitted_set = set(submitted_images)
    processed_set = set(processed_images)
    missing_images = sorted(submitted_set - processed_set)

    if verbose:
        estimated_failed_shard_images = n_failed_shards * IMAGES_PER_SHARD
        print('Submitted {} images'.format(len(submitted_images)))
        print('Received results for {} images'.format(len(processed_images)))
        print(f'{n_failed_shards} failed shards '
              f'(~approx {estimated_failed_shard_images} images)')
        print('{} images not in results'.format(len(missing_images)))

    # Confirm that the processed images are a subset of the submitted images
    assert processed_set <= submitted_set, (
        'Processed images should be a subset of submitted images')

    return missing_images
|
||||
|
||||
|
||||
def divide_chunks(l: Sequence[Any], n: int) -> List[Sequence[Any]]:
    """
    Divide list *l* into chunks of size *n*, with the last chunk containing
    <= n items.

    Args:
        l: sequence to split; each chunk is a slice of it, so chunks have the
            same type as *l*
        n: int, chunk size, must be positive

    Returns: list of chunks, in order; empty if *l* is empty

    Raises:
        ValueError: if *n* is not positive (a negative size would otherwise
            silently produce no chunks at all)
    """

    if n <= 0:
        raise ValueError('Chunk size must be positive, got {}'.format(n))

    return [l[start:start + n] for start in range(0, len(l), n)]
|
||||
|
||||
|
||||
def divide_list_into_tasks(file_list: Sequence[str],
                           save_path: str,
                           n_files_per_task: int = MAX_FILES_PER_API_TASK
                           ) -> Tuple[List[str], List[Sequence[Any]]]:
    """
    Splits a list of filenames into chunks of at most *n_files_per_task*
    entries and writes each chunk to its own JSON file.

    Chunk files are named after *save_path*, with `chunkXXX` (a zero-padded
    chunk index) inserted before the extension by
    path_utils.insert_before_extension().  For example, if *save_path* is
    `blah.json`, output files will be `blah.chunk000.json`,
    `blah.chunk001.json`, etc.

    Args:
        file_list: list of str, filenames to split across multiple JSON files
        save_path: str, base path to save the chunked lists
        n_files_per_task: int, max number of files to include in each API task

    Returns:
        output_files: list of str, output JSON file names
        chunks: list of list of str, chunks[i] is the content of output_files[i]
    """

    chunks = divide_chunks(file_list, n_files_per_task)
    output_files = []

    for chunk_index, chunk_contents in enumerate(chunks):
        chunk_path = path_utils.insert_before_extension(
            save_path, 'chunk{:0>3d}'.format(chunk_index))
        output_files.append(chunk_path)
        with open(chunk_path, 'w') as chunk_file:
            json.dump(chunk_contents, chunk_file, indent=1)

    return output_files, chunks
|
||||
|
||||
|
||||
def divide_files_into_tasks(file_list_json: str,
                            n_files_per_task: int = MAX_FILES_PER_API_TASK
                            ) -> Tuple[List[str], List[Sequence[Any]]]:
    """
    Loads a file list from the JSON file *file_list_json* and hands it to
    divide_list_into_tasks(), using *file_list_json* itself as the base path
    for the chunk files.

    Returns the same (output_files, chunks) tuple as divide_list_into_tasks().
    """

    with open(file_list_json) as list_file:
        loaded_file_list = json.load(list_file)

    return divide_list_into_tasks(
        loaded_file_list,
        save_path=file_list_json,
        n_files_per_task=n_files_per_task)
|
||||
|
||||
|
||||
def clean_request_name(request_name: str,
                       whitelist: str = VALID_REQUEST_NAME_CHARS,
                       char_limit: int = REQUEST_NAME_CHAR_LIMIT) -> str:
    """
    Returns a version of *request_name* that is safe to use as an API request
    name: the name is passed through path_utils.clean_filename() with the
    given whitelist and character limit, then any remaining ':' is replaced
    with '_'.
    """
    cleaned_name = path_utils.clean_filename(
        filename=request_name, whitelist=whitelist, char_limit=char_limit)
    return cleaned_name.replace(':','_')
|
||||
|
||||
|
||||
def download_url(url: str, save_path: str, verbose: bool = False) -> None:
    """
    Download a URL to a local file.

    Args:
        url: str, URL to download
        save_path: str, local path to write the downloaded content to
        verbose: bool, whether to print the source and destination

    Raises:
        AssertionError: if save_path does not exist after the download
    """

    # "import urllib" alone does not guarantee that the urllib.request submodule
    # is loaded; import it explicitly rather than relying on some other module
    # having imported it first.
    import urllib.request

    if verbose:
        print('Downloading {} to {}'.format(url,save_path))
    urllib.request.urlretrieve(url, save_path)
    assert os.path.isfile(save_path)
|
||||
|
||||
|
||||
def is_image_file_or_url(path_or_url: str) -> bool:
    """
    Checks (via file extension) whether a file path or URL is an image.

    If path_or_url is a URL, strip away any query strings '?...'. This should
    have no adverse effect on local paths.

    The extension test itself is delegated to path_utils.is_image_file().
    """

    # "import urllib" alone does not guarantee that the urllib.parse submodule
    # is loaded; import what we need explicitly.
    from urllib.parse import urlparse

    stripped_path_or_url = urlparse(path_or_url).path
    return path_utils.is_image_file(stripped_path_or_url)
|
||||
|
||||
|
||||
#%% Interactive driver

# Never executed on import; the cells below are meant to be run manually, one at
# a time, after filling in the placeholder values.
if False:

    #%%

    # Enumerate the blobs in a container to a local file
    account_name = ''
    sas_token = 'st=...'
    container_name = ''
    rsearch = None # '^Y53'
    output_file = r'output.json'

    blobs = ai4e_azure_utils.enumerate_blobs_to_file(
        output_file=output_file,
        account_name=account_name,
        sas_token=sas_token,
        container_name=container_name,
        rsearch=rsearch)

    #%%

    # Split the file list into per-task chunk files; note that
    # divide_files_into_tasks() returns an (output_files, chunks) tuple.
    file_list_json = r"D:\temp\idfg_20190801-hddrop_image_list.json"
    task_files = divide_files_into_tasks(file_list_json)

    #%%

    file_list_sas_urls = [
        '','',''
    ]

    input_container_sas_url = ''
    request_name_base = ''
    caller = 'blah@blah.com'

    # NOTE(review): generate_api_queries() is not defined in the visible part of
    # this module, so this cell presumably predates the Task class (see
    # Task.generate_api_request()) - confirm before running.
    request_strings,request_dicts = generate_api_queries(
        input_container_sas_url,
        file_list_sas_urls,
        request_name_base,
        caller)

    for s in request_strings:
        print(s)
|
Двоичные данные
api/batch_processing/images/Postman_screenshot.png
До Ширина: | Высота: | Размер: 109 KiB |
Двоичные данные
api/batch_processing/images/SAS_screenshot.png
До Ширина: | Высота: | Размер: 95 KiB |
Двоичные данные
api/batch_processing/images/task_endpoint_response.png
До Ширина: | Высота: | Размер: 29 KiB |
|
@ -1,23 +0,0 @@
|
|||
## MegaDetector batch processing workflow integration
|
||||
|
||||
This folder contains information about ways to use MegaDetector output files in various workflows. Specifically...
|
||||
|
||||
### Timelapse2
|
||||
|
||||
[Timelapse2](http://saul.cpsc.ucalgary.ca/timelapse/) can read the results produced by the [MegaDetector batch processing API](https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing) and/or [run_tf_detector_batch.py](https://github.com/ecologize/CameraTraps/blob/master/detection/run_tf_detector_batch.py), as well as the species classification results produced by our [classification pipeline](https://github.com/ecologize/CameraTraps/tree/master/classification). For information about how to use this feature, see [timelapse.md](timelapse.md), but mostly see the section in the Timelapse manual called "Automatic Image Recognition". If you're a Timelapse user, you may also want to check out our [guide to configuring Azure virtual machines](remote_desktop.md) to run Timelapse in the cloud, which can make it easier to split annotation workloads across your team.
|
||||
|
||||
### eMammal
|
||||
|
||||
A [standalone application](https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/integration/eMammal) is available to transfer MegaDetector results from our .json results format into the [eMammal desktop client](https://emammal.si.edu/eyes-wildlife/content/downloading-desktop-application). Many eMammal users also work with our results by splitting images into separate folders for animal/empty/vehicle/person using [this script](https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/postprocessing/separate_detections_into_folders.py), then either discarding the blanks or creating separate deployments for animal/empty/human.
|
||||
|
||||
### digiKam
|
||||
|
||||
[Python tools](digiKam/README.md) (which can be run with a GUI) to transfer MegaDetector results from our .json results format into XMP image metadata, specifically for use with [digiKam](https://www.digikam.org/).
|
||||
|
||||
### Data preparation
|
||||
|
||||
For any of these use cases, you may also want to check out our [Camera Trap JSON Manager App](https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/postprocessing/CameraTrapJsonManagerApp.md), which can help you split/modify our .json results files to break into smaller projects, adjust relative paths, etc.
|
||||
|
||||
If you use any of these tools – or if we're missing an important one – <a href="mailto:cameratraps@lila.science">email us</a>!
|
||||
|
||||
|
|
@ -1 +0,0 @@
|
|||
.spyproject/
|
|
@ -1,59 +0,0 @@
|
|||
# MegaDetector integration with digiKam
|
||||
|
||||
This folder contains a Python tool to transfer annotations produced by MegaDetector to the hierarchicalSubject field of XMP data in JPG images, to support the ingestion and review of those results in <a href="https://www.digikam.org/">digiKam</a>.
|
||||
|
||||
The tool can be run from the command line or as a GUI-based application.
|
||||
|
||||
|
||||
## Running the command-line tool
|
||||
|
||||
Run the script as:
|
||||
|
||||
`python xmp_integration.py --input_file [input_file] --image_folder [image_folder] --path_to_remove [path_to_remove]`
|
||||
|
||||
* `input_file` is the .json file produced by the MegaDetector batch API or by run_tf_detector_batch.py
|
||||
* `image_folder` is the root folder where your images are
|
||||
|
||||
`path_to_remove` (optional) is a string that should be removed from the head of all the image paths in the .json file. For example, let's say you ran MegaDetector on paths that looked like:
|
||||
|
||||
`images_for_megadetector/camera1/image01.jpg`
|
||||
|
||||
...but now your images look like:
|
||||
|
||||
`c:\my_images\camera1\image01.jpg`
|
||||
|
||||
In this case, you would want to specify `images_for_megadetector/` for `path_to_remove` and `c:\my_images` for `image_folder`.
|
||||
|
||||
|
||||
## Running the GUI-based tool
|
||||
|
||||
Run the script as:
|
||||
|
||||
`python xmp_integration.py --gui`
|
||||
|
||||
* Select the folder that contains the images
|
||||
* Select the .json file from the MegaDetector API
|
||||
* Optionally specify a leading string to remove from image paths (see above)
|
||||
* Click "Submit"
|
||||
|
||||
![](images/screenshot.png)
|
||||
|
||||
|
||||
## Validating the XMP data in digiKam
|
||||
|
||||
* Open <a href="https://www.digikam.org/">digiKam</a>
|
||||
* Load the images folder into the album
|
||||
* Click on an image to view the XMP metadata
|
||||
* Click on the `Metadata` tab and then the `XMP` tab in the right-side panel
|
||||
* You should see the `hierarchicalSubject` field in `XMP metadata`, populated with your MegaDetector outputs
|
||||
|
||||
![](images/digikam.png)
|
||||
|
||||
## Compiling to an .exe
|
||||
|
||||
If you want to compile the tool to an executable (e.g. to make it easier to distribute within your organization), run:
|
||||
|
||||
`python setup.py build`
|
||||
|
||||
This will create a `build/exe.win-amd64-3.7` folder, in which you'll find `xmp_integration.exe`.
|
||||
|
Двоичные данные
api/batch_processing/integration/digiKam/images/aiforearth.png
До Ширина: | Высота: | Размер: 7.5 KiB |
Двоичные данные
api/batch_processing/integration/digiKam/images/bg.png
До Ширина: | Высота: | Размер: 160 KiB |
Двоичные данные
api/batch_processing/integration/digiKam/images/digikam.png
До Ширина: | Высота: | Размер: 76 KiB |
Двоичные данные
api/batch_processing/integration/digiKam/images/screenshot.png
До Ширина: | Высота: | Размер: 295 KiB |
|
@ -1,6 +0,0 @@
|
|||
# cx_Freeze build script: declares xmp_integration.py as the executable to
# build (run as "python setup.py build").
from cx_Freeze import setup, Executable

setup(name = "XMP Integration" ,
      version = "3.0" ,
      description = "XMP metadata writer" ,
      executables = [Executable("xmp_integration.py")])
|
|
@ -1,466 +0,0 @@
|
|||
#
|
||||
# xmp_integration.py
|
||||
#
|
||||
# Tools for loading MegaDetector batch API results into XMP metadata, specifically
|
||||
# for consumption in digiKam:
|
||||
#
|
||||
# https://cran.r-project.org/web/packages/camtrapR/vignettes/camtrapr2.html
|
||||
#
|
||||
|
||||
#%% Imports and constants
|
||||
|
||||
import argparse
|
||||
import tkinter
|
||||
from tkinter import ttk, messagebox, filedialog
|
||||
|
||||
import inspect
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import pyexiv2
|
||||
import ntpath
|
||||
import threading
|
||||
import traceback
|
||||
|
||||
from tqdm import tqdm
|
||||
from multiprocessing import Pool
|
||||
from multiprocessing.pool import ThreadPool
|
||||
from functools import partial
|
||||
|
||||
category_mapping = {'person': 'Human', 'animal': 'Animal', 'vehicle': 'Vehicle'}
|
||||
|
||||
|
||||
#%% Class definitions
|
||||
|
||||
class xmp_gui:
    """
    Holder for the tkinter objects that the processing code reads from and
    writes to while running in GUI mode.

    Every widget field starts out as None and is assigned once the window
    has been built.
    """

    # Top-level tkinter window
    root = None

    # Entry widgets holding the user-editable settings
    textarea_min_threshold = None
    textarea_remove_path = None
    textarea_rename_conf = None
    textarea_rename_cats = None
    textarea_num_threads = None

    # Text widget that status/error messages are appended to
    textarea_status = None

    # Progress bar widget and the ttk style used to draw its percentage label
    progress_bar = None
    style = None

    num_threads = 1
|
||||
|
||||
class xmp_integration_options:
    """
    Settings for one XMP-writing run.

    Values are None when not specified; numeric settings may arrive as
    strings because they are read from the command line or from text fields.
    """

    # Folder where images are stored
    image_folder = None

    # .json file containing MegaDetector output
    input_file = None

    # String to remove from all path names, typically representing a
    # prefix that was added during MegaDetector processing
    remove_path = None

    # Optionally *rename* (not copy) all images that have no detections
    # above [rename_conf] for the categories in rename_cats from x.jpg to
    # x_check.jpg
    rename_conf = None

    # Comma-delimited list of category names (or "all") to apply the
    # rename_conf behavior to
    rename_cats = None

    # Minimum detection threshold (applies to all classes; defaults to None,
    # i.e. every detection counts)
    min_threshold = None

    # Number of parallel workers used to process images
    num_threads = 1

    # xmp_gui instance when running in GUI mode, otherwise None
    xmp_gui = None

    # True when the tool was launched in GUI mode
    gui = False
|
||||
|
||||
|
||||
#%% Functions
|
||||
|
||||
def write_status(options,s):
    """
    Append the line [s] to the GUI status box.

    Does nothing when there is no GUI (options.xmp_gui is None).
    """

    gui = options.xmp_gui
    if gui is not None:
        # The status box is switched to "normal" only for the duration of
        # the insert, then disabled again
        gui.textarea_status.configure(state="normal")
        gui.textarea_status.insert(tkinter.END, s + '\n')
        gui.textarea_status.configure(state="disabled")
|
||||
|
||||
|
||||
# Number of images handled so far; only used to drive the GUI progress bar
n_images_processed = 0


def _optional_float(value):
    """
    Convert an optional numeric setting to a float.

    Settings may be None (not specified), a string (command line or GUI text
    field, where "not specified" is an empty string) or a number.  None and
    blank strings map to None.
    """

    if value is None:
        return None
    if isinstance(value, str):
        value = value.strip()
        if len(value) == 0:
            return None
    return float(value)


def _check_file_path(img_path):
    """
    Return the path an image is renamed to when it is flagged for manual
    review: "_check" is inserted before the extension, e.g.
    a/b.c.jpg -> a/b.c_check.jpg.
    """

    root, ext = os.path.splitext(img_path)
    return root + '_check' + ext


def update_xmp_metadata(categories, options, rename_cats, n_images, image):
    """
    Update the XMP metadata for a single image.

    Writes the display names of the detected categories to the image's
    Xmp.lr.hierarchicalSubject field.  When the rename options are in use,
    images that need a manual check are renamed from x.jpg to x_check.jpg.

    Errors are printed and sent to the GUI status box rather than raised, so
    a single bad image does not stop a batch.

    Args:
        categories: dict mapping detection category IDs to category names
            (the 'detection_categories' table of the .json file)
        options: xmp_integration_options for this run
        rename_cats: list of category names the rename behavior applies to
        n_images: total number of images in the batch (progress reporting)
        image: one entry of the .json 'images' list, with 'file' and
            'detections' fields
    """

    global n_images_processed

    # Relative image path; initialized here so the error message below can
    # always refer to it
    filename = ''

    try:

        filename = image['file']
        if options.remove_path is not None and len(options.remove_path) > 0:
            filename = filename.replace(options.remove_path, '')

        # Absolute image path
        img_path = os.path.join(options.image_folder, filename)
        if not os.path.isfile(img_path):
            raise FileNotFoundError('Image {} not found'.format(img_path))

        # None means "not specified" for both of these
        min_threshold = _optional_float(options.min_threshold)
        rename_conf = _optional_float(options.rename_conf)

        # List of categories to write to XMP metadata
        image_categories = []

        # Categories with above-threshold detections present for
        # this image
        original_image_cats = []

        # Maximum confidence for each category
        original_image_cats_conf = {}

        for detection in image['detections']:

            cat_name = categories[detection['category']]
            cat = category_mapping[cat_name]
            conf = float(detection['conf'])

            # Have we already added this to the list of categories to
            # write out to this image?
            if cat not in image_categories:

                # With no threshold we treat *any* detection as valid
                if min_threshold is None or conf > min_threshold:
                    image_categories.append(cat)
                    original_image_cats.append(cat_name)

            # Keep track of the highest-confidence detection for this class;
            # this looks at every detection so the stored value really is the
            # per-category maximum
            if min_threshold is not None and \
                conf > original_image_cats_conf.get(cat_name, 0):
                original_image_cats_conf[cat_name] = conf

        img = pyexiv2.Image(img_path)
        try:
            img.modify_xmp({'Xmp.lr.hierarchicalSubject': image_categories})
        finally:
            # Release the file before it is (possibly) renamed below.
            # NOTE(review): close() is looked up defensively in case the
            # installed pyexiv2 version does not provide it - confirm.
            close_image = getattr(img, 'close', None)
            if close_image is not None:
                close_image()

        # If we're doing the rename/.check behavior...
        if not (options.rename_conf is None and options.rename_cats is None):

            matching_cats = set(rename_cats).intersection(original_image_cats)

            # Is any category of interest present, but with low confidence?
            is_conf_low = False
            if min_threshold is not None and rename_conf is not None:
                is_conf_low = any(original_image_cats_conf[matching_cat] < rename_conf
                                  for matching_cat in matching_cats)

            no_detections = \
                min_threshold is not None and len(image['detections']) == 0

            if no_detections or is_conf_low:
                os.rename(img_path, _check_file_path(img_path))

        if options.xmp_gui is not None:

            n_images_processed += 1
            percentage = round((n_images_processed)/n_images*100)
            options.xmp_gui.progress_bar['value'] = percentage
            options.xmp_gui.root.update_idletasks()
            options.xmp_gui.style.configure('text.Horizontal.Tprogress_bar',
                                            text='{:g} %'.format(percentage))

    except Exception as e:

        s = 'Error processing image {}: {}'.format(filename,str(e))
        print(s)
        traceback.print_exc()
        write_status(options,s)
|
||||
|
||||
|
||||
def _resolve_num_threads(value):
    """
    Convert the num_threads setting to an int.

    Accepts an int, a numeric string (command line or GUI text field), or
    None / a blank string, which both mean "not specified" and give 1.
    """

    if value is None:
        return 1
    if isinstance(value, str):
        value = value.strip()
        if len(value) == 0:
            return 1
    return int(value)


def process_input_data(options):
    """
    Main function to loop over images and modify XMP data.

    In GUI mode (options.xmp_gui is set) the settings are first read from the
    form; a missing image folder or .json file shows an error box and ends
    the calling thread.  The .json file is then loaded and every image entry
    is passed to update_xmp_metadata().

    Any error while loading the file or running the batch is printed (and
    shown in an error box in GUI mode), after which SystemExit is raised with
    a non-zero status.

    Args:
        options: xmp_integration_options for this run; in GUI mode its
            settings are overwritten with the values from the form
    """

    if options.xmp_gui is not None:

        if (options.image_folder is None) or (len(options.image_folder) == 0):
            tkinter.messagebox.showerror(title='Error', message='Image folder is not selected')
            sys.exit()
        if (options.input_file is None) or (len(options.input_file) == 0):
            tkinter.messagebox.showerror(
                title='Error', message='No MegaDetector .json file selected')
            sys.exit()

        gui = options.xmp_gui
        options.remove_path = gui.textarea_remove_path.get()
        options.rename_conf = gui.textarea_rename_conf.get()
        options.rename_cats = gui.textarea_rename_cats.get()
        options.num_threads = gui.textarea_num_threads.get()
        options.min_threshold = gui.textarea_min_threshold.get()

    try:

        with open(options.input_file, 'r') as f:
            data = json.load(f)

        categories = data['detection_categories']
        images = data['images']
        n_images = len(images)

        # Categories that the rename behavior applies to
        rename_cats = []
        if options.rename_cats is not None:
            rename_cats = [cat.strip() for cat in options.rename_cats.split(',')]
            if rename_cats[0] == 'all':
                rename_cats = list(category_mapping.keys())

        num_threads = _resolve_num_threads(options.num_threads)

        func = partial(update_xmp_metadata, categories, options, rename_cats, n_images)

        if options.xmp_gui is None:

            # Command-line mode: worker processes plus a console progress bar
            with Pool(num_threads) as p:
                with tqdm(total=n_images) as pbar:
                    for _ in p.imap_unordered(func, images):
                        pbar.update()

        else:

            # GUI mode: worker threads, since each worker updates the widgets
            # held in options.xmp_gui
            with ThreadPool(num_threads) as p:
                p.map(func, images)

        s = 'Successfully processed {} images'.format(n_images)
        print(s)
        write_status(options,s)

    except Exception as e:

        print('Error processing input data: {}'.format(str(e)))
        traceback.print_exc()
        if options.xmp_gui is not None:
            tkinter.messagebox.showerror(title='Error',
                                         message='Make Sure you selected the proper image folder and JSON files')

        # Non-zero status so command-line callers can detect the failure
        sys.exit(1)
|
||||
|
||||
|
||||
def start_input_processing(options):
    """
    Run process_input_data(options) on a new thread and return immediately.
    """

    worker = threading.Thread(target=lambda: process_input_data(options))
    worker.start()
|
||||
|
||||
|
||||
def browse_folder(options,folder_path_var):
    """
    Ask the user for a folder, store the choice in options.image_folder and
    show it through [folder_path_var].
    """

    selected_folder = tkinter.filedialog.askdirectory()
    folder_path_var.set(selected_folder)
    options.image_folder = '{0}'.format(selected_folder)
|
||||
|
||||
|
||||
def browse_file(options,file_path_var):
    """
    Ask the user for a file, store the choice in options.input_file and show
    it through [file_path_var].
    """

    selected_file = tkinter.filedialog.askopenfilename()
    file_path_var.set(selected_file)
    options.input_file = '{0}'.format(selected_file)
|
||||
|
||||
|
||||
def create_gui(options):
    """
    Build the tkinter window, attach its widgets to options.xmp_gui, and run
    the tkinter main loop.
    """

    white = 'white'
    grey = 'grey'

    root = tkinter.Tk()
    root.resizable(False, False)
    root.configure(background=white)
    root.title('DigiKam Integration')

    # Banner
    banner_group = tkinter.LabelFrame(root, padx=5, pady=5)
    banner_group.configure(background=white)
    banner_group.pack(padx=10, pady=10, fill='both', expand='yes')

    banner = tkinter.Canvas(banner_group, width=800, height=150)
    banner.configure(background=white)
    banner.pack()
    logo_image = tkinter.PhotoImage(file='images/aiforearth.png')
    banner.create_image(0, 0, anchor=tkinter.NW, image=logo_image)
    background_image = tkinter.PhotoImage(file='images/bg.png')
    banner.create_image(0, 20, anchor=tkinter.NW, image=background_image)

    # Settings form: one row per setting, label in column 0, entry in
    # column 2, optional "Browse" button in column 5
    frame = tkinter.Frame(root)
    frame.configure(background=white)
    frame.pack()

    def add_label(row, text):
        label = tkinter.Label(frame, text=text)
        label.configure(background=white)
        label.grid(row=row, column=0)

    def add_entry(row, **entry_args):
        entry = tkinter.Entry(frame, width=50, highlightthickness=1, **entry_args)
        entry.configure(highlightbackground=grey, highlightcolor=grey)
        entry.grid(row=row, column=2)
        return entry

    def add_browse_button(row, on_click):
        button = tkinter.Button(frame, text='Browse', fg='blue', command=on_click)
        button.grid(row=row, column=5, padx=10)

    add_label(0, 'Folder containing images')
    folder_path_var = tkinter.StringVar()
    add_entry(0, textvariable=folder_path_var)
    add_browse_button(0, lambda: browse_folder(options,folder_path_var))

    add_label(1, 'Path to MegaDetector output .json file')
    file_path_var = tkinter.StringVar()
    add_entry(1, textvariable=file_path_var)
    add_browse_button(1, lambda: browse_file(options,file_path_var))

    add_label(2, 'Minimum confidence to consider a category')
    textarea_min_threshold = add_entry(2)

    add_label(3, 'Prefix to remove from image paths (optional)')
    textarea_remove_path = add_entry(3)

    add_label(4, 'Confidence level to move images requires manual check (optional)')
    textarea_rename_conf = add_entry(4)

    add_label(5, 'Categories to check for the confidence (optional)')
    textarea_rename_cats = add_entry(5)

    add_label(6, 'Number of threads to run (optional)')
    textarea_num_threads = add_entry(6)

    submit_button = tkinter.Button(frame, text='Submit', fg='black',
                                   command=lambda: start_input_processing(options), padx=10)
    submit_button.grid(row=7, column=2, padx=10, pady=10)

    # Progress bar with a text label showing the percentage
    style_name = 'text.Horizontal.Tprogress_bar'
    style = tkinter.ttk.Style(root)
    style.layout(style_name,
                 [('Horizontal.progress_bar.trough',
                   {'children': [('Horizontal.progress_bar.pbar',
                                  {'side': 'left', 'sticky': 'ns'})],
                    'sticky': 'nswe'}),
                  ('Horizontal.progress_bar.label', {'sticky': ''})])
    style.configure(style_name, text='0 %')

    progress_bar = tkinter.ttk.Progressbar(root, style=style_name, length=700,
                                           maximum=100, value=0, mode='determinate')
    progress_bar.pack(pady=10)

    # Status box, kept disabled so it can't be edited by hand
    status_group = tkinter.LabelFrame(root, text='Status', padx=5, pady=5)
    status_group.pack(padx=10, pady=10, fill='both', expand='yes')

    textarea_status = tkinter.Text(status_group, height=10, width=100)
    textarea_status.configure(state="disabled")
    textarea_status.pack()

    # Hand the widgets over to the processing code
    gui_state = xmp_gui()
    gui_state.root = root
    gui_state.textarea_min_threshold = textarea_min_threshold
    gui_state.textarea_remove_path = textarea_remove_path
    gui_state.textarea_rename_conf = textarea_rename_conf
    gui_state.textarea_rename_cats = textarea_rename_cats
    gui_state.textarea_num_threads = textarea_num_threads
    gui_state.textarea_status = textarea_status
    gui_state.progress_bar = progress_bar
    gui_state.style = style
    options.xmp_gui = gui_state

    root.mainloop()
|
||||
|
||||
|
||||
#%% Interactive/test driver
|
||||
|
||||
if False:

    #%%

    # Disabled sample cell: never runs with the script, kept as an example
    # of driving process_input_data() without the command line or the GUI
    options = xmp_integration_options()
    options.image_folder = r"C:\temp\demo_images\ssmini_xmp_test"
    options.input_file = r"C:\temp\demo_images\ssmini_xmp_test_orig\ssmini.mdv4.json"
    options.remove_path = 'my_images/'
    process_input_data(options)
|
||||
|
||||
|
||||
#%% Command-line driver
|
||||
|
||||
def args_to_object(args,obj):
    """
    Copy every member of the argparse table "args" whose name does not start
    with an underscore onto the object "obj".
    """

    public_members = [(name, value)
                      for name, value in inspect.getmembers(args)
                      if not name.startswith('_')]
    for name, value in public_members:
        setattr(obj, name, value)
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Parses the arguments into an xmp_integration_options object, then either
    opens the GUI (--gui) or processes the input file directly.  In GUI mode
    all settings come from the form, so combining --gui with any other
    argument is reported as a usage error.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', help = 'Path to the MegaDetector .json file', default=None)
    parser.add_argument('--image_folder', help = 'Path to the folder containing images', default=None)
    parser.add_argument('--min_threshold', help = 'Minimum detection confidence that will be treated as a detection event', default=None)
    parser.add_argument('--remove_path', help = 'Prefix to remove from image paths in the .json file', default=None)
    parser.add_argument('--rename_conf', help = 'Below this confidence level, images will be renamed for manual check', default=None)

    # Stored as "rename_cats", which is the option name the processing code
    # reads
    parser.add_argument('--rename_cat', dest='rename_cats', help = 'Category (or comma-delimited categories) to apply renaming behavior to', default=None)
    parser.add_argument('--num_threads', help = 'Number of threads to use for image processing', default=1)
    parser.add_argument('--gui', help = 'Run in GUI mode', action='store_true')

    args = parser.parse_args()

    if args.gui:
        gui_only_defaults = (
            ('input_file', None),
            ('image_folder', None),
            ('min_threshold', None),
            ('remove_path', None),
            ('rename_conf', None),
            ('rename_cats', None),
            ('num_threads', 1),
        )
        for arg_name, default_value in gui_only_defaults:
            if getattr(args, arg_name) != default_value:
                # Prints the usage message and exits with status 2
                parser.error('Command-line argument specified in GUI mode')

    options = xmp_integration_options()
    args_to_object(args,options)

    if options.gui:
        create_gui(options)
    else:
        process_input_data(options)


if __name__ == '__main__':

    main()
|
|
@ -1,70 +0,0 @@
|
|||
# MegaDetector/eMammal integration app: introduction
|
||||
|
||||
This app takes as input an output file from the AI for Earth MegaDetector batch API and transfers those annotations to the eMammal desktop app running on the local machine. This is very very very very beta, so if you're interested in trying this out, we recommend that you <a href="mailto:cameratraps@lila.science">email us</a>!
|
||||
|
||||
We have also worked with a number of eMammal users to use MegaDetector in a somewhat less elegant way, specifically to process all their images before they ever get to the eMammal desktop app, and use the MegaDetector results to move all the empty images into a separate folder, which users can then either upload to a separate deployment or not upload at all. We can do the same with human/vehicle images. If you're interested in trying this approach out, also <a href="mailto:cameratraps@lila.science">email us</a>!
|
||||
|
||||
# Downloading the eMammal integration app
|
||||
|
||||
Download from <a href="https://lilablobssc.blob.core.windows.net/models/apps/megadetector-eMammal-integration-app.1.00.zip">here</a>.
|
||||
|
||||
# Downloading the eMammal desktop app
|
||||
|
||||
If you're reading this, you probably already have the eMammal desktop app, and we're not going to post a link here, but the installer you downloaded should look something like "EMammal_Desktop-0.9.32.exe". 0.9.32 is the most recent version we've tested our integration tool against.
|
||||
|
||||
User-facing documentation for the eMammal desktop app (for a slightly different version, but close) is available here:
|
||||
|
||||
* <https://emammal.si.edu/content/emammal-training><br/>
|
||||
* <https://www.youtube.com/watch?v=3x4JwHEMtFg&feature=emb_logo>
|
||||
|
||||
If you face any issues during installation of the eMammal desktop app, refer to [eMammal-Client-App-Troubleshooting.md](eMammal-Client-App-Troubleshooting.md).
|
||||
|
||||
# Using the eMammal integration app
|
||||
|
||||
## Run the eMammal desktop app and load images
|
||||
|
||||
1. Run the eMammal desktop app, click "Load New Photos", then select the project, sub-project, and deployment. It doesn't matter which sub-project/deployment you select; you won't be pushing anything to the cloud during testing.
|
||||
|
||||
2. Load the images into the eMammal desktop app by selecting a folder, which eMammal will recursively search for images. eMammal will remember the path of each image relative to the base folder you pick, and these relative paths need to match the .json file. So if your .json file has images like:
|
||||
|
||||
`a/b/c/d/image0001.jpg`
|
||||
|
||||
You should make sure that the "a" folder is somewhere all by itself, and select the folder above it, even if all the images are way down in the "c" folder. Of course you can also manipulate the .json file to match, but one way or another they need to line up.
|
||||
|
||||
3. Leave the app running.
|
||||
|
||||
## Run the AI for Earth eMammal integration app
|
||||
|
||||
1. Run the AI for Earth eMammal integration app.
|
||||
|
||||
2. Select the .json detection file for the images you loaded above.
|
||||
|
||||
3. Select the eMammal project and deployment you selected above.
|
||||
|
||||
<img src="images/eMammal-integration-app-project-details.jpg" width="500"><br/>
|
||||
|
||||
4. Click "next", then in the next screen for <i>Category Mapping</i>, select the eMammal categories from the drop-down list to which you want to match each of the four MegaDetector categories ("Animal", "Person", "Vehicle", and "Blank"). These will by default be mapped to the eMammal categories "Unknown Animal", "Homo sapiens", "Vehicle", and "No Animal", respectively.
|
||||
|
||||
<img src="images/eMammal-integration-app-category-mapping.jpg" width="500"><br/>
|
||||
|
||||
5. Click "next" to add annotations to the eMammal database. Once all the annotations have been added to the database, you will see a message confirming the successful deployment, asking you to close and re-open the eMammal desktop app (step (1) below).
|
||||
|
||||
<img src="images/successful-deployment.jpg" width="500"><br/>
|
||||
|
||||
6. Once you've closed and re-opened the eMammal desktop app, you can click the "verify" button to confirm that all of the image assignments worked correctly:
|
||||
|
||||
<img src="images/verified-push.jpg" width="500"><br/>
|
||||
|
||||
## View annotations in the eMammal desktop app
|
||||
|
||||
1. Close and re-open the eMammal desktop app
|
||||
|
||||
2. Sign in and click "Load New Photos"
|
||||
|
||||
3. Select the same project and deployment you selected above
|
||||
|
||||
4. Click "continue", then in the next window click "Save Deployment Info". In the main window of the eMammal app, annotations for the images should appear reflecting the maximum confidence value for each image in a sequence.
|
||||
|
||||
5. Play around with the annotations, but - if you're an AI for Earth person reading this during development - <b>do not click "upload"</b>; we have been asked not to push annotations to the test deployment. It won't be catastrophic if you do, but we said we wouldn't. If you're an eMammal user, by all means, upload away!
|
||||
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
<Application x:Class="eMammal_integration_application.App"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:local="clr-namespace:eMammal_integration_application"
|
||||
StartupUri="eMammalIntegrationWindow.xaml">
|
||||
<Application.Resources>
|
||||
|
||||
</Application.Resources>
|
||||
</Application>
|
|
@ -1,17 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Configuration;
|
||||
using System.Data;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// Code-behind for App.xaml; adds no members of its own.
/// </summary>
public partial class App : Application
{
}
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// One integer value per detection category (blank, animal, person, vehicle).
/// NOTE(review): presumably the eMammal ID each category is mapped to - confirm against callers.
/// </summary>
public class Category
{
    /// <summary>Value for images with no detection.</summary>
    public int blank { get; set; }

    /// <summary>Value for the "animal" category.</summary>
    public int animal { get; set; }

    /// <summary>Value for the "person" category.</summary>
    public int person { get; set; }

    /// <summary>Value for the "vehicle" category.</summary>
    public int vehicle { get; set; }
}
|
||||
}
|
|
@ -1,131 +0,0 @@
|
|||
using System;
|
||||
using System.Drawing;
|
||||
using System.Threading;
|
||||
using System.Windows;
|
||||
using System.Windows.Controls;
|
||||
using System.Windows.Media;
|
||||
using System.Windows.Threading;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// Static helpers shared by the integration window: waiting for the database
/// connection, showing messages, and updating the progress display.
/// </summary>
public class Common
{
    /// <summary>
    /// Blocks the calling thread until the eMammal database connection can be opened
    /// (retrying every 200 ms), then queues an update on the window's dispatcher that
    /// shows the "database available" message and re-enables the window.
    /// </summary>
    /// <param name="window">Window to update once the connection is available.</param>
    /// <param name="loadProject">When true, window.Loadproject() is also called.</param>
    public static void CheckConnection(eMammalIntegrationWindow window, bool loadProject = false)
    {
        eMammalMySQLOps db = new eMammalMySQLOps();

        bool isConnectionOpen = false;
        while (isConnectionOpen == false)
        {
            Thread.Sleep(200);
            isConnectionOpen = db.OpenConnectionIfNotOpen(true);
        }

        window.Dispatcher.BeginInvoke(new Action(() =>
        {
            Common.SetMessage(window, Constants.DATABASE_AVAILABLE, false, false);

            window.Tab.Visibility = Visibility.Visible;

            if (loadProject)
                window.Loadproject();

            window.Tab.SelectedIndex = 0;
            window.Tab.IsEnabled = true;

            window.IsEnabled = true;

            window.ButtonBack.Visibility = Visibility.Hidden;
            window.ReactivateButton(window.ButtonNext);
            window.ReactivateButton(window.ButtonBrowse);
        }));
    }

    /// <summary>
    /// Shows <paramref name="msg"/> in the window's info text block, sets the window
    /// foreground to red (error) or blue, and optionally also shows a message box.
    /// </summary>
    public static void SetMessage(eMammalIntegrationWindow window, string msg, bool isError = false, bool showMessageBox = true)
    {
        window.TextBlockInfo.Dispatcher.Invoke(() => window.TextBlockInfo.Visibility = Visibility.Visible, DispatcherPriority.Background);
        window.TextBlockInfo.Dispatcher.Invoke(() => window.TextBlockInfo.Text = msg, DispatcherPriority.Normal);

        // Go through the dispatcher like the updates above, so this also works when
        // called from a thread other than the window's; the brush is created inside
        // the callback so it belongs to that thread
        window.Dispatcher.Invoke(() =>
        {
            window.Foreground = new SolidColorBrush(isError ? Colors.Red : Colors.Blue);
        });

        if (showMessageBox)
            SetMessageBox(msg, isError);
    }

    /// <summary>
    /// Shows <paramref name="msg"/> in a message box with an OK button, using the error
    /// icon when <paramref name="error"/> is true and the information icon otherwise.
    /// </summary>
    public static void SetMessageBox(string msg, bool error = false)
    {
        MessageBoxImage icon = error ? MessageBoxImage.Error : MessageBoxImage.Information;

        MessageBox.Show(msg, "", MessageBoxButton.OK,
                        icon,
                        MessageBoxResult.OK,
                        MessageBoxOptions.DefaultDesktopOnly);
    }

    /// <summary>
    /// Shows <paramref name="msg"/> in the progress label and sets the progress bar value
    /// to <paramref name="progressCount"/>.
    /// </summary>
    /// <param name="isLast">Selects the dispatcher priority used for the value update:
    /// Normal when true, Background when false.</param>
    /// <param name="showProgressBar">When true, the progress bar is made visible.</param>
    public static void ShowProgress(eMammalIntegrationWindow window, string msg, int progressCount,
                                    bool isLast = true, bool showProgressBar = true)
    {
        // Through the dispatcher, consistent with the other updates in this method
        window.LabelProgress.Dispatcher.Invoke(() =>
        {
            window.LabelProgress.Content = msg;
        });

        window.LabelProgress.Dispatcher.Invoke(() =>
            window.LabelProgress.Visibility = Visibility.Visible, DispatcherPriority.Background);

        if (showProgressBar)
        {
            window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
                window.ProgressbarUpdateProgress.Visibility = Visibility.Visible, DispatcherPriority.Background);
        }

        DispatcherPriority valuePriority = isLast ? DispatcherPriority.Normal : DispatcherPriority.Background;
        window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
            window.ProgressbarUpdateProgress.Value = progressCount, valuePriority);
    }

    /// <summary>
    /// Clears and hides the progress label, hides the progress bar and resets its value to 0.
    /// </summary>
    public static void HideProgress(eMammalIntegrationWindow window)
    {
        // Through the dispatcher, consistent with the progress bar updates below
        window.LabelProgress.Dispatcher.Invoke(() =>
        {
            window.LabelProgress.Content = "";
            window.LabelProgress.Visibility = Visibility.Hidden;
        });

        window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
            window.ProgressbarUpdateProgress.Visibility = Visibility.Hidden, DispatcherPriority.Normal);
        window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
            window.ProgressbarUpdateProgress.Value = 0, DispatcherPriority.Background);
    }

    /// <summary>
    /// Busy-waits by counting from 0 up to <paramref name="maxCount"/>.
    /// </summary>
    public static void delay(int maxCount = 1000000)
    {
        int count = 0;
        while (count < maxCount)
            count++;
    }

    /// <summary>
    /// Returns the progress-update interval for a batch of <paramref name="totalImages"/>
    /// images: 1 below 10 images, 100 above 1,000 images, 1000 from 100,000 images on,
    /// and <paramref name="showProgressCount"/> unchanged otherwise.
    /// </summary>
    public static int GetShowProgressCount(int showProgressCount, int totalImages)
    {
        if (totalImages < 10)
            return 1;

        if (totalImages >= 100000)
            return 1000;

        if (totalImages > 1000)
            return 100;

        return showProgressCount;
    }
}
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Security.RightsManagement;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// String constants used by the application: category keys, user-facing messages,
/// log messages and progress messages.
/// </summary>
static class Constants
{
    // ---- Category constants ----
    // NOTE(review): presumably the detection category keys used in the .json file - confirm

    public const string animal = "1";
    public const string person = "2";
    public const string vehicle = "3";

    // ---- Messages shown to the user ----

    public const string DATABASE_CONNECTION_ERROR =
        "Cannot connect to the eMammal database. Please ensure that you have opened the eMammal app and you have logged into the app. "
        + "Once you opened the eMammal application, this application will automatically refresh.";

    public const string NO_JSON_FILE_ERROR = "Please select a JSON detections file";
    public const string DATABASE_AVAILABLE = "Application is now able to connect to the eMammal database";

    // ---- Log messages ----

    public const string LOG_MESSAGE_APP_CONNECTED_TO_DATABASE = "App successfully connected to the eMammal database";
    public const string LOG_MESSAGE_PROJECT_LOADED = "Projects loaded";
    public const string LOG_APP_COULD_NOT_CONNECT_TO_DATABASE = "App could not connect to the eMammal database";
    public const string LOG_APP_CLOSING = "App Closing";
    public const string LOG_CLOSING_OPEN_DATABASE_CONNECTION = "Closing open database connection";
    public const string LOG_DATABASE_CONNECTION_NOT_OPEN = "Database connection not open";
    public const string LOG_ERROR_WHILE_CLOSING_DATABASE_CONNECTION = "Error occurred while trying to close database connection";
    public const string LOG_OPEN_CLOSED_DATABASE_CONNECTION = "Opening closed connection";
    public const string LOG_OPENING_CLOSED_DATABASE_CONNECTION_SUCCESSFULL = "Opening closed database connection was successfull";
    public const string LOG_ERROR_WHILE_OPENING_DATABASE_CONNECTION = "Error occurred while opening database connection";
    public const string LOG_ADDING_UNIQUE_KEY_CONSTRAINT = "Adding unique key constraint";
    public const string LOG_CHECKING_IF_UNIQUE_KEY_ALREADY_EXISTS = "Checking if unique key already exists in the database";
    public const string LOG_UNIQUE_KEY_ALREADY_EXISTS = "Unique key already exists in the database";
    public const string LOG_START_PROCESSING_IMAGES = "Starting image processing";
    public const string LOG_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB = "Getting image sequence data from the database";
    public const string LOG_COULD_NOT_RETRIEVE_IMAGE_SEQUENCES_FROM_DATABASE = "Could not retrive image sequences from the database";
    public const string LOG_NUM_IMAGE_SEQUENCES = "Number of image sequences returned from DB: ";
    public const string LOG_ITERATING_IMAGES_IN_JSON_FILE = "Iterating through the images in the JSON file";

    // ---- Progress messages ----

    public const string PROCESSING_IMAGES = "Processing images...";
    public const string PROGRESS_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB = "Getting image sequence data from the database";
    public const string PROGRESS_UPDATING_ANNOTATIONS_IN_DB = "Updating annotations in the database";
    public const string PROGRESS_CONTINUING_WITH_NEXT_IMAGE = "Continuing with next image";

    // ---- Messages used for both logging and progress ----

    public const string INSERTING_DETECTIONS = "Inserting detections";
    public const string INSERTING_REMAINING_DETECTIONS = "Inserting remaining detections";
    public const string ANNOTATIONS_ADDED_FOR_ALL_IMAGES = "Annotations added for all images in eMammal database";

    // ---- Error messages ----

    public const string ERROR_WHILE_VERIFYING_ANNOTATIONS_IN_DB = "Error occurred while verifying annotations in eMammal database";
}
|
||||
}
|
|
@ -1,17 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// Simple data holder that pairs a sequence id with a project taxa id.
/// Presumably one entry of the image sequence annotation list built for
/// bulk insertion (confirm against the callers).
/// </summary>
public class ImageTaxa
{
    /// <summary>Identifier of the image sequence.</summary>
    public int sequenceId { get; set; }

    /// <summary>Identifier of the project taxa associated with the sequence.</summary>
    public int projectTaxaId { get; set; }
}
|
||||
}
|
|
@ -1,52 +0,0 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Text;
|
||||
using Newtonsoft.Json;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
/// <summary>
/// Root object of the JSON results file whose images are iterated by
/// eMammalIntegration.ProcessDetections.
/// </summary>
public class JsonData
{
    /// <summary>File-level metadata block.</summary>
    public Info info { get; set; }

    /// <summary>
    /// Detection category lookup, string key to string value
    /// (presumably category id to category name; confirm against the JSON).
    /// </summary>
    public Dictionary<string, string> detection_categories { get; set; }

    /// <summary>Classification category block of the file.</summary>
    public Classification_Categories classification_categories { get; set; }

    /// <summary>One entry per image listed in the file.</summary>
    public List<Image> images { get; set; }
}
|
||||
|
||||
/// <summary>
/// Metadata block ("info") of the JSON results file.
/// All values are kept as the raw strings found in the file.
/// </summary>
public class Info
{
    /// <summary>Value of the "detector" field.</summary>
    public string detector { get; set; }

    /// <summary>Value of the "detection_completion_time" field (unparsed).</summary>
    public string detection_completion_time { get; set; }

    /// <summary>Value of the "format_version" field.</summary>
    public string format_version { get; set; }
}
|
||||
|
||||
/// <summary>
/// Fixed-shape holder with two string members named _1 and _2.
/// NOTE(review): JsonData exposes detection_categories as a dictionary, so
/// this type may be unused; confirm before relying on it.
/// </summary>
public class Detection_Categories
{
    /// <summary>Value stored under the member named _1.</summary>
    public string _1 { get; set; }

    /// <summary>Value stored under the member named _2.</summary>
    public string _2 { get; set; }
}
|
||||
|
||||
/// <summary>
/// Placeholder type for the "classification_categories" member of
/// <see cref="JsonData"/>. It declares no members.
/// </summary>
public class Classification_Categories
{
}
|
||||
|
||||
/// <summary>
/// One image entry of the JSON results file.
/// </summary>
public class Image
{
    /// <summary>Detections reported for this image.</summary>
    public Detection[] detections { get; set; }

    /// <summary>
    /// Image path as written in the JSON file. ProcessDetections replaces
    /// "/" with "\" and extracts the file name from it.
    /// </summary>
    public string file { get; set; }

    /// <summary>
    /// Value of "max_detection_conf"; typed dynamic and cast to float by
    /// ProcessDetections.
    /// </summary>
    public dynamic max_detection_conf { get; set; }
}
|
||||
|
||||
/// <summary>
/// A single detection belonging to an <see cref="Image"/>.
/// The JsonProperty Order values fix the member order as
/// category, conf, bbox.
/// </summary>
public class Detection
{
    /// <summary>
    /// Category key as a string (presumably matched against the "1"/"2"/"3"
    /// category constants in eMammalIntegration; confirm).
    /// </summary>
    [JsonProperty(Order = 1)]
    public string category { get; set; }

    /// <summary>Confidence value; left dynamic, so no numeric type is enforced here.</summary>
    [JsonProperty(Order = 2)]
    public dynamic conf { get; set; }

    /// <summary>Bounding box values (layout and units not visible here; TODO confirm).</summary>
    [JsonProperty(Order = 3)]
    public float[] bbox { get; set; }
}
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
using System.Reflection;
|
||||
using System.Resources;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Windows;
|
||||
|
||||
// Assembly identity metadata. Edit the attribute values below to change the
// information that is embedded in the compiled assembly.
[assembly: AssemblyTitle("eMammal-integration-application")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("eMammal-integration-application")]
[assembly: AssemblyCopyright("Copyright © 2020")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]

// ComVisible(false) hides every type in this assembly from COM components.
// To expose a single type to COM, set ComVisible(true) on that type.
[assembly: ComVisible(false)]

// Localization: to build a localizable application, add
// <UICulture>CultureYouAreCodingWith</UICulture> inside a <PropertyGroup> of
// the .csproj file (for example en-US when the sources are in US English),
// then uncomment the NeutralResourcesLanguage attribute below and make its
// culture name match the UICulture setting in the project file.

//[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.Satellite)]


[assembly: ThemeInfo(
    // Location of theme-specific resource dictionaries (consulted when a
    // resource is not found in the page or application resource dictionaries).
    ResourceDictionaryLocation.None,
    // Location of the generic resource dictionary (consulted when a resource
    // is not found in the page, app, or any theme-specific dictionaries).
    ResourceDictionaryLocation.SourceAssembly
)]


// An assembly version is made of four values:
//
//      Major Version
//      Minor Version
//      Build Number
//      Revision
//
// All four may be given explicitly, or the Build and Revision numbers can be
// defaulted with '*', as in:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
|
|
@ -1,71 +0,0 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Runtime Version:4.0.30319.42000
|
||||
//
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace eMammal_integration_application.Properties
|
||||
{
|
||||
|
||||
|
||||
/// <summary>
///   Strongly-typed accessor class used to look up localized strings and
///   other resources of this assembly.
/// </summary>
// Generated by StronglyTypedResourceBuilder (via ResGen or Visual Studio).
// Do not add or remove members by hand: edit the .ResX file and rerun ResGen
// with the /str option, or rebuild the Visual Studio project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources
{
    // Lazily created manager; stays null until ResourceManager is first read.
    private static global::System.Resources.ResourceManager resourceMan;

    // Culture override returned by Culture; null until a caller sets it.
    private static global::System.Globalization.CultureInfo resourceCulture;

    [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
    internal Resources()
    {
    }

    /// <summary>
    ///   Gets the cached ResourceManager for this class, creating it on first access.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Resources.ResourceManager ResourceManager
    {
        get
        {
            if (resourceMan != null)
            {
                return resourceMan;
            }

            resourceMan = new global::System.Resources.ResourceManager(
                "eMammal_integration_application.Properties.Resources",
                typeof(Resources).Assembly);
            return resourceMan;
        }
    }

    /// <summary>
    ///   Gets or sets the culture used for all resource lookups made through
    ///   this class, overriding the current thread's CurrentUICulture.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Globalization.CultureInfo Culture
    {
        get { return resourceCulture; }
        set { resourceCulture = value; }
    }
}
|
||||
}
|
|
@ -1,117 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<root>
|
||||
<!--
|
||||
Microsoft ResX Schema
|
||||
|
||||
Version 2.0
|
||||
|
||||
The primary goal of this format is to allow a simple XML format
|
||||
that is mostly human readable. The generation and parsing of the
|
||||
various data types are done through the TypeConverter classes
|
||||
associated with the data types.
|
||||
|
||||
Example:
|
||||
|
||||
... ado.net/XML headers & schema ...
|
||||
<resheader name="resmimetype">text/microsoft-resx</resheader>
|
||||
<resheader name="version">2.0</resheader>
|
||||
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
|
||||
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
|
||||
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
|
||||
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
|
||||
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
|
||||
<value>[base64 mime encoded serialized .NET Framework object]</value>
|
||||
</data>
|
||||
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
|
||||
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
|
||||
<comment>This is a comment</comment>
|
||||
</data>
|
||||
|
||||
There are any number of "resheader" rows that contain simple
|
||||
name/value pairs.
|
||||
|
||||
Each data row contains a name, and value. The row also contains a
|
||||
type or mimetype. Type corresponds to a .NET class that supports
|
||||
text/value conversion through the TypeConverter architecture.
|
||||
Classes that don't support this are serialized and stored with the
|
||||
mimetype set.
|
||||
|
||||
The mimetype is used for serialized objects, and tells the
|
||||
ResXResourceReader how to depersist the object. This is currently not
|
||||
extensible. For a given mimetype the value must be set accordingly:
|
||||
|
||||
Note - application/x-microsoft.net.object.binary.base64 is the format
|
||||
that the ResXResourceWriter will generate, however the reader can
|
||||
read any of the formats listed below.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.binary.base64
|
||||
value : The object must be serialized with
|
||||
: System.Serialization.Formatters.Binary.BinaryFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.soap.base64
|
||||
value : The object must be serialized with
|
||||
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
|
||||
: and then encoded with base64 encoding.
|
||||
|
||||
mimetype: application/x-microsoft.net.object.bytearray.base64
|
||||
value : The object must be serialized into a byte array
|
||||
: using a System.ComponentModel.TypeConverter
|
||||
: and then encoded with base64 encoding.
|
||||
-->
|
||||
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
|
||||
<xsd:element name="root" msdata:IsDataSet="true">
|
||||
<xsd:complexType>
|
||||
<xsd:choice maxOccurs="unbounded">
|
||||
<xsd:element name="metadata">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
<xsd:attribute name="type" type="xsd:string" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="assembly">
|
||||
<xsd:complexType>
|
||||
<xsd:attribute name="alias" type="xsd:string" />
|
||||
<xsd:attribute name="name" type="xsd:string" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="data">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
|
||||
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
|
||||
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
<xsd:element name="resheader">
|
||||
<xsd:complexType>
|
||||
<xsd:sequence>
|
||||
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
|
||||
</xsd:sequence>
|
||||
<xsd:attribute name="name" type="xsd:string" use="required" />
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:choice>
|
||||
</xsd:complexType>
|
||||
</xsd:element>
|
||||
</xsd:schema>
|
||||
<resheader name="resmimetype">
|
||||
<value>text/microsoft-resx</value>
|
||||
</resheader>
|
||||
<resheader name="version">
|
||||
<value>2.0</value>
|
||||
</resheader>
|
||||
<resheader name="reader">
|
||||
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
<resheader name="writer">
|
||||
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
|
||||
</resheader>
|
||||
</root>
|
|
@ -1,30 +0,0 @@
|
|||
//------------------------------------------------------------------------------
|
||||
// <auto-generated>
|
||||
// This code was generated by a tool.
|
||||
// Runtime Version:4.0.30319.42000
|
||||
//
|
||||
// Changes to this file may cause incorrect behavior and will be lost if
|
||||
// the code is regenerated.
|
||||
// </auto-generated>
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
namespace eMammal_integration_application.Properties
|
||||
{
|
||||
|
||||
|
||||
// Application settings class emitted by the Visual Studio settings designer
// (SettingsSingleFileGenerator); it declares no settings of its own.
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "11.0.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase
{
    // Single shared instance, wrapped by ApplicationSettingsBase.Synchronized.
    private static Settings defaultInstance =
        (Settings)global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings());

    /// <summary>Gets the shared settings instance.</summary>
    public static Settings Default => defaultInstance;
}
|
||||
}
|
|
@ -1,7 +0,0 @@
|
|||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<SettingsFile xmlns="uri:settings" CurrentProfile="(Default)">
|
||||
<Profiles>
|
||||
<Profile Name="(Default)" />
|
||||
</Profiles>
|
||||
<Settings />
|
||||
</SettingsFile>
|
|
@ -1,34 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<configuration>
|
||||
<startup>
|
||||
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.7.2" />
|
||||
</startup>
|
||||
<appSettings configProtectionProvider="RsaProtectedConfigurationProvider">
|
||||
<EncryptedData Type="http://www.w3.org/2001/04/xmlenc#Element"
|
||||
xmlns="http://www.w3.org/2001/04/xmlenc#">
|
||||
<EncryptionMethod Algorithm="http://www.w3.org/2001/04/xmlenc#aes256-cbc" />
|
||||
<KeyInfo xmlns="http://www.w3.org/2000/09/xmldsig#">
|
||||
<EncryptedKey xmlns="http://www.w3.org/2001/04/xmlenc#">
|
||||
<EncryptionMethod Algorithm="http://www.w3.org/2001/04/xmlenc#rsa-oaep-mgf1p" />
|
||||
<KeyInfo xmlns="http://www.w3.org/2000/09/xmldsig#">
|
||||
<KeyName>Rsa Key</KeyName>
|
||||
</KeyInfo>
|
||||
<CipherData>
|
||||
<CipherValue>V8N7vxvIXIVMARjoh9X2tVnXfPmUZVPLPnTIM+vwAy2R4cCM5SUBvqAma7LswAXPbrTBBHpgmUDbA5gaHhB9X0chT/SbvOgaST+OUWCV6h4T+fJbk3inh4JO+XE/jDcrYJXxkVhp3B5uNyuJVWRk/2SahliUA1Hp3AWZzBDhPlOSztgJYtqBCZIHuj9QIajstexXVC7CSpczPrfNy3Tb4ZHWt86L/xLFiIDE/r01gfxo/QadQClD6/7SQrvZRcPVTkNegjXkwgcZOz6fnVQeyj9O/yILOHk3HjO3vArfIk/RWtY5JVyxRmw4aRbV6ej1mhxopr0K8ZJSH6DwZLYWKg==</CipherValue>
|
||||
</CipherData>
|
||||
</EncryptedKey>
|
||||
</KeyInfo>
|
||||
<CipherData>
|
||||
<CipherValue>Aj23BmHyt8L+8RaGZ3RmhuQ3s5+ua2pPCp1/uoNksaAePtnvvuNGBjWLX6m9p8KbJVwsyXJBjrS9jk19TPc4gJVRLJCZvXC4iVB45egUb9bIoB0E/erRykgskejMmVyjmS8lehIhO99oOezM8X8kvgXb+OMTQ/zG79T3k4GD8uUxqgfPgcWKD34Zq2rs35foZbg85mbIb8GPXM/P6I0F9h3LhItZodwq7+xtWSTmc2kSPt5wZk0UqIaGAEuh4fMrn/MKUpVsDkgkGmFiQMQoKg==</CipherValue>
|
||||
</CipherData>
|
||||
</EncryptedData>
|
||||
</appSettings>
|
||||
<runtime>
|
||||
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
|
||||
<dependentAssembly>
|
||||
<assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
|
||||
<bindingRedirect oldVersion="0.0.0.0-4.0.2.0" newVersion="4.0.2.0" />
|
||||
</dependentAssembly>
|
||||
</assemblyBinding>
|
||||
</runtime>
|
||||
</configuration>
|
|
@ -1,182 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
|
||||
<PropertyGroup>
|
||||
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
|
||||
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
|
||||
<ProjectGuid>{D09FFD0B-7666-4974-86F3-7E130EC56E48}</ProjectGuid>
|
||||
<OutputType>WinExe</OutputType>
|
||||
<RootNamespace>eMammal_integration_application</RootNamespace>
|
||||
<AssemblyName>eMammal-integration-application</AssemblyName>
|
||||
<TargetFrameworkVersion>v4.7.2</TargetFrameworkVersion>
|
||||
<FileAlignment>512</FileAlignment>
|
||||
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
|
||||
<Deterministic>true</Deterministic>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugSymbols>true</DebugSymbols>
|
||||
<DebugType>full</DebugType>
|
||||
<Optimize>false</Optimize>
|
||||
<OutputPath>bin\Debug\</OutputPath>
|
||||
<DefineConstants>DEBUG;TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
|
||||
<PlatformTarget>AnyCPU</PlatformTarget>
|
||||
<DebugType>pdbonly</DebugType>
|
||||
<Optimize>true</Optimize>
|
||||
<OutputPath>bin\Release\</OutputPath>
|
||||
<DefineConstants>TRACE</DefineConstants>
|
||||
<ErrorReport>prompt</ErrorReport>
|
||||
<WarningLevel>4</WarningLevel>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup>
|
||||
<ApplicationIcon>favicon.ico</ApplicationIcon>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<Reference Include="BouncyCastle.Crypto">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\BouncyCastle.Crypto.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Google.Protobuf">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Google.Protobuf.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="K4os.Compression.LZ4">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Compression.LZ4.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="K4os.Compression.LZ4.Streams">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Compression.LZ4.Streams.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="K4os.Hash.xxHash">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Hash.xxHash.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="MySql.Data, Version=8.0.21.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
|
||||
<HintPath>packages\MySql.Data.8.0.21\lib\net452\MySql.Data.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Newtonsoft.Json, Version=12.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
|
||||
<HintPath>packages\Newtonsoft.Json.12.0.3\lib\net45\Newtonsoft.Json.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="NLog, Version=4.0.0.0, Culture=neutral, PublicKeyToken=5120e14c03d0593c, processorArchitecture=MSIL">
|
||||
<HintPath>packages\NLog.4.7.2\lib\net45\NLog.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Renci.SshNet">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Renci.SshNet.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="Serilog">
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Serilog.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System" />
|
||||
<Reference Include="System.Buffers, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Buffers.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.ComponentModel" />
|
||||
<Reference Include="System.ComponentModel.DataAnnotations" />
|
||||
<Reference Include="System.Configuration" />
|
||||
<Reference Include="System.Configuration.Install" />
|
||||
<Reference Include="System.Data" />
|
||||
<Reference Include="System.Drawing" />
|
||||
<Reference Include="System.Drawing.Design" />
|
||||
<Reference Include="System.IO.Compression" />
|
||||
<Reference Include="System.Management" />
|
||||
<Reference Include="System.Memory, Version=4.0.1.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Memory.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Numerics.Vectors, Version=4.1.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Numerics.Vectors.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Runtime.CompilerServices.Unsafe, Version=4.0.4.1, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
|
||||
<SpecificVersion>False</SpecificVersion>
|
||||
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Runtime.CompilerServices.Unsafe.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="System.Runtime.Serialization" />
|
||||
<Reference Include="System.ServiceModel" />
|
||||
<Reference Include="System.Transactions" />
|
||||
<Reference Include="System.Xml" />
|
||||
<Reference Include="Microsoft.CSharp" />
|
||||
<Reference Include="System.Core" />
|
||||
<Reference Include="System.Xml.Linq" />
|
||||
<Reference Include="System.Data.DataSetExtensions" />
|
||||
<Reference Include="System.Net.Http" />
|
||||
<Reference Include="System.Xaml">
|
||||
<RequiredTargetFramework>4.0</RequiredTargetFramework>
|
||||
</Reference>
|
||||
<Reference Include="Ubiety.Dns.Core, Version=2.2.1.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
|
||||
<HintPath>packages\MySql.Data.8.0.21\lib\net452\Ubiety.Dns.Core.dll</HintPath>
|
||||
</Reference>
|
||||
<Reference Include="WindowsBase" />
|
||||
<Reference Include="PresentationCore" />
|
||||
<Reference Include="PresentationFramework" />
|
||||
<Reference Include="Zstandard.Net, Version=1.1.7.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
|
||||
<HintPath>packages\MySql.Data.8.0.21\lib\net452\Zstandard.Net.dll</HintPath>
|
||||
</Reference>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ApplicationDefinition Include="App.xaml">
|
||||
<Generator>MSBuild:Compile</Generator>
|
||||
<SubType>Designer</SubType>
|
||||
</ApplicationDefinition>
|
||||
<Page Include="eMammalIntegrationWindow.xaml">
|
||||
<Generator>MSBuild:Compile</Generator>
|
||||
<SubType>Designer</SubType>
|
||||
</Page>
|
||||
<Compile Include="App.xaml.cs">
|
||||
<DependentUpon>App.xaml</DependentUpon>
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="Category.cs" />
|
||||
<Compile Include="Common.cs" />
|
||||
<Compile Include="Constants.cs" />
|
||||
<Compile Include="eMammalIntegration.cs" />
|
||||
<Compile Include="eMammalIntegrationWindow.xaml.cs">
|
||||
<DependentUpon>eMammalIntegrationWindow.xaml</DependentUpon>
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Compile Include="eMammalMySQLOps.cs" />
|
||||
<Compile Include="ImageTaxa.cs" />
|
||||
<Compile Include="JsonData.cs" />
|
||||
<Compile Include="Properties\AssemblyInfo.cs">
|
||||
<SubType>Code</SubType>
|
||||
</Compile>
|
||||
<Compile Include="Properties\Resources.Designer.cs">
|
||||
<AutoGen>True</AutoGen>
|
||||
<DesignTime>True</DesignTime>
|
||||
<DependentUpon>Resources.resx</DependentUpon>
|
||||
</Compile>
|
||||
<Compile Include="Properties\Settings.Designer.cs">
|
||||
<AutoGen>True</AutoGen>
|
||||
<DependentUpon>Settings.settings</DependentUpon>
|
||||
<DesignTimeSharedInput>True</DesignTimeSharedInput>
|
||||
</Compile>
|
||||
<EmbeddedResource Include="Properties\Resources.resx">
|
||||
<Generator>ResXFileCodeGenerator</Generator>
|
||||
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
|
||||
</EmbeddedResource>
|
||||
<None Include="nlog.config">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Include="packages.config" />
|
||||
<None Include="Properties\Settings.settings">
|
||||
<Generator>SettingsSingleFileGenerator</Generator>
|
||||
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="App.config" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Resource Include="favicon.ico" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Resource Include="images\elephants-277329_1280.jpg" />
|
||||
<Resource Include="images\MS-AIforEarth.JPG" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
|
||||
</Project>
|
|
@ -1,25 +0,0 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.30309.148
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "eMammal-integration-application", "eMammal-integration-application.csproj", "{D09FFD0B-7666-4974-86F3-7E130EC56E48}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {10B34E61-E305-42E9-B53B-F1055C67A690}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
|
@ -1,314 +0,0 @@
|
|||
using NLog;
|
||||
using System.Data;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Windows;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
public class eMammalIntegration
|
||||
{
|
||||
|
||||
eMammalIntegrationWindow window;
|
||||
|
||||
Logger logger = LogManager.GetCurrentClassLogger();
|
||||
|
||||
// Category constants
|
||||
const string animal = "1";
|
||||
const string person = "2";
|
||||
const string vehicle = "3";
|
||||
|
||||
eMammalMySQLOps db;
|
||||
public eMammalIntegration(eMammalIntegrationWindow window)
|
||||
{
|
||||
this.window = window;
|
||||
db = new eMammalMySQLOps(window);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Create list of image sequence and annotations for bulk insertion
|
||||
/// Call function to insert or update annotations
|
||||
/// Update progress bar and message
|
||||
/// </summary>
|
||||
/// <param name="data"></param>
|
||||
/// <param name="deploymentId"></param>
|
||||
/// <param name="eMammalCategory"></param>
|
||||
public bool ProcessDetections(JsonData data, int deploymentId, string deploymentName, Category eMammalCategory)
|
||||
{
|
||||
|
||||
StringBuilder logImages = new StringBuilder();
|
||||
|
||||
int totalImages = data.images.Count();
|
||||
window.ProgressbarUpdateProgress.Maximum = totalImages;
|
||||
|
||||
logger.Info(Constants.LOG_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB);
|
||||
Common.ShowProgress(window, Constants.PROGRESS_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB, 1);
|
||||
|
||||
DataTable dtImageSequences = db.GetsequenceIDsfromDB(deploymentId);
|
||||
int imageSequenceCount = -1;
|
||||
if (dtImageSequences == null)
|
||||
{
|
||||
logger.Info(Constants.LOG_COULD_NOT_RETRIEVE_IMAGE_SEQUENCES_FROM_DATABASE);
|
||||
return false;
|
||||
}
|
||||
else
|
||||
imageSequenceCount = dtImageSequences.Rows.Count;
|
||||
|
||||
logger.Info(Constants.LOG_NUM_IMAGE_SEQUENCES + " " + dtImageSequences.Rows.Count.ToString());
|
||||
|
||||
if (imageSequenceCount == 0)
|
||||
{
|
||||
string msg = string.Format("The selected eMammal deployment {0} does not contain any images", deploymentName);
|
||||
logger.Info(msg);
|
||||
|
||||
Common.SetMessage(window,msg,true);
|
||||
return false;
|
||||
}
|
||||
|
||||
int showProgressCount = 10;
|
||||
showProgressCount = Common.GetShowProgressCount(showProgressCount, totalImages);
|
||||
|
||||
logger.Info(Constants.LOG_ITERATING_IMAGES_IN_JSON_FILE);
|
||||
|
||||
Common.ShowProgress(window, Constants.PROCESSING_IMAGES, 1);
|
||||
|
||||
// This variable will be set to true if there is atleast one matching image that matches the image (by name)
|
||||
// in eMammal database that is in the
|
||||
bool foundImage = false;
|
||||
int logCount = 0;
|
||||
int maxBulkInsertCount = 10000;
|
||||
int count = 0;
|
||||
int progressCount = 1;
|
||||
|
||||
bool recordsAdded = false;
|
||||
bool imageNotFoundProgressSet = false;
|
||||
|
||||
StringBuilder sql = db.GetBulkInsertInitialString();
|
||||
foreach (var image in data.images)
|
||||
{
|
||||
recordsAdded = false;
|
||||
|
||||
string filePath = image.file.Replace("/", "\\");
|
||||
string imageName = System.IO.Path.GetFileName(filePath);
|
||||
string imageplusLastFolderName = "";
|
||||
var folders = filePath.Split(System.IO.Path.DirectorySeparatorChar);
|
||||
|
||||
var detections = image.detections;
|
||||
float max_confidence = (float)image.max_detection_conf;
|
||||
|
||||
int currenteMammalCategory = eMammalCategory.blank;
|
||||
|
||||
logImages.Append(imageName + "\n");
|
||||
logCount++;
|
||||
LogProcessedImages(ref logImages, ref logCount);
|
||||
|
||||
if (folders.Length > 1)
|
||||
imageplusLastFolderName = folders[folders.Length - 2].ToString() + "_" + imageName;
|
||||
|
||||
int imageSequenceId = FindSequenceId(dtImageSequences, imageName, imageplusLastFolderName);
|
||||
progressCount++;
|
||||
|
||||
// if the image is not in the eMammal database continue to next image
|
||||
if (imageSequenceId == -1)
|
||||
{
|
||||
Common.ShowProgress(window, string.Format("image: {0} not found in deployment {1}",
|
||||
imageName, deploymentName), progressCount);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
foundImage = true;
|
||||
if (imageNotFoundProgressSet == true)
|
||||
Common.ShowProgress(window, Constants.PROGRESS_CONTINUING_WITH_NEXT_IMAGE,
|
||||
progressCount);
|
||||
}
|
||||
|
||||
if (detections.Count() == 0)
|
||||
{
|
||||
sql.AppendFormat("('{0}', '{1}', '{2}'),", imageSequenceId, currenteMammalCategory, 1);
|
||||
count++;
|
||||
}
|
||||
|
||||
if (progressCount % showProgressCount == 0)
|
||||
{
|
||||
if (totalImages > imageSequenceCount)
|
||||
|
||||
Common.ShowProgress(window, string.Format("Processed {0} images",
|
||||
progressCount.ToString(), totalImages.ToString()), progressCount);
|
||||
else
|
||||
Common.ShowProgress(window, string.Format("Processed {0} out of {1} images",
|
||||
progressCount.ToString(), totalImages.ToString()), progressCount);
|
||||
|
||||
}
|
||||
|
||||
EnumerateDetections(eMammalCategory, ref count, ref sql, detections, max_confidence, ref currenteMammalCategory, imageSequenceId);
|
||||
|
||||
if (count >= maxBulkInsertCount)
|
||||
{
|
||||
logger.Info("Inserting {0} detections", maxBulkInsertCount.ToString());
|
||||
count = 0;
|
||||
|
||||
bool success = db.BulkInsertAnnotations(sql);
|
||||
if (!success)
|
||||
return false;
|
||||
|
||||
sql = db.GetBulkInsertInitialString();
|
||||
recordsAdded = true;
|
||||
|
||||
Common.ShowProgress(window,
|
||||
string.Format("Inserting {0} detections", maxBulkInsertCount.ToString()),
|
||||
progressCount, false);
|
||||
}
|
||||
}
|
||||
if (logCount > 0)
|
||||
logger.Info(logImages.ToString());
|
||||
|
||||
// Add remaining detections
|
||||
if (!recordsAdded & foundImage)
|
||||
{
|
||||
Common.ShowProgress(window, Constants.PROGRESS_UPDATING_ANNOTATIONS_IN_DB, progressCount);
|
||||
db.BulkInsertAnnotations(sql);
|
||||
|
||||
progressCount++;
|
||||
|
||||
if (data.images.Count < maxBulkInsertCount)
|
||||
{
|
||||
logger.Info(Constants.INSERTING_DETECTIONS);
|
||||
Common.ShowProgress(window, Constants.INSERTING_DETECTIONS, progressCount);
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.Info(Constants.INSERTING_REMAINING_DETECTIONS);
|
||||
Common.ShowProgress(window, Constants.INSERTING_REMAINING_DETECTIONS, progressCount);
|
||||
}
|
||||
}
|
||||
|
||||
// The deployment does not contain any images that is within the provided JSON file
|
||||
if (!foundImage)
|
||||
{
|
||||
logger.Info("No matching images found in " + deploymentName + " that match the image names in the provided JSON file");
|
||||
|
||||
Common.SetMessage(window, "No matching images found in " + deploymentName + " that match the image names in the provided JSON file", true);
|
||||
return false;
|
||||
}
|
||||
|
||||
logger.Info(Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES);
|
||||
|
||||
//ShowProgress((int)window.ProgressbarUpdateProgress.Maximum, Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES, true, true);
|
||||
Common.ShowProgress(window, Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES, (int)window.ProgressbarUpdateProgress.Maximum);
|
||||
Common.delay();
|
||||
|
||||
db.CloseConnection();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Writes the buffered image names to the log once more than 100 names have
/// accumulated, then starts a fresh buffer and resets the counter.
/// </summary>
/// <param name="logImages">Buffer of image names; replaced with a new instance after a flush.</param>
/// <param name="logCount">Number of buffered names; set back to zero after a flush.</param>
private void LogProcessedImages(ref StringBuilder logImages, ref int logCount)
{
    // Keep buffering until the batch exceeds the flush threshold.
    if (logCount <= 100)
        return;

    string batch = logImages.ToString();
    logger.Info(batch);

    logImages = new StringBuilder();
    logCount = 0;
}
|
||||
|
||||
|
||||
/// <summary>
/// Walks the detections of one image and appends a value tuple to the bulk
/// insert statement for every detection whose confidence equals the image's
/// maximum confidence.
/// </summary>
/// <param name="eMammalCategory">Selected eMammal category ids for animal, person and vehicle.</param>
/// <param name="count">Running number of tuples appended to <paramref name="sql"/>; incremented per appended tuple.</param>
/// <param name="sql">Bulk insert statement being built.</param>
/// <param name="detections">Detections reported for the image.</param>
/// <param name="max_confidence">Highest detection confidence of the image.</param>
/// <param name="currenteMammalCategory">Category written with the tuple; left unchanged when the detection category is not animal, person or vehicle.</param>
/// <param name="imageSequenceId">Image sequence id written with the tuple.</param>
private static void EnumerateDetections(Category eMammalCategory, ref int count, ref StringBuilder sql,
    Detection[] detections, float max_confidence, ref int currenteMammalCategory, int imageSequenceId)
{
    foreach (var detection in detections)
    {
        // TODO: confirm json file is reading in detections correctly
        bool isTopDetection = (float)detection.conf == max_confidence;
        if (isTopDetection)
        {
            // map to selected eMammal categories
            if (detection.category == animal)
            {
                currenteMammalCategory = eMammalCategory.animal;
            }
            else if (detection.category == person)
            {
                currenteMammalCategory = eMammalCategory.person;
            }
            else if (detection.category == vehicle)
            {
                currenteMammalCategory = eMammalCategory.vehicle;
            }

            sql.AppendFormat("('{0}', '{1}', '{2}'),", imageSequenceId, currenteMammalCategory, 1);
            count += 1;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Looks up the image sequence id of the first row whose raw_name equals
/// either the plain image name or the folder-prefixed image name.
/// </summary>
/// <param name="imageSequences">Rows exposing "raw_name" and "image_sequence_id" columns.</param>
/// <param name="imageName">File name of the image.</param>
/// <param name="lastFolderName">Alternative name to match against raw_name.</param>
/// <returns>The matching image_sequence_id, or -1 when no row matches.</returns>
private int FindSequenceId(DataTable imageSequences, string imageName, string lastFolderName)
{
    foreach (DataRow sequence in imageSequences.Rows)
    {
        string rawName = sequence["raw_name"].ToString();
        bool matches = rawName == imageName || rawName == lastFolderName;

        if (matches)
            return (int)sequence["image_sequence_id"];
    }

    return -1;
}
|
||||
/// <summary>
/// Lists the annotations returned by db.GetImagesForDeployment for a
/// deployment in the results text box and in the log, updating the progress
/// display in batches.
/// </summary>
/// <param name="deploymentId">Deployment whose images are enumerated.</param>
/// <returns>Always true.</returns>
public bool VerifyAnnotations(int deploymentId)
{
    DataTable dt = db.GetImagesForDeployment(deploymentId);

    StringBuilder logInfo = new StringBuilder();

    int count = 0;
    int totalImages = dt.Rows.Count;
    window.ProgressbarUpdateProgress.Maximum = totalImages;
    int progressCount = 0;

    window.TextBlockInfo.Visibility = Visibility.Hidden;

    int showProgressCount = Common.GetShowProgressCount(10, totalImages);

    foreach (DataRow row in dt.Rows)
    {
        progressCount++;

        // The displayed annotation is built from columns 0 and 3 of the result set.
        string annotation = row[0].ToString() + " - " + row[3].ToString();

        window.RichTextBoxResults.AppendText(annotation + "\n");

        // Terminate every entry so a flushed batch logs one annotation per line
        // instead of a single run-together string.
        logInfo.Append(annotation).Append("\n");
        count++;

        if (count > showProgressCount)
        {
            logger.Info(logInfo.ToString());
            logInfo = new StringBuilder();
            count = 0;

            Common.ShowProgress(window,
                string.Format("Enumerating {0} annotations out of {1}",
                    progressCount.ToString(),
                    totalImages.ToString()),
                progressCount);
        }
    }

    // Flush whatever is left from the final, partial batch.
    if (logInfo.Length > 0)
    {
        logger.Info(logInfo.ToString());
    }

    return true;
}
|
||||
}
|
||||
}
|
|
@ -1,95 +0,0 @@
|
|||
<Window x:Class="eMammal_integration_application.eMammalIntegrationWindow"
|
||||
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
|
||||
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
|
||||
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
|
||||
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||
xmlns:local="clr-namespace:eMammal_integration_application"
|
||||
mc:Ignorable="d"
|
||||
Title="Microsoft AI for Earth - eMammal Integration App" Height="740" Width="795" Loaded="WindowLoaded" Initialized="WindowInitialized" ResizeMode="CanMinimize" Closing="WindowClosing">
|
||||
<Grid Margin="0,0,0,0">
|
||||
<Canvas Height="75" Width="710" Margin="0,0,0,330" Background="#fafafa">
|
||||
<!--<Label Name="LabelError" Content="Error" HorizontalAlignment="Left" Margin="42,153,0,0" VerticalAlignment="Top" Foreground="red" FontSize="14" Width="700" Visibility="Hidden"></Label>-->
|
||||
<TextBlock Name="TextBlockInfo" HorizontalAlignment="Left" Canvas.Left="0" Canvas.Top="5" TextWrapping="Wrap" Text="TextBlock" VerticalAlignment="Top" Foreground="Blue"
|
||||
FontSize="14" Width="700" Visibility="Hidden" Padding="7"/>
|
||||
<ProgressBar Name="ProgressbarUpdateProgress" Height="15" Width="690" Canvas.Top="20" Canvas.Left="5" Visibility="Hidden" />
|
||||
<Label Name="LabelProgress" Content="" HorizontalAlignment="Left" Canvas.Top="30" VerticalAlignment="Top" Canvas.Left="5" Visibility="Hidden" FontSize="12"/>
|
||||
</Canvas>
|
||||
|
||||
<Canvas Height="120" Width="790" Margin="0,0,0,570">
|
||||
<Image Source="/images/MS-AIforEarth.JPG" Canvas.Left="30" Canvas.Top="12"/>
|
||||
<Image Source="/images/elephants-277329_1280.jpg" Canvas.Left="35" Canvas.Top="-7" Width="710" Height="196"/>
|
||||
|
||||
<Button Name="ButtonVerify" Content="VERIFY" HorizontalAlignment="left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
|
||||
Canvas.Left="50" Click="ButtonVerifyClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" BorderBrush="LightGray" Visibility="Hidden"/>
|
||||
<Button Name="ButtonNext" Content="NEXT" HorizontalAlignment="Left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
|
||||
Canvas.Left="640" Click="ButtonNextClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" BorderBrush="LightGray"/>
|
||||
<Button Name="ButtonBack" Content="BACK" HorizontalAlignment="Left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
|
||||
Canvas.Left="524" Click="ButtonBackClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" Visibility="Hidden" BorderBrush="LightGray"/>
|
||||
<!--<Button Content="__" Canvas.Left="670" Canvas.Top="8" Background="White" Padding="10,2,10,5" BorderBrush="LightGray" Click="ButtonMinimizeClick"/>
|
||||
<Button Content="X" Canvas.Left="710" Canvas.Top="8" Background="White" Padding="10,5,10,5" FontFamily="serif" BorderBrush="LightGray" Click="ButtonCloseClick"/>-->
|
||||
</Canvas>
|
||||
|
||||
<TabControl Name="Tab" Width="690" Height="390" Margin="35,210,42,72" BorderBrush="Gray" BorderThickness="1" SelectionChanged="TabSelectionChanged">
|
||||
<TabItem Name="TabDetails" Header="Details" Height="30" Width="80" BorderBrush="White" BorderThickness="2" FontWeight="DemiBold">
|
||||
<Canvas>
|
||||
|
||||
<TextBox Name="TextBoxJsonFile" Text="" Margin="40,50,0,15" TextWrapping="Wrap" Width="480" Height="30" VerticalContentAlignment="Center" IsReadOnly="True" TextChanged="TextBoxJsonTextChanged"/>
|
||||
<Label Name="LabelJsonFileError" Content="Error" Canvas.Top="77" Canvas.Left="34" Foreground="Red" Visibility="Hidden"/>
|
||||
<Button Name="ButtonBrowse" Content="BROWSE" HorizontalAlignment="Left" Margin="550,50,0,0" VerticalAlignment="Top" Height="30" Width="100" Click="ButtonBrowseJsonClick"
|
||||
Foreground="#005ce6" FontWeight="DemiBold" BorderBrush="LightGray"/>
|
||||
<GroupBox Header="eMammal details" Height="220" Width="600" Margin="45,110,0,15">
|
||||
<Canvas>
|
||||
<ComboBox Name="comboBoxProject" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,30,0,0" Width="300" Height="30"
|
||||
SelectionChanged="ComboBoxProjectSelectionChanged" FontWeight="DemiBold"/>
|
||||
<ComboBox Name="comboBoxSubProject" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,90,0,0" Width="300" Height="30"
|
||||
SelectionChanged="ComboBoxSubProjectSelectionChanged" FontWeight="DemiBold"/>
|
||||
<ComboBox Name="comboBoxDeployment" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,150,0,0" Width="300" Height="30" FontWeight="DemiBold"/>
|
||||
<Label Content="Project :" Canvas.Left="100" Canvas.Top="30"/>
|
||||
<Label Content="Sub project :" Canvas.Left="90" Canvas.Top="90"/>
|
||||
<Label Content="Deployment :" Canvas.Left="90" Canvas.Top="150"/>
|
||||
</Canvas>
|
||||
</GroupBox>
|
||||
|
||||
</Canvas>
|
||||
</TabItem>
|
||||
<TabItem Name="TabClassMapping" Header="Category Mapping" Width="140" FontWeight="DemiBold" BorderBrush="White" IsEnabled="False">
|
||||
<Grid Background="White">
|
||||
<Grid.ColumnDefinitions>
|
||||
<ColumnDefinition Width="13*"/>
|
||||
<ColumnDefinition Width="334*"/>
|
||||
</Grid.ColumnDefinitions>
|
||||
<Canvas Margin="20,50,0,0" Name="CanvasClassMapping">
|
||||
<ComboBox Name="cmbProjectTaxaMappingAnimal" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,50,0,0" Width="150" Height="30" />
|
||||
<ComboBox Name="cmbProjectTaxaMappingPerson" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,100,0,0" Width="150" Height="30"/>
|
||||
<ComboBox Name="cmbProjectTaxaMappingVehicle" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,150,0,0" Width="150" Height="30"/>
|
||||
<ComboBox Name="cmbProjectTaxaMappingBlank" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,200,0,0" Width="150" Height="30"/>
|
||||
|
||||
<Label Content="Animal" Margin="170,50,0,0" Width="100" Height="30"/>
|
||||
<Label Content="Person" Margin="170,100,0,0" Width="100" Height="30"/>
|
||||
<Label Content="Vehicle" Margin="170,150,0,0" Width="100" Height="30"/>
|
||||
<Label Content="Blank" Margin="170,200,0,0" Width="100" Height="30"/>
|
||||
|
||||
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,60,0,0"/>
|
||||
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,110,0,0"/>
|
||||
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,160,0,0"/>
|
||||
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,210,0,0"/>
|
||||
|
||||
</Canvas>
|
||||
</Grid>
|
||||
</TabItem>
|
||||
<TabItem Name="TabResults" Header="Results" Width="100" FontWeight="DemiBold" BorderBrush="White" IsEnabled="False"
|
||||
Visibility="Hidden" Height="32" Margin="0,-2,0,0" VerticalAlignment="Top">
|
||||
<Grid Background="White">
|
||||
<Canvas Margin="0,0,0,0" Grid.ColumnSpan="2">
|
||||
<RichTextBox Name="RichTextBoxResults" ScrollViewer.VerticalScrollBarVisibility="Auto" Grid.Column="1" HorizontalAlignment="Left" Height="350" Margin="0,0,0,0" VerticalAlignment="Top" Width="678" BorderBrush="White">
|
||||
<FlowDocument>
|
||||
<Paragraph>
|
||||
</Paragraph>
|
||||
</FlowDocument>
|
||||
</RichTextBox>
|
||||
</Canvas>
|
||||
</Grid>
|
||||
</TabItem>
|
||||
</TabControl>
|
||||
</Grid>
|
||||
</Window>
|
|
@ -1,565 +0,0 @@
|
|||
using MySql.Data.MySqlClient;
|
||||
using Newtonsoft.Json;
|
||||
using NLog;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Data;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows;
|
||||
using System.Windows.Controls;
|
||||
using System.Windows.Data;
|
||||
using System.Windows.Documents;
|
||||
using System.Windows.Input;
|
||||
using System.Windows.Media;
|
||||
using System.Windows.Media.Imaging;
|
||||
using System.Windows.Navigation;
|
||||
using System.Windows.Shapes;
|
||||
using System.Windows.Threading;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
|
||||
/// <summary>
|
||||
/// Interaction logic for eMammalIntegrationWindow.xaml
|
||||
/// </summary>
|
||||
public partial class eMammalIntegrationWindow : Window
|
||||
{
|
||||
Logger logger = LogManager.GetCurrentClassLogger();
|
||||
|
||||
eMammalMySQLOps db;
|
||||
|
||||
eMammalIntegration eMammalIntegration;
|
||||
|
||||
//double tabTopOriginalMargin;
|
||||
//double originalHeight;
|
||||
|
||||
/// <summary>
/// Initializes the window's components, then creates the database helper and
/// the integration worker, passing this window to each.
/// </summary>
public eMammalIntegrationWindow()
{
    InitializeComponent();

    this.db = new eMammalMySQLOps(this);
    this.eMammalIntegration = new eMammalIntegration(this);
}
|
||||
|
||||
|
||||
/// <summary>
/// Initialized handler: centers the window on the screen at startup.
/// </summary>
private void WindowInitialized(object sender, EventArgs e)
{
    var centered = System.Windows.WindowStartupLocation.CenterScreen;
    this.WindowStartupLocation = centered;
}
|
||||
/// <summary>
/// Loaded handler: opens the database connection and loads the project list.
/// When the connection cannot be opened, shows the connection error, disables
/// the window and its buttons, and starts Common.CheckConnection on a new thread.
/// </summary>
private void WindowLoaded(object sender, RoutedEventArgs e)
{
    bool connected = db.OpenConnectionIfNotOpen(true);
    if (connected)
    {
        logger.Info(Constants.LOG_MESSAGE_APP_CONNECTED_TO_DATABASE);
        Loadproject();
        logger.Info(Constants.LOG_MESSAGE_PROJECT_LOADED);
        return;
    }

    logger.Info(Constants.LOG_APP_COULD_NOT_CONNECT_TO_DATABASE);
    Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);

    IsEnabled = false;

    foreach (Button button in new[] { ButtonNext, ButtonBack, ButtonBrowse })
    {
        DisableButton(button);
    }

    Activate();

    var connectionCheck = new Thread(() => Common.CheckConnection(this, true));
    connectionCheck.Start();
}
|
||||
|
||||
/// <summary>
/// Closing handler: logs that the application is closing and closes the
/// database connection.
/// </summary>
private void WindowClosing(object sender, System.ComponentModel.CancelEventArgs e)
{
    string closingMessage = Constants.LOG_APP_CLOSING;
    logger.Info(closingMessage);

    this.db.CloseConnection();
}
|
||||
|
||||
/// <summary>
/// Keeps the results tab and the BACK button in step with the selected tab:
/// the results tab is hidden on the first two tabs, and BACK is shown only on
/// the second tab.
/// </summary>
private void TabSelectionChanged(object sender, SelectionChangedEventArgs e)
{
    switch (Tab.SelectedIndex)
    {
        case 0:
            TabResults.Visibility = Visibility.Hidden;
            ButtonBack.Visibility = Visibility.Hidden;
            break;

        case 1:
            TabResults.Visibility = Visibility.Hidden;
            ButtonBack.Visibility = Visibility.Visible;
            break;
    }
}
|
||||
/// <summary>
/// NEXT button handler. On the details tab it validates the selected JSON
/// file and moves to the category mapping tab; on any other tab it locks the
/// UI, loads the JSON file and writes its detections to the selected
/// deployment.
/// </summary>
private void ButtonNextClick(object sender, RoutedEventArgs e)
{
    try
    {
        TabResults.Visibility = Visibility.Hidden;

        ResetControlsAfterProcessing();

        TextBlockInfo.Text = "";
        TextBlockInfo.Visibility = Visibility.Hidden;

        if (Tab.SelectedIndex == 0)
            AdvanceToCategoryMapping();
        else
            RunDetectionImport();
    }
    catch (Exception ex)
    {
        Common.HideProgress(this);

        // For database errors HandleExceptions disables the tab control and
        // starts a connection check, so the lock is left alone. Any other
        // failure (e.g. an unreadable JSON file) must not leave the tabs and
        // buttons disabled that RunDetectionImport switched off.
        if (!(ex is MySqlException))
            ResetControlsAfterProcessing();

        HandleExceptions(ex);
    }
}

/// <summary>
/// Validates the JSON file selection and, when valid, switches to the
/// category mapping tab.
/// </summary>
private void AdvanceToCategoryMapping()
{
    if (String.IsNullOrEmpty(TextBoxJsonFile.Text))
    {
        SetInvalidJsonError(Constants.NO_JSON_FILE_ERROR);
        return;
    }

    if (!IsJsonFile())
        return;

    TabClassMapping.IsEnabled = true;
    Tab.SelectedIndex = 1;

    LoadCategoryMappings();
    ButtonBack.Visibility = Visibility.Visible;
}

/// <summary>
/// Locks the UI, processes the detections of the selected JSON file for the
/// selected deployment and restores the controls afterwards.
/// </summary>
private void RunDetectionImport()
{
    TabClassMapping.IsEnabled = false;
    CanvasClassMapping.IsEnabled = false;
    TabDetails.IsEnabled = false;

    DisableButton(ButtonBack);
    DisableButton(ButtonNext);

    // Invoking change in one element to Refresh UI with the above changes
    ButtonBack.Dispatcher.Invoke(() => ButtonBack.Foreground = new SolidColorBrush(Colors.Gray), DispatcherPriority.Background);

    var data = LoadJson(TextBoxJsonFile.Text);
    int deploymentId = RequireSelectedId(comboBoxDeployment, "deployment");

    var categories = new Category()
    {
        blank = RequireSelectedId(cmbProjectTaxaMappingBlank, "blank category mapping"),
        animal = RequireSelectedId(cmbProjectTaxaMappingAnimal, "animal category mapping"),
        person = RequireSelectedId(cmbProjectTaxaMappingPerson, "person category mapping"),
        vehicle = RequireSelectedId(cmbProjectTaxaMappingVehicle, "vehicle category mapping")
    };

    if (ProgressbarUpdateProgress.Maximum == 0)
        ProgressbarUpdateProgress.Maximum = 1;

    // This makes inserts into the eMammal app much faster
    db.AddUniqueKeySequenceTaxa();

    logger.Info(Constants.LOG_START_PROCESSING_IMAGES);

    Common.ShowProgress(this, Constants.PROCESSING_IMAGES, 1);
    bool success = eMammalIntegration.ProcessDetections(data, deploymentId, comboBoxDeployment.Text, categories);

    if (success)
    {
        ButtonVerify.Visibility = Visibility.Visible;
        ShowProcessingCompleteMessage();
    }

    // Both outcomes end with the controls restored and NEXT disabled.
    ResetControlsAfterProcessing();
    DisableButton(ButtonNext);
}

/// <summary>
/// Returns the selected value of a combo box as an int, failing with a
/// descriptive message when nothing is selected.
/// </summary>
private static int RequireSelectedId(ComboBox comboBox, string description)
{
    object selected = comboBox.SelectedValue;
    if (selected == null)
        throw new InvalidOperationException("No " + description + " is selected.");

    return (int)selected;
}

/// <summary>
/// Fills the info text block with the completion message, showing the
/// selected project, sub-project and deployment names in bold.
/// </summary>
private void ShowProcessingCompleteMessage()
{
    TextBlockInfo.Text = "";
    TextBlockInfo.Inlines.Add("Processed all images in the JSON file.");
    TextBlockInfo.Inlines.Add(" Open and close the eMammal application, then in the eMammal application select ");

    TextBlockInfo.Inlines.Add("project >");
    TextBlockInfo.Inlines.Add(CreateBoldRun(comboBoxProject.Text));

    TextBlockInfo.Inlines.Add(" sub-project >");
    TextBlockInfo.Inlines.Add(CreateBoldRun(comboBoxSubProject.Text));

    TextBlockInfo.Inlines.Add(" deployment > ");
    TextBlockInfo.Inlines.Add(CreateBoldRun(comboBoxDeployment.Text));

    TextBlockInfo.Foreground = new SolidColorBrush(Colors.Blue);
    TextBlockInfo.Visibility = Visibility.Visible;
}

/// <summary>
/// Creates a bold text run for the completion message.
/// </summary>
private static Run CreateBoldRun(string text)
{
    Run run = new Run(text);
    run.FontWeight = FontWeights.Bold;
    return run;
}
|
||||
|
||||
/// <summary>
/// Enables a button and gives its text the blue (#005ce6) foreground.
/// </summary>
/// <param name="button">Button to enable.</param>
public void ReactivateButton(Button button)
{
    button.IsEnabled = true;

    var activeColor = (Color)ColorConverter.ConvertFromString("#005ce6");
    button.Foreground = new SolidColorBrush(activeColor);
}
|
||||
|
||||
/// <summary>
/// Disables a button and gives its text a gray foreground.
/// </summary>
/// <param name="button">Button to disable.</param>
private void DisableButton(Button button)
{
    button.IsEnabled = false;

    var disabledBrush = new SolidColorBrush(Colors.Gray);
    button.Foreground = disabledBrush;
}
|
||||
|
||||
/// <summary>
/// Clears and hides the progress label and progress bar, then re-enables the
/// tabs, the mapping canvas and the BACK and NEXT buttons.
/// </summary>
private void ResetControlsAfterProcessing()
{
    const DispatcherPriority priority = DispatcherPriority.Background;

    LabelProgress.Dispatcher.Invoke(() => LabelProgress.Content = "", priority);
    LabelProgress.Dispatcher.Invoke(() => LabelProgress.Visibility = Visibility.Hidden, priority);

    ProgressbarUpdateProgress.Dispatcher.Invoke(() => ProgressbarUpdateProgress.Value = 0, priority);
    ProgressbarUpdateProgress.Dispatcher.Invoke(() => ProgressbarUpdateProgress.Visibility = Visibility.Hidden, priority);

    TabDetails.IsEnabled = true;
    TabClassMapping.IsEnabled = true;
    CanvasClassMapping.IsEnabled = true;

    // Enabled state plus the blue foreground, for both navigation buttons.
    ReactivateButton(ButtonBack);
    ReactivateButton(ButtonNext);
}
|
||||
|
||||
/// <summary>
/// BROWSE button handler: opens a file dialog and, when the user confirms a
/// selection, copies the chosen file path into the JSON file text box.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void ButtonBrowseJsonClick(object sender, RoutedEventArgs e)
{
    // TODO: change this code copied from web
    var fileDialog = new Microsoft.Win32.OpenFileDialog();

    bool? confirmed = fileDialog.ShowDialog();
    if (confirmed.HasValue && confirmed.Value)
    {
        TextBoxJsonFile.Text = fileDialog.FileName;
    }
}
|
||||
|
||||
/// <summary>
/// Reloads the sub-project list when the project selection changes, once the
/// combo box has finished loading.
/// </summary>
private void ComboBoxProjectSelectionChanged(object sender, SelectionChangedEventArgs e)
{
    if (!IsComboBoxLoaded(sender))
        return;

    LoadSubProject();
}
|
||||
/// <summary>
/// Reloads the deployment list when the sub-project selection changes, once
/// the combo box has finished loading.
/// </summary>
private void ComboBoxSubProjectSelectionChanged(object sender, SelectionChangedEventArgs e)
{
    if (!IsComboBoxLoaded(sender))
        return;

    LoadDeployment();
}
|
||||
|
||||
/// <summary>
/// BACK button handler: returns to the details tab, re-enables NEXT and hides
/// the BACK button, the VERIFY button and the results tab.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void ButtonBackClick(object sender, RoutedEventArgs e)
{
    Tab.SelectedIndex = 0;
    Tab.Visibility = Visibility.Visible;

    // Enabled state plus the blue foreground.
    ReactivateButton(ButtonNext);

    ButtonBack.Visibility = Visibility.Hidden;
    ButtonVerify.Visibility = Visibility.Hidden;
    TabResults.Visibility = Visibility.Hidden;
}
|
||||
|
||||
/// <summary>
/// TextChanged handler of the JSON file text box: hides the JSON error label
/// and sets the text box border back to black.
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void TextBoxJsonTextChanged(object sender, TextChangedEventArgs e)
{
    this.LabelJsonFileError.Visibility = Visibility.Hidden;

    var normalBorder = Brushes.Black;
    this.TextBoxJsonFile.BorderBrush = normalBorder;
}
|
||||
|
||||
/// <summary>
/// Fills the project combo box from db.GetProjectDetails, displaying "name"
/// and using "project_id" as the selected value.
/// </summary>
public void Loadproject()
{
    DataTable projects = db.GetProjectDetails();

    FillDrodownLists(comboBoxProject, projects, "name", "project_id");
}
|
||||
|
||||
/// <summary>
/// Fills the sub-project combo box with the sub-projects of the selected
/// project, displaying "name" and using "event_id" as the selected value.
/// Does nothing while no project is selected.
/// </summary>
private void LoadSubProject()
{
    // SelectedValue is null when the project list has no selection (for
    // example while its items are being replaced); dereferencing it would
    // throw from inside a SelectionChanged handler.
    object projectId = comboBoxProject.SelectedValue;
    if (projectId == null)
        return;

    DataTable dt = db.GetSubProjectDetails(projectId.ToString());
    FillDrodownLists(comboBoxSubProject, dt, "name", "event_id");
}
|
||||
|
||||
/// <summary>
/// Fills the deployment combo box with the deployments of the selected
/// sub-project, displaying "name" and using "deployment_id" as the selected
/// value. Does nothing while no sub-project is selected.
/// </summary>
private void LoadDeployment()
{
    // SelectedValue is null when the sub-project list has no selection, which
    // happens whenever its items are replaced after a project change.
    object subProjectId = comboBoxSubProject.SelectedValue;
    if (subProjectId == null)
        return;

    bool success;
    DataTable dt = db.GetDeploymentDetails(out success, subProjectId.ToString());

    // NOTE(review): the meaning of 'success' is not visible here; only a
    // missing table is treated as "nothing to show" - confirm against
    // GetDeploymentDetails.
    if (dt == null)
        return;

    FillDrodownLists(comboBoxDeployment, dt, "name", "deployment_id");
}
|
||||
|
||||
/// <summary>
/// Binds a combo box to the default view of a table and selects its first item.
/// </summary>
/// <param name="combobox">Combo box to fill.</param>
/// <param name="dt">Table whose default view becomes the item source.</param>
/// <param name="displayMemberPath">Column shown to the user.</param>
/// <param name="SelectedValuePath">Column exposed through SelectedValue.</param>
private void FillDrodownLists(ComboBox combobox, DataTable dt, string displayMemberPath,
    string SelectedValuePath)
{
    DataView items = dt.DefaultView;
    combobox.ItemsSource = items;

    combobox.DisplayMemberPath = displayMemberPath;
    combobox.SelectedValuePath = SelectedValuePath;

    const int firstItem = 0;
    combobox.SelectedIndex = firstItem;
}
|
||||
|
||||
/// <summary>
/// Reports whether the combo box that raised an event has finished loading.
/// </summary>
/// <param name="sender">Event sender; must be a ComboBox.</param>
/// <returns>The combo box's IsLoaded value.</returns>
private bool IsComboBoxLoaded(object sender)
{
    return ((ComboBox)sender).IsLoaded;
}
|
||||
|
||||
/// <summary>
/// Marks the JSON file text box with a red border and shows the given text in
/// the error label below it.
/// </summary>
/// <param name="message">Error text to display.</param>
private void SetInvalidJsonError(string message)
{
    var errorBorder = Brushes.Red;
    this.TextBoxJsonFile.BorderBrush = errorBorder;

    this.LabelJsonFileError.Content = message;
    this.LabelJsonFileError.Visibility = Visibility.Visible;
}
|
||||
|
||||
/// <summary>
/// Checks whether the path in the JSON file text box has a ".json" extension
/// (case-insensitively) and shows an error when it does not. Only the
/// extension is inspected.
/// </summary>
/// <returns>True when the extension is ".json"; otherwise false.</returns>
private bool IsJsonFile()
{
    string extension = System.IO.Path.GetExtension(TextBoxJsonFile.Text);
    bool hasJsonExtension = extension.ToLower() == ".json";

    if (!hasJsonExtension)
        SetInvalidJsonError("Please select a valid JSON file");

    return hasJsonExtension;
}
|
||||
|
||||
/// <summary>
/// Fills the four category mapping combo boxes with the taxa of the selected
/// project and pre-selects a default species in each. The taxa are reloaded
/// whenever the selected project differs from the one they were loaded for.
/// </summary>
private void LoadCategoryMappings()
{
    object selectedProject = comboBoxProject.SelectedValue;

    // The project the lists were last filled for is remembered in the Tag of
    // the animal combo box. Lists already filled for the current project are
    // left alone so the user's mapping choices survive going back and forth;
    // without this check a later project change kept the first project's taxa.
    bool loadedForProject = cmbProjectTaxaMappingAnimal.HasItems
        && (selectedProject == null
            || object.Equals(cmbProjectTaxaMappingAnimal.Tag, selectedProject));
    if (loadedForProject)
        return;

    var taxas = db.GetEmammalTaxas((int)selectedProject);

    FillDrodownLists(cmbProjectTaxaMappingAnimal, taxas, "species", "emammal_project_taxa_id");
    FillDrodownLists(cmbProjectTaxaMappingPerson, taxas, "species", "emammal_project_taxa_id");
    FillDrodownLists(cmbProjectTaxaMappingVehicle, taxas, "species", "emammal_project_taxa_id");
    FillDrodownLists(cmbProjectTaxaMappingBlank, taxas, "species", "emammal_project_taxa_id");

    // Set the initial category in the category mapping dropdown lists
    SetPossibleCategory(cmbProjectTaxaMappingAnimal, "unknown animal");
    SetPossibleCategory(cmbProjectTaxaMappingPerson, "homo sapiens");
    SetPossibleCategory(cmbProjectTaxaMappingVehicle, "vehicle");
    SetPossibleCategory(cmbProjectTaxaMappingBlank, "no animal");

    cmbProjectTaxaMappingAnimal.Tag = selectedProject;
}
|
||||
/// <summary>
/// Selects the combo box item whose "species" value, lower-cased, equals the
/// given text. When several items match, the last one ends up selected; when
/// none match, the selection is left unchanged.
/// </summary>
/// <param name="comboBox">Category mapping combo box to update.</param>
/// <param name="text">Species name to look for, in lower case.</param>
private void SetPossibleCategory(ComboBox comboBox, string text)
{
    foreach (Object candidate in comboBox.Items)
    {
        var rowView = candidate as DataRowView;
        if (rowView == null)
            continue;

        string species = rowView["species"].ToString();
        if (species.ToLower() != text)
            continue;

        comboBox.SelectedIndex = comboBox.Items.IndexOf(candidate);
    }
}
|
||||
|
||||
/// <summary>
/// Reads a JSON file and deserializes it into a JsonData object.
/// </summary>
/// <param name="inputFileName">Path of the JSON file to read.</param>
/// <returns>The deserialized content of the file.</returns>
private JsonData LoadJson(string inputFileName)
{
    // Read the file named by the argument; the parameter used to be ignored
    // in favour of the text box content.
    string json = File.ReadAllText(inputFileName);

    return JsonConvert.DeserializeObject<JsonData>(json);
}
|
||||
|
||||
/// <summary>
/// Disables the details tab and the category mapping tab.
/// </summary>
private void DisableTabs()
{
    const bool enabled = false;

    TabClassMapping.IsEnabled = enabled;
    TabDetails.IsEnabled = enabled;
}
|
||||
/// <summary>
/// Enables the details tab and the category mapping tab.
/// </summary>
private void EnableTabs()
{
    const bool enabled = true;

    TabClassMapping.IsEnabled = enabled;
    TabDetails.IsEnabled = enabled;
}
|
||||
/// <summary>
/// VERIFY button handler: locks the navigation controls, lists the
/// annotations of the selected deployment in the results tab and restores the
/// controls afterwards. A wait cursor is shown while the handler runs.
/// </summary>
private void ButtonVerifyClick(object sender, RoutedEventArgs e)
{
    try
    {
        logger.Info("Verifying images...");

        Mouse.OverrideCursor = System.Windows.Input.Cursors.Wait;

        DisableButton(ButtonBack);
        DisableButton(ButtonNext);

        ButtonVerify.Visibility = Visibility.Hidden;

        TabDetails.IsEnabled = false;
        TabClassMapping.IsEnabled = false;
        TabResults.IsEnabled = true;

        int deploymentId = (int)comboBoxDeployment.SelectedValue;

        RichTextBoxResults.AppendText("\n");

        bool success = eMammalIntegration.VerifyAnnotations(deploymentId);

        Tab.SelectedIndex = 2;
        TabResults.Visibility = Visibility.Visible;
        TabResults.IsEnabled = true;

        ButtonVerify.Visibility = Visibility.Hidden;

        ResetControlsAfterProcessing();

        ReactivateButton(ButtonNext);
        ReactivateButton(ButtonBack);

        TabDetails.IsEnabled = true;
        TabClassMapping.IsEnabled = true;

        TextBlockInfo.Visibility = Visibility.Visible;

        if (!success)
            TabResults.Visibility = Visibility.Hidden;
    }
    catch (Exception ex)
    {
        HandleExceptions(ex);
    }
    finally
    {
        // Clear the override instead of forcing Arrow: a non-null value keeps
        // overriding every element's own cursor for the whole application.
        Mouse.OverrideCursor = null;
    }
}
|
||||
|
||||
/// <summary>
/// Logs an exception and reports it to the user. MySQL exceptions go to
/// HandleSQLExceptions and start Common.CheckConnection on a new thread; any
/// other exception is shown in a message box. The progress display is hidden
/// in both cases.
/// </summary>
/// <param name="ex">Exception to report.</param>
private void HandleExceptions(Exception ex)
{
    logger.Error(ex.ToString());

    var sqlException = ex as MySqlException;
    if (sqlException == null)
    {
        Common.HideProgress(this);
        MessageBox.Show(ex.Message);
        return;
    }

    HandleSQLExceptions(sqlException);
    Common.HideProgress(this);

    var connectionCheck = new Thread(() => Common.CheckConnection(this));
    connectionCheck.Start();
}
|
||||
|
||||
/// <summary>
/// Disables the tab control, hides the progress display and shows a message
/// for a MySQL exception: the database connection error for connection
/// failures, otherwise the most specific exception message available.
/// </summary>
/// <param name="ex">MySQL exception to report.</param>
private void HandleSQLExceptions(MySqlException ex)
{
    Tab.IsEnabled = false;
    Common.HideProgress(this);

    int number = -1;
    var innerSql = ex.InnerException as MySqlException;
    if (innerSql != null)
    {
        number = innerSql.Number;
        logger.Error(innerSql.ToString());
    }

    if (number == 0 || ex.Number == 1042)
    {
        Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);
        return;
    }

    if (ex.InnerException == null)
    {
        Common.SetMessage(this, ex.Message, true, true);
        return;
    }

    Exception innermost = ex.InnerException.InnerException;
    if (innermost == null)
    {
        Common.SetMessage(this, ex.InnerException.Message, true, true);
        return;
    }

    // no way to get the errorcode from inner exception therefore using this method
    string errmsg = innermost.Message;

    // The address was written "120.0.0.1:3307", which looks like a typo for
    // the loopback address - TODO confirm against the configured connection string.
    bool connectionRefused = errmsg.Contains("127.0.0.1:3307")
        && errmsg.Contains("No connection could be made");

    if (connectionRefused)
        Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);
    else
        // Previously an unrecognised innermost error produced no message at all.
        Common.SetMessage(this, errmsg, true, true);
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,315 +0,0 @@
|
|||
using System;
|
||||
using System.Configuration;
|
||||
using System.Text;
|
||||
using System.Data;
|
||||
using MySql.Data.MySqlClient;
|
||||
using NLog;
|
||||
|
||||
namespace eMammal_integration_application
|
||||
{
|
||||
public class eMammalMySQLOps
|
||||
{
|
||||
Logger logger = LogManager.GetCurrentClassLogger();
|
||||
|
||||
eMammalIntegrationWindow window;
|
||||
|
||||
private string mysqlConnectionstring = ConfigurationManager.AppSettings["mysqlConnectionstring"].ToString();
|
||||
MySqlConnection connection = new MySqlConnection();
|
||||
|
||||
/// <summary>
/// Creates the helper for a given application window and builds the MySQL
/// connection from the configured connection string.
/// </summary>
/// <param name="window">Window stored on this instance.</param>
public eMammalMySQLOps(eMammalIntegrationWindow window)
{
    connection = new MySqlConnection(mysqlConnectionstring);
    this.window = window;
}

/// <summary>
/// Creates the helper without a window.
/// </summary>
public eMammalMySQLOps()
    : this(null)
{
}
|
||||
|
||||
/// <summary>
/// Opens the shared connection when its state is <c>Closed</c>.
/// </summary>
/// <param name="returnOnError">
/// When true, a failure is logged and reported as <c>false</c>; when false
/// (the default) the exception is rethrown to the caller.
/// </param>
/// <returns>
/// <c>true</c> when no exception occurred; <c>false</c> only when opening failed
/// and <paramref name="returnOnError"/> is true.
/// </returns>
public bool OpenConnectionIfNotOpen(bool returnOnError = false)
{
    try
    {
        if (connection.State == ConnectionState.Closed)
        {
            logger.Info(Constants.LOG_OPEN_CLOSED_DATABASE_CONNECTION);
            connection.Open();
        }

        logger.Info(Constants.LOG_OPENING_CLOSED_DATABASE_CONNECTION_SUCCESSFULL);
        return true;
    }
    catch (Exception ex)
    {
        if (!returnOnError)
            throw;

        // Keep a trace of the failure instead of discarding it silently.
        logger.Error(ex.ToString());
        return false;
    }
}
|
||||
/// <summary>
/// Closes the shared connection when it is open. Failures are logged and not
/// propagated.
/// </summary>
public void CloseConnection()
{
    try
    {
        if (connection.State != ConnectionState.Open)
        {
            logger.Info(Constants.LOG_DATABASE_CONNECTION_NOT_OPEN);
            return;
        }

        connection.Close();
        logger.Info(Constants.LOG_CLOSING_OPEN_DATABASE_CONNECTION);
    }
    catch (Exception closeError)
    {
        logger.Info(Constants.LOG_ERROR_WHILE_CLOSING_DATABASE_CONNECTION);
        logger.Error(closeError.ToString());
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether the shared connection is in the <c>Open</c> state.
/// </summary>
/// <returns>
/// <c>true</c> when the state is <c>Open</c>; <c>false</c> otherwise, including
/// when reading the state throws.
/// </returns>
public bool IsConnectionOpen()
{
    try
    {
        return connection.State == ConnectionState.Open;
    }
    catch (Exception)
    {
        return false;
    }
}
|
||||
// TODO: add error checking return null on error
|
||||
/// <summary>
/// Runs a SELECT statement on the shared connection and loads the result into
/// a <c>DataTable</c>.
/// </summary>
/// <param name="query">SQL query string.</param>
/// <returns>A table holding the rows returned by the query.</returns>
public DataTable GeData(string query)
{
    // Same guard as the other query helpers: executing on a closed connection fails.
    OpenConnectionIfNotOpen();

    DataTable dt = new DataTable();

    using (MySqlCommand command = new MySqlCommand(query, connection))
    {
        command.CommandType = CommandType.Text;

        // Dispose the reader even when loading fails, so it cannot stay open on
        // the connection that every other method shares.
        using (MySqlDataReader reader = command.ExecuteReader())
        {
            dt.Load(reader);
        }
    }

    return dt;
}
|
||||
|
||||
/// <summary>
/// Executes a statement that returns no rows (insert, update, DDL) on the
/// shared connection.
/// </summary>
/// <param name="query">SQL query string.</param>
public void ExecuteQuery(string query)
{
    // Same guard as ExecuteScalar and GetDataTable.
    OpenConnectionIfNotOpen();

    using (MySqlCommand command = new MySqlCommand(query, connection))
    {
        command.CommandType = CommandType.Text;
        command.ExecuteNonQuery();
    }
}
|
||||
|
||||
/// <summary>
/// Executes a query on the shared connection and returns the value produced by
/// the command's <c>ExecuteScalar</c> call.
/// </summary>
/// <param name="query">SQL query string.</param>
/// <returns>The scalar result of the command.</returns>
public object ExecuteScalar(string query)
{
    OpenConnectionIfNotOpen();

    using (MySqlCommand scalarCommand = new MySqlCommand(query, connection))
    {
        scalarCommand.CommandType = CommandType.Text;
        scalarCommand.CommandText = query;

        return scalarCommand.ExecuteScalar();
    }
}
|
||||
/// <summary>
/// Adds the unique key on (sequence_id, project_taxa_id) to
/// emammal_sequence_annotation to prevent duplicate inserts, unless a constraint
/// named ai4e_unique_key is already registered for that table.
/// </summary>
public void AddUniqueKeySequenceTaxa()
{
    string lookupSql = " SELECT constraint_name" +
                       " FROM information_schema.TABLE_CONSTRAINTS" +
                       " WHERE table_name = 'emammal_sequence_annotation'" +
                       " AND constraint_name = 'ai4e_unique_key'";

    logger.Info(Constants.LOG_CHECKING_IF_UNIQUE_KEY_ALREADY_EXISTS);

    // A non-null scalar means the constraint is already there.
    if (ExecuteScalar(lookupSql) != null)
    {
        logger.Info(Constants.LOG_UNIQUE_KEY_ALREADY_EXISTS);
        return;
    }

    string alterSql = " ALTER TABLE emammal_sequence_annotation " +
                      " ADD CONSTRAINT ai4e_unique_key UNIQUE KEY(sequence_id, project_taxa_id); ";

    logger.Info(Constants.LOG_ADDING_UNIQUE_KEY_CONSTRAINT);
    logger.Info(alterSql);

    ExecuteQuery(alterSql);
}
|
||||
|
||||
|
||||
/// <summary>
/// Gets the raw file name and sequence id of every image in a deployment.
/// </summary>
/// <param name="deploymentId">Id of the deployment whose images are listed.</param>
/// <returns>
/// A table named "imageSequences" with the columns raw_name and image_sequence_id.
/// </returns>
public DataTable GetsequenceIDsfromDB(int deploymentId)
{
    // Schema name written as wild_id throughout, matching the other queries.
    string sql = string.Format(" SELECT b.raw_name, b.image_sequence_id " +
                               " FROM wild_id.image_sequence a, wild_id.image b " +
                               " WHERE a.image_sequence_id = b.image_sequence_id " +
                               " AND a.deployment_id = {0}; ", deploymentId);

    DataTable dt = new DataTable("imageSequences");

    // Use the shared connection that OpenConnectionIfNotOpen actually opens.
    // The previous version created a second, local connection that shadowed the
    // field while still opening the field connection.
    OpenConnectionIfNotOpen();

    using (MySqlDataAdapter adapter = new MySqlDataAdapter(sql, connection))
    {
        adapter.Fill(dt);
    }

    return dt;
}
|
||||
|
||||
/// <summary>
/// Gets the species name and project taxa id of every taxon registered for a project.
/// </summary>
/// <param name="projectId">Id of the project to query.</param>
/// <returns>A table with the columns species and emammal_project_taxa_id.</returns>
public DataTable GetEmammalTaxas(int projectId)
{
    string taxaQuery = string.Format(" SELECT species, emammal_project_taxa_id FROM wild_id.emammal_project_taxa " +
                                     " WHERE project_id = {0}", projectId);

    return GetDataTable(taxaQuery, "ProjectDetails");
}
|
||||
|
||||
/// <summary>
/// Gets the id and a display name ("p-{id} {name}") of every eMammal project.
/// </summary>
/// <returns>A table with the columns project_id and name.</returns>
public DataTable GetProjectDetails()
{
    string projectQuery = " SELECT e.project_id, " +
                          " CONCAT('p', '-', e.project_id, ' ', p.name ) as name " +
                          " FROM wild_id.project p, wild_id.emammal_project e " +
                          " WHERE p.project_id = e.project_id ";

    return GetDataTable(projectQuery, "ProjectDetails");
}
|
||||
/// <summary>
/// Gets the id and a display name ("sp-{id} {name}") of every event belonging to
/// a project.
/// </summary>
/// <param name="projectId">Project id, inserted into the SQL text as given.</param>
/// <returns>A table with the columns event_id and name.</returns>
public DataTable GetSubProjectDetails(string projectId)
{
    // NOTE(review): projectId is a string placed directly into the statement;
    // presumably it always comes from a list selection - confirm with the callers.
    string eventQuery = string.Format(" SELECT e.event_id, " +
                                      " CONCAT('sp', '-', e.event_id, ' ', e.name ) as name " +
                                      " FROM wild_id.event e " +
                                      " WHERE e.project_id = {0} ", projectId);

    return GetDataTable(eventQuery, "SubProjectDetails");
}
|
||||
/// <summary>
/// Gets the id and a display name ("d-{id} {name}") of every eMammal deployment
/// belonging to an event.
/// </summary>
/// <param name="success">
/// Set to <c>true</c> once the query has completed; stays <c>false</c> when an
/// exception interrupts the method.
/// </param>
/// <param name="eventId">Event id, inserted into the SQL text as given.</param>
/// <returns>A table with the columns deployment_id and name.</returns>
public DataTable GetDeploymentDetails(out bool success, string eventId)
{
    success = false;

    // NOTE(review): eventId is a string placed directly into the statement;
    // presumably it always comes from a list selection - confirm with the callers.
    string sql = string.Format(" SELECT d.deployment_id, " +
                               " CONCAT('d', '-', d.deployment_id, ' ', d.name ) as name " +
                               " FROM deployment d, emammal_deployment e " +
                               " WHERE d.deployment_id = e.deployment_id " +
                               " AND event_id = {0} ", eventId);

    DataTable dt = GetDataTable(sql, "DeploymentsDetails");

    // The flag was never set to true before, so it reported failure even when
    // the query had succeeded.
    success = true;

    return dt;
}
|
||||
|
||||
/// <summary>
/// Runs a query on the shared connection and returns its rows as a <c>DataTable</c>.
/// </summary>
/// <param name="sql">SQL query string.</param>
/// <param name="type">Label supplied by the caller; this method does not read it.</param>
/// <returns>A table filled with the query result.</returns>
public DataTable GetDataTable(string sql, string type)
{
    DataTable table = new DataTable();

    OpenConnectionIfNotOpen();

    using (MySqlDataAdapter tableAdapter = new MySqlDataAdapter(sql, connection))
    {
        tableAdapter.Fill(table);
    }

    return table;
}
|
||||
|
||||
/// <summary>
/// Creates a builder holding the opening of the bulk INSERT statement for
/// emammal_sequence_annotation, up to and including "VALUES ".
/// </summary>
/// <returns>A new <c>StringBuilder</c> containing the statement prefix.</returns>
public StringBuilder GetBulkInsertInitialString()
{
    return new StringBuilder("INSERT INTO wild_id.emammal_sequence_annotation(sequence_id, project_taxa_id, total_count) VALUES ");
}
|
||||
|
||||
/// <summary>
/// Completes a bulk INSERT statement with an ON DUPLICATE KEY UPDATE clause and
/// executes it on the shared connection.
/// </summary>
/// <param name="sql">
/// Statement text whose last character is dropped before the clause is appended.
/// </param>
/// <returns>
/// <c>true</c> after the statement has been executed; <c>false</c> when
/// <paramref name="sql"/> is null or empty and nothing was executed.
/// </returns>
public bool BulkInsertAnnotations(StringBuilder sql)
{
    // Nothing to run; the unguarded Remove call below would throw on an empty builder.
    if (sql == null || sql.Length == 0)
        return false;

    // NOTE(review): presumably every appended value tuple ends with a comma, so
    // this strips the trailing separator - confirm against the caller.
    string sqlString = sql.ToString().Remove(sql.Length - 1);

    sqlString += " ON DUPLICATE KEY UPDATE " +
                 " sequence_id = VALUES(sequence_id)," +
                 " project_taxa_id = VALUES(project_taxa_id), " +
                 " total_count = VALUES(total_count);";

    OpenConnectionIfNotOpen();

    // Build the command from the finished statement rather than from the raw
    // builder text that was overwritten afterwards.
    using (MySqlCommand cmd = new MySqlCommand(sqlString, connection))
    {
        cmd.CommandType = CommandType.Text;
        cmd.ExecuteNonQuery();
    }

    return true;
}
|
||||
|
||||
/// <summary>
/// Gets every annotated image of a deployment together with the common name of
/// its annotated taxon, ordered by raw file name.
/// </summary>
/// <param name="deploymentId">Id of the deployment to query.</param>
/// <returns>
/// A table with the columns raw_name, image_sequence_id, deployment_id and common_name.
/// </returns>
public DataTable GetImagesForDeployment(int deploymentId)
{
    logger.Info("Starting verification...");

    string verificationQuery = string.Format(" SELECT b.raw_name, b.image_sequence_id, deployment_id, d.common_name " +
                                             " FROM wild_ID.image_sequence a, wild_id.image b, " +
                                             " wild_id.emammal_sequence_annotation c, " +
                                             " wild_id.emammal_project_taxa d " +
                                             " WHERE a.image_sequence_id = b.image_sequence_id " +
                                             " AND c.sequence_id = a.image_sequence_id " +
                                             " AND c.project_taxa_id = d.emammal_project_taxa_id " +
                                             " AND a.deployment_id = {0} order by b.raw_name", deploymentId);

    logger.Info(verificationQuery);

    return GetDataTable(verificationQuery, "");
}
|
||||
|
||||
}
|
||||
|
||||
}
|
До Ширина: | Высота: | Размер: 4.2 KiB |
До Ширина: | Высота: | Размер: 5.5 KiB |
До Ширина: | Высота: | Размер: 91 KiB |
|
@ -1,18 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8" ?>
|
||||
<nlog
|
||||
xmlns="http://www.nlog-project.org/schemas/NLog.xsd"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
|
||||
<targets>
|
||||
<target name="file" xsi:type="File"
|
||||
layout="-------------- ${level} (${longdate}) --------------${newline}
|
||||
${newline}
|
||||
Call Site: ${callsite}${newline}
|
||||
${newline}
|
||||
${message}${newline}" fileName="log.log" archiveOldFileOnStartup="true" maxArchiveFiles="1"/>
|
||||
</targets>
|
||||
|
||||
<rules>
|
||||
<logger name="*" minlevel="Trace" writeTo="file" />
|
||||
</rules>
|
||||
</nlog>
|
|
@ -1,17 +0,0 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<packages>
|
||||
<package id="BouncyCastle" version="1.8.3.1" targetFramework="net472" />
|
||||
<package id="Google.Protobuf" version="3.15.0" targetFramework="net472" />
|
||||
<package id="K4os.Compression.LZ4" version="1.1.11" targetFramework="net472" />
|
||||
<package id="K4os.Compression.LZ4.Streams" version="1.1.11" targetFramework="net472" />
|
||||
<package id="K4os.Hash.xxHash" version="1.0.6" targetFramework="net472" />
|
||||
<package id="MySql.Data" version="8.0.21" targetFramework="net472" />
|
||||
<package id="Newtonsoft.Json" version="13.0.1" targetFramework="net472" />
|
||||
<package id="NLog" version="4.7.2" targetFramework="net472" />
|
||||
<package id="Serilog" version="2.9.0" targetFramework="net472" />
|
||||
<package id="SSH.NET" version="2020.0.2" targetFramework="net472" />
|
||||
<package id="System.Buffers" version="4.5.0" targetFramework="net472" />
|
||||
<package id="System.Memory" version="4.5.3" targetFramework="net472" />
|
||||
<package id="System.Numerics.Vectors" version="4.4.0" targetFramework="net472" />
|
||||
<package id="System.Runtime.CompilerServices.Unsafe" version="4.6.0" targetFramework="net472" />
|
||||
</packages>
|
|
@ -1,23 +0,0 @@
|
|||
# Troubleshooting eMammal app installation errors
|
||||
|
||||
If you get the following error during installation:
|
||||
|
||||
![](images/error-invoking-method.jpg)
|
||||
|
||||
![](images/failed-to-launch-JVM.jpg)
|
||||
|
||||
...try the following:
|
||||
|
||||
1. If the JDK is not installed on the computer, install the latest JDK version (be sure to install it as administrator).
|
||||
|
||||
2. If the JDK is already installed, uninstall and reinstall it (be sure to install it as administrator).
|
||||
|
||||
3. After the JDK is installed, make sure that the JDK path is included in the system path. [This tutorial](https://javatutorial.net/set-java-home-windows-10) shows how to add the JDK path
|
||||
to the system path in Windows 10.
|
||||
|
||||
4. If you have MySQL server installed on your machine, uninstall MySQL server or turn off the MySQL service. To turn off MySQL server, open
|
||||
the services.msc app in Windows, find the MySQL service, right-click on the service, and click "stop".
|
||||
|
||||
|
||||
|
||||
|
До Ширина: | Высота: | Размер: 132 KiB |
До Ширина: | Высота: | Размер: 76 KiB |
До Ширина: | Высота: | Размер: 74 KiB |
До Ширина: | Высота: | Размер: 105 KiB |
До Ширина: | Высота: | Размер: 87 KiB |
До Ширина: | Высота: | Размер: 11 KiB |
До Ширина: | Высота: | Размер: 9.5 KiB |
До Ширина: | Высота: | Размер: 111 KiB |
До Ширина: | Высота: | Размер: 70 KiB |
До Ширина: | Высота: | Размер: 91 KiB |
До Ширина: | Высота: | Размер: 131 KiB |
До Ширина: | Высота: | Размер: 110 KiB |
До Ширина: | Высота: | Размер: 124 KiB |
До Ширина: | Высота: | Размер: 100 KiB |
До Ширина: | Высота: | Размер: 118 KiB |
|
@ -1,5 +0,0 @@
|
|||
host="localhost"
|
||||
username="root"
|
||||
password=""
|
||||
port=3307
|
||||
database="wild_id"
|
|
@ -1,127 +0,0 @@
|
|||
#
|
||||
# Test script for pushing annotations to the eMammal db
|
||||
#
|
||||
|
||||
#%% Imports
|
||||
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import pymysql
|
||||
import config as cfg
|
||||
|
||||
from tqdm import tqdm
|
||||
from enum import Enum
|
||||
|
||||
|
||||
#%% Database functions
|
||||
|
||||
class Categories(int, Enum):
    """Detector category ids found in the ``category`` field of a detection.

    The ``int`` mix-in makes members compare equal to plain integers, so a
    parsed id such as ``int(detection['category'])`` can be compared directly
    with ``Categories.animal``. With a plain ``Enum`` that comparison is
    always false.
    """

    animal = 1
    person = 2
    vehicle = 3
|
||||
|
||||
# Connection shared by every helper in this script. It is opened at import
# time from the settings in the config module. ``password`` and ``database``
# are the current keyword names; ``passwd`` and ``db`` are legacy aliases.
mysql_connection = pymysql.connect(
    host=cfg.host,
    user=cfg.username,
    password=cfg.password,
    database=cfg.database,
    port=cfg.port,
)
|
||||
|
||||
def update_data(sql, params=None):
    """Execute a data-modifying statement on the shared connection.

    Nothing is committed here; the caller commits on ``mysql_connection``.

    Args:
        sql: Statement text, optionally containing ``%s`` placeholders.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver. Prefer this over formatting values
            into ``sql``.

    Returns:
        The value returned by ``cursor.execute``.
    """
    with mysql_connection.cursor() as cursor:
        return cursor.execute(sql, params)
|
||||
|
||||
def get_records_all(sql, params=None):
    """Run a query on the shared connection and return every result row.

    Args:
        sql: Query text, optionally containing ``%s`` placeholders.
        params: Optional sequence or mapping of values bound to the
            placeholders by the driver.

    Returns:
        All rows of the result, as returned by ``cursor.fetchall()``.
    """
    with mysql_connection.cursor() as cursor:
        cursor.execute(sql, params)
        return cursor.fetchall()
|
||||
|
||||
def format_data_print_deployments(rows):
    """Print a numbered menu of rows and return the numbered entries.

    Each row is printed as ``"<n>. <row[0]>-<row[1]>"`` with ``n`` starting at 1.

    Args:
        rows: Iterable of indexable rows with at least two items each.

    Returns:
        A list of ``(n, row[0], row[1])`` tuples in input order.
    """
    result = []
    for count, row in enumerate(rows, start=1):
        print("{}. {}-{}".format(count, row[0], row[1]))
        result.append((count, row[0], row[1]))

    return result
|
||||
|
||||
|
||||
#%% Command-line driver
|
||||
|
||||
def main():
    """Push detector results from a .json file to the eMammal database.

    Asks which deployment to update, picks one eMammal taxon per image from
    its highest-confidence detection (or "No Animal" when the image has no
    detections), and writes that taxon id to the sequence annotation of the
    matching image in the chosen deployment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=str, help='Input .json filename')

    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    print("Enter the number of the deployment:")

    rows = get_records_all(''' select * from deployment ''')
    deployments = format_data_print_deployments(rows)
    print("\n")

    # The menu is numbered from 1 while the list is indexed from 0.
    try:
        choice = int(input())
        if choice < 1:
            raise IndexError(choice)
        # NOTE(review): the first column of the deployment table is presumably
        # its id - confirm against the schema.
        deployment_id = deployments[choice - 1][1]
    except (ValueError, IndexError):
        parser.exit(1, "Invalid deployment number\n")

    print(deployment_id)

    # TODO: check project ID ?
    # Fetch the species next to its id. Row order of an IN (...) query is not
    # guaranteed, so rows must not be picked by position.
    sql = ''' SELECT species, emammal_project_taxa_id FROM wild_id.emammal_project_taxa
              where species in ("No Animal", "Unknown Animal", "Homo sapiens", "Vehicle") '''
    taxa_id_by_species = dict(get_records_all(sql))

    species_by_category = {
        Categories.animal: "Unknown Animal",
        Categories.person: "Homo sapiens",
        Categories.vehicle: "Vehicle",
    }

    required_species = ["No Animal"] + list(species_by_category.values())
    missing = [s for s in required_species if s not in taxa_id_by_species]
    if missing:
        parser.exit(1, "Missing eMammal taxa: {}\n".format(", ".join(missing)))

    with open(args.input_file) as f:
        data = json.load(f)

    # Values are bound by the driver instead of being formatted into the text.
    # The image/image_sequence join and the deployment filter restrict the
    # update to the image in the chosen deployment.
    update_sql = """ UPDATE wild_id.emammal_sequence_annotation,
                            wild_id.image,
                            wild_id.image_sequence,
                            wild_id.deployment
                     SET wild_id.emammal_sequence_annotation.project_taxa_id = %s
                     WHERE wild_id.image.image_sequence_id = wild_id.emammal_sequence_annotation.sequence_id
                       AND wild_id.image_sequence.image_sequence_id = wild_id.image.image_sequence_id
                       AND wild_id.image_sequence.deployment_id = wild_id.deployment.deployment_id
                       AND wild_id.deployment.deployment_id = %s
                       AND wild_id.image.raw_name = %s """

    with mysql_connection.cursor() as cursor:
        for im in tqdm(data['images']):
            detections = im['detections']

            if not detections:
                species = "No Animal"
            else:
                # Take the most confident detection directly; this avoids
                # matching floats against a separately stored maximum.
                best = max(detections, key=lambda d: d['conf'])
                try:
                    species = species_by_category[Categories(int(best['category']))]
                except ValueError:
                    print("Skipping {}: unknown category {!r}".format(
                        im['file'], best['category']))
                    continue

            cursor.execute(
                update_sql,
                (taxa_id_by_species[species], deployment_id, im['file']))

    mysql_connection.commit()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|