v-andreshern 2023-10-27 16:13:33 +00:00
Parent e9eff3b2dc
Commit a80b079f7b
827 changed files: 1379 additions and 246674 deletions

70
.gitignore (vendored)
View File

@@ -1,61 +1,9 @@
################################################################################
# This .gitignore file was automatically created by Microsoft(R) Visual Studio.
################################################################################
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Jupyter Notebook
.ipynb_checkpoints
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Apple directory
*.DS_Store
# IDEs
*.idea/
*.project
.spyderproject
.spyproject
.vscode
# Demo files
demo/aadconfig.py
demo/apiconfig.py
demo/CameraTrapAssets
demo/static/uploads
demo/static/results
.webassets-cache/
# CameraTrapJsonFileProcessingApp
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/.vs/
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/bin/
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/obj/
api/batch_processing/postprocessing/CameraTrapJsonFileProcessingApp/packages/
# TF and PyTorch model files
*.pb
*.pt
# batch processing API config files
api_config*.py
# Other
*.pth
*.o
debug.log
*.swp
# Things created when building the sync API
yolov5
api/synchronous/api_core/animal_detection_api/detection
__pycache__
*weights*
*processed.*
*output*
*flagged*
*temp*
PytorchWildlife.egg-info/
*dev*
*test*

3
.gitmodules (vendored)
View File

@@ -1,3 +0,0 @@
[submodule "demo/CameraTrapAssets"]
path = demo/CameraTrapAssets
url = https://ai4evisionexternal.visualstudio.com/gramener/_git/camera-trap-app-assets

34
LICENSE
View File

@@ -1,21 +1,21 @@
MIT License
MIT License
Copyright (c) Microsoft Corporation. All rights reserved.
Copyright (c) [2023] [Microsoft]
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -0,0 +1,3 @@
from .data import *
from .models import *
from .utils import *

View File

@@ -0,0 +1,2 @@
from .datasets import *
from .transforms import *

View File

@@ -0,0 +1,120 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import os
from PIL import Image
import numpy as np
import supervision as sv
from torch.utils.data import Dataset
# Making the DetectionImageFolder class available for import from this module
__all__ = [
"DetectionImageFolder",
]
class DetectionImageFolder(Dataset):
"""
A PyTorch Dataset for loading images from a specified directory.
Each item in the dataset is a tuple containing the image data,
the image's path, and the original size of the image.
"""
def __init__(self, image_dir, transform=None):
"""
Initializes the dataset.
Parameters:
image_dir (str): Path to the directory containing the images.
transform (callable, optional): Optional transform to be applied on the image.
"""
self.image_dir = image_dir
# Listing and sorting all image files in the specified directory
self.images = sorted(os.listdir(self.image_dir))
self.transform = transform
def __getitem__(self, idx):
"""
Retrieves an image from the dataset.
Parameters:
idx (int): Index of the image to retrieve.
Returns:
tuple: Contains the image data, the image's path, and its original size.
"""
# Get image filename and path
img = self.images[idx]
img_path = os.path.join(self.image_dir, img)
# Load and convert image to RGB
img = Image.open(img_path).convert("RGB")
img = np.asarray(img)
img_size_ori = img.shape
# Apply transformation if specified
if self.transform:
img = self.transform(img)
return img, img_path, np.array(img_size_ori)
def __len__(self):
"""
Returns the total number of images in the dataset.
Returns:
int: Total number of images.
"""
return len(self.images)
class DetectionCrops(Dataset):
def __init__(self, detection_results, transform=None, path_head=None, animal_cls_id=0):
self.detection_results = detection_results
self.transform = transform
self.path_head = path_head
self.animal_cls_id = animal_cls_id # This determines which detection class id represents animals.
self.img_ids = []
self.xyxys = []
self.load_detection_results()
def load_detection_results(self):
for det in self.detection_results:
for xyxy, det_id in zip(det["detections"].xyxy, det["detections"].class_id):
# Only run recognition on animal detections
if det_id == self.animal_cls_id:
self.img_ids.append(det["img_id"])
self.xyxys.append(xyxy)
def __getitem__(self, idx):
"""
Retrieves an image from the dataset.
Parameters:
idx (int): Index of the image to retrieve.
Returns:
tuple: Contains the image data and the image's path.
"""
# Get image path and corresponding bbox xyxy for cropping
img_id = self.img_ids[idx]
xyxy = self.xyxys[idx]
img_path = os.path.join(self.path_head, img_id) if self.path_head else img_id
# Load and crop image with supervision
img = sv.crop_image(np.array(Image.open(img_path).convert("RGB")),
xyxy=xyxy)
# Apply transformation if specified
if self.transform:
img = self.transform(Image.fromarray(img))
return img, img_path
def __len__(self):
return len(self.img_ids)
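A minimal usage sketch (the directory path, import paths, and batch size below are illustrative assumptions): `DetectionImageFolder` plugs directly into a standard PyTorch `DataLoader`, provided the transform produces fixed-size tensors so batches can be stacked.

```python
from torch.utils.data import DataLoader

from PytorchWildlife.data import datasets as pw_data    # assumed import path
from PytorchWildlife.data import transforms as pw_trans  # assumed import path

# A fixed-size transform (e.g. the MegaDetector v5 letterbox transform defined
# in transforms.py) is required so the DataLoader can stack images into a batch.
transform = pw_trans.MegaDetector_v5_Transform(target_size=1280, stride=32)
dataset = pw_data.DetectionImageFolder("path/to/images", transform=transform)  # hypothetical dir
loader = DataLoader(dataset, batch_size=4, shuffle=False)

for imgs, paths, sizes in loader:
    # imgs: (B, 3, 1280, 1280) float tensors in [0, 1]; sizes: original (H, W, C) shapes
    print(imgs.shape, list(paths), sizes)
```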

View File

@@ -0,0 +1,90 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import numpy as np
import torch
from torchvision import transforms
from yolov5.utils.augmentations import letterbox
# Making the provided classes available for import from this module
__all__ = [
"MegaDetector_v5_Transform",
"Classification_Inference_Transform"
]
class MegaDetector_v5_Transform:
"""
A transformation class to preprocess images for the MegaDetector v5 model.
This includes resizing, transposing, and normalization operations.
This is a required transformation for the YoloV5 model.
"""
def __init__(self, target_size=1280, stride=32):
"""
Initializes the transform.
Args:
target_size (int): Desired size for the image's longest side after resizing.
stride (int): Stride value for resizing.
"""
self.target_size = target_size
self.stride = stride
def __call__(self, np_img):
"""
Applies the transformation on the provided image.
Args:
np_img (np.ndarray): Input image as a numpy array.
Returns:
torch.Tensor: Transformed image.
"""
# Resize and pad the image using the letterbox function
img = letterbox(np_img, new_shape=self.target_size, stride=self.stride, auto=False)[0]
# Transpose and convert image to PyTorch tensor
img = img.transpose((2, 0, 1))
img = np.ascontiguousarray(img)
img = torch.from_numpy(img).float()
img /= 255.0
return img
class Classification_Inference_Transform:
"""
A transformation class to preprocess images for classification inference.
This includes resizing, normalization, and conversion to a tensor.
"""
# Normalization constants
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
def __init__(self, target_size=224):
"""
Initializes the transform.
Args:
target_size (int): Desired size for the height and width after resizing.
"""
# Define the sequence of transformations
self.trans = transforms.Compose([
transforms.Resize((target_size, target_size)),
transforms.ToTensor(),
transforms.Normalize(self.mean, self.std)
])
def __call__(self, img):
"""
Applies the transformation on the provided image.
Args:
img (PIL.Image.Image): Input image in PIL format.
Returns:
torch.Tensor: Transformed image.
"""
img = self.trans(img)
return img
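A hedged sketch of both transforms in use (the file names are placeholders):

```python
import numpy as np
from PIL import Image

# Detection preprocessing: HWC uint8 numpy image -> letterboxed CHW float tensor in [0, 1].
det_tf = MegaDetector_v5_Transform(target_size=1280, stride=32)
np_img = np.asarray(Image.open("example.jpg").convert("RGB"))  # hypothetical file
det_tensor = det_tf(np_img)  # shape: (3, 1280, 1280)

# Classification preprocessing: PIL image -> normalized (3, 224, 224) tensor.
clf_tf = Classification_Inference_Transform(target_size=224)
clf_tensor = clf_tf(Image.open("crop.jpg").convert("RGB"))  # hypothetical file
```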

View File

@@ -0,0 +1 @@
from .resnet import *

View File

@@ -0,0 +1,3 @@
from .base_classifier import *
from .opossum import *
from .amazon import *

View File

@@ -0,0 +1,105 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import torch
from .base_classifier import PlainResNetInference
__all__ = [
"AI4GAmazonRainforest"
]
class AI4GAmazonRainforest(PlainResNetInference):
"""
Amazon Rainforest Animal Classifier that inherits from PlainResNetInference.
This classifier is specialized for recognizing 36 different animals in the Amazon Rainforest.
"""
# Image size for the Amazon Rainforest classifier
IMAGE_SIZE = 224
# Class names for prediction
CLASS_NAMES = {
0: 'Dasyprocta',
1: 'Bos',
2: 'Pecari',
3: 'Mazama',
4: 'Cuniculus',
5: 'Leptotila',
6: 'Human',
7: 'Aramides',
8: 'Tinamus',
9: 'Eira',
10: 'Crax',
11: 'Procyon',
12: 'Capra',
13: 'Dasypus',
14: 'Sciurus',
15: 'Crypturellus',
16: 'Tamandua',
17: 'Proechimys',
18: 'Leopardus',
19: 'Equus',
20: 'Columbina',
21: 'Nyctidromus',
22: 'Ortalis',
23: 'Emballonura',
24: 'Odontophorus',
25: 'Geotrygon',
26: 'Metachirus',
27: 'Catharus',
28: 'Cerdocyon',
29: 'Momotus',
30: 'Tapirus',
31: 'Canis',
32: 'Furnarius',
33: 'Didelphis',
34: 'Sylvilagus',
35: 'Unknown'
}
def __init__(self, weights=None, device="cpu", pretrained=True):
"""
Initialize the Amazon animal Classifier.
Args:
weights (str, optional): Path to the model weights. Defaults to None.
device (str, optional): Device for model inference. Defaults to "cpu".
pretrained (bool, optional): Whether to use pretrained weights. Defaults to True.
"""
# If pretrained, use the provided URL to fetch the weights
if pretrained:
url = "https://zenodo.org/records/10042023/files/AI4GAmazonClassification_v0.0.0.ckpt?download=1"
else:
url = None
super(AI4GAmazonRainforest, self).__init__(weights=weights, device=device,
num_cls=36, num_layers=50, url=url)
def results_generation(self, logits, img_ids, id_strip=None):
"""
Generate results for classification.
Args:
logits (torch.Tensor): Output tensor from the model.
img_ids (list): List of image identifiers.
id_strip (str): String to strip from image IDs for cleaner output.
Returns:
dict: Dictionary containing image ID, prediction, and confidence score.
"""
probs = torch.softmax(logits, dim=1)
preds = probs.argmax(dim=1)
confs = probs.max(dim=1)[0]
results = []
for pred, img_id, conf in zip(preds, img_ids, confs):
r = {"img_id": str(img_id).strip(id_strip)}
r["prediction"] = self.CLASS_NAMES[pred.item()]
r["class_id"] = pred.item()
r["confidence"] = conf.item()
results.append(r)
return results
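A hedged single-crop usage sketch, relying on `single_image_classification` from the PlainResNetInference base class; the random tensor is a stand-in for a real transformed crop, and the file name is a placeholder.

```python
import torch

classifier = AI4GAmazonRainforest(device="cpu", pretrained=True)  # fetches the Zenodo weights above
crop = torch.rand(3, 224, 224)  # stand-in for a Classification_Inference_Transform output
result = classifier.single_image_classification(crop, img_id="crop.jpg")
print(result)  # {'img_id': 'crop.jpg', 'prediction': ..., 'class_id': ..., 'confidence': ...}
```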

View File

@@ -0,0 +1,155 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import numpy as np
import torch
import torch.nn as nn
from torchvision.models.resnet import BasicBlock, Bottleneck, ResNet
from torch.hub import load_state_dict_from_url
from tqdm import tqdm
from collections import OrderedDict
# Making the PlainResNetInference class available for import from this module
__all__ = ["PlainResNetInference"]
class ResNetBackbone(ResNet):
"""
Custom ResNet Backbone that extracts features from input images.
"""
def _forward_impl(self, x):
# Following the ResNet structure to extract features
x = self.conv1(x)
x = self.bn1(x)
x = self.relu(x)
x = self.maxpool(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.layer4(x)
x = self.avgpool(x)
x = torch.flatten(x, 1)
return x
class PlainResNetClassifier(nn.Module):
"""
Basic ResNet Classifier that uses a custom ResNet backbone.
"""
name = "PlainResNetClassifier"
def __init__(self, num_cls=1, num_layers=50):
super(PlainResNetClassifier, self).__init__()
self.num_cls = num_cls
self.num_layers = num_layers
self.feature = None
self.classifier = None
self.criterion_cls = None
# Initialize the network and weights
self.setup_net()
def setup_net(self):
"""
Set up the ResNet classifier according to the specified number of layers.
"""
kwargs = {}
if self.num_layers == 18:
block = BasicBlock
layers = [2, 2, 2, 2]
# ... [Missing weight URL definition for ResNet18]
elif self.num_layers == 50:
block = Bottleneck
layers = [3, 4, 6, 3]
# ... [Missing weight URL definition for ResNet50]
else:
raise Exception("ResNet Type not supported.")
self.feature = ResNetBackbone(block, layers, **kwargs)
self.classifier = nn.Linear(512 * block.expansion, self.num_cls)
def setup_criteria(self):
"""
Setup the criterion for classification.
"""
self.criterion_cls = nn.CrossEntropyLoss()
def feat_init(self):
"""
Initialize the features using pretrained weights.
"""
init_weights = self.pretrained_weights.get_state_dict(progress=True)
init_weights = OrderedDict({k.replace("module.", "").replace("feature.", ""): init_weights[k]
for k in init_weights})
self.feature.load_state_dict(init_weights, strict=False)
# Print missing and unused keys for debugging purposes
load_keys = set(init_weights.keys())
self_keys = set(self.feature.state_dict().keys())
missing_keys = self_keys - load_keys
unused_keys = load_keys - self_keys
print("missing keys:", sorted(list(missing_keys)))
print("unused_keys:", sorted(list(unused_keys)))
class PlainResNetInference(nn.Module):
"""
Inference module for the PlainResNet Classifier.
"""
def __init__(self, num_cls=36, num_layers=50, weights=None, device="cpu", url=None):
super(PlainResNetInference, self).__init__()
self.device = device
self.net = PlainResNetClassifier(num_cls=num_cls, num_layers=num_layers)
if weights:
clf_weights = torch.load(weights, map_location=torch.device(self.device))
elif url:
clf_weights = load_state_dict_from_url(url, map_location=torch.device(self.device))
else:
raise Exception("Need weights for inference.")
self.load_state_dict(clf_weights["state_dict"], strict=True)
self.eval()
self.net.to(self.device)
def results_generation(self, logits, img_id, id_strip=None):
"""
Process logits to produce final results.
Args:
logits (torch.Tensor): Logits from the network.
img_id (str): Image path or identifier.
id_strip (str): String to strip from image IDs for cleaner output.
Returns:
dict: Dictionary containing the results.
"""
pass
def forward(self, img):
feats = self.net.feature(img)
logits = self.net.classifier(feats)
return logits
def single_image_classification(self, img, img_id=None, id_strip=None):
logits = self.forward(img.unsqueeze(0).to(self.device))
return self.results_generation(logits.cpu(), [img_id], id_strip=id_strip)[0]
def batch_image_classification(self, dataloader, id_strip=None):
"""
Process a batch of images for classification.
"""
total_logits = []
total_paths = []
with tqdm(total=len(dataloader)) as pbar:
for batch in dataloader:
imgs, paths = batch
imgs = imgs.to(self.device)
total_logits.append(self.forward(imgs))
total_paths.append(paths)
pbar.update(1)
total_logits = torch.cat(total_logits, dim=0).cpu()
total_paths = np.concatenate(total_paths, axis=0)
return self.results_generation(total_logits, total_paths, id_strip=id_strip)
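`results_generation` is deliberately left unimplemented here; concrete classifiers subclass `PlainResNetInference`, declare `CLASS_NAMES`, and implement it. A minimal hedged sketch of that pattern (the subclass itself is hypothetical and needs a real checkpoint path, since no download URL is given):

```python
import torch

class BinaryClassifier(PlainResNetInference):  # hypothetical example subclass
    CLASS_NAMES = {0: "negative", 1: "positive"}

    def __init__(self, weights, device="cpu"):
        # A checkpoint path is required because url=None here.
        super().__init__(weights=weights, device=device, num_cls=2, num_layers=50, url=None)

    def results_generation(self, logits, img_ids, id_strip=None):
        probs = torch.softmax(logits, dim=1)
        preds = probs.argmax(dim=1)
        confs = probs.max(dim=1)[0]
        return [
            {"img_id": str(img_id).strip(id_strip),
             "prediction": self.CLASS_NAMES[pred.item()],
             "class_id": pred.item(),
             "confidence": conf.item()}
            for pred, img_id, conf in zip(preds, img_ids, confs)
        ]
```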

View File

@@ -0,0 +1,70 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import torch
from .base_classifier import PlainResNetInference
__all__ = [
"AI4GOpossum"
]
class AI4GOpossum(PlainResNetInference):
"""
Opossum Classifier that inherits from PlainResNetInference.
This classifier is specialized for distinguishing between Opossums and Non-opossums.
"""
# Image size for the Opossum classifier
IMAGE_SIZE = 224
# Class names for prediction
CLASS_NAMES = {
0: "Non-opossum",
1: "Opossum"
}
def __init__(self, weights=None, device="cpu", pretrained=True):
"""
Initialize the Opossum Classifier.
Args:
weights (str, optional): Path to the model weights. Defaults to None.
device (str, optional): Device for model inference. Defaults to "cpu".
pretrained (bool, optional): Whether to use pretrained weights. Defaults to True.
"""
# If pretrained, use the provided URL to fetch the weights
if pretrained:
url = "https://zenodo.org/records/10023414/files/OpossumClassification_v0.0.0.ckpt?download=1"
else:
url = None
super(AI4GOpossum, self).__init__(weights=weights, device=device,
num_cls=1, num_layers=50, url=url)
def results_generation(self, logits, img_ids, id_strip=None):
"""
Generate results for classification.
Args:
logits (torch.Tensor): Output tensor from the model.
img_ids (list): List of image identifiers.
id_strip (str): String to strip from image IDs for cleaner output.
Returns:
dict: Dictionary containing image ID, prediction, and confidence score.
"""
probs = torch.sigmoid(logits)
preds = (probs > 0.5).squeeze(1).numpy().astype(int)
results = []
for pred, img_id, prob in zip(preds, img_ids, probs):
r = {"img_id": str(img_id).strip(id_strip)}
r["prediction"] = self.CLASS_NAMES[pred]
r["class_id"] = pred
r["confidence"] = prob.item() if pred == 1 else (1 - prob.item())
results.append(r)
return results
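A worked numeric example of the confidence logic above: the model emits a single logit per image, so the sigmoid gives P(Opossum), and the reported confidence for a "Non-opossum" prediction is its complement.

```python
import torch

logits = torch.tensor([[2.0], [-1.0]])
probs = torch.sigmoid(logits)                          # [[0.8808], [0.2689]]
preds = (probs > 0.5).squeeze(1).numpy().astype(int)   # [1, 0]
# Reported confidences: 0.8808 for "Opossum"; 1 - 0.2689 = 0.7311 for "Non-opossum".
```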

View File

@@ -0,0 +1 @@
from .yolov5 import *

View File

@@ -0,0 +1,2 @@
from .base_detector import *
from .megadetector import *

View File

@@ -0,0 +1,158 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
""" YoloV5 base detector class. """
# Importing basic libraries
import numpy as np
from tqdm import tqdm
import supervision as sv
import torch
from torch.hub import load_state_dict_from_url
from yolov5.utils.general import non_max_suppression, scale_coords
class YOLOV5Base:
"""
Base detector class for YOLO V5. This class provides utility methods for
loading the model, generating results, and performing single and batch image detections.
"""
# Placeholder class-level attributes to be defined in derived classes
IMAGE_SIZE = None
STRIDE = None
CLASS_NAMES = None
TRANSFORM = None
def __init__(self, weights=None, device="cpu", url=None):
"""
Initialize the YOLO V5 detector.
Args:
weights (str, optional):
Path to the model weights. Defaults to None.
device (str, optional):
Device for model inference. Defaults to "cpu".
url (str, optional):
URL to fetch the model weights. Defaults to None.
"""
self.model = None
self.device = device
self._load_model(weights, self.device, url)
self.model.to(self.device)
def _load_model(self, weights=None, device="cpu", url=None):
"""
Load the YOLO V5 model weights.
Args:
weights (str, optional):
Path to the model weights. Defaults to None.
device (str, optional):
Device for model inference. Defaults to "cpu".
url (str, optional):
URL to fetch the model weights. Defaults to None.
Raises:
Exception: If weights are not provided.
"""
if weights:
checkpoint = torch.load(weights, map_location=torch.device(device))
elif url:
checkpoint = load_state_dict_from_url(url, map_location=torch.device(self.device))
else:
raise Exception("Need weights for inference.")
self.model = checkpoint["model"].float().fuse().eval() # Convert to FP32 model
def results_generation(self, preds, img_id, id_strip=None):
"""
Generate results for detection based on model predictions.
Args:
preds (numpy.ndarray):
Model predictions.
img_id (str):
Image identifier.
id_strip (str, optional):
Strip specific characters from img_id. Defaults to None.
Returns:
dict: Dictionary containing image ID, detections, and labels.
"""
results = {"img_id": str(img_id).strip(id_strip)}
results["detections"] = sv.Detections(
xyxy=preds[:, :4],
confidence=preds[:, 4],
class_id=preds[:, 5].astype(int)
)
results["labels"] = [
f"{self.CLASS_NAMES[class_id]} {confidence:0.2f}"
for _, _, confidence, class_id, _ in results["detections"]
]
return results
def single_image_detection(self, img, img_size, img_path, conf_thres=0.2, id_strip=None):
"""
Perform detection on a single image.
Args:
img (torch.Tensor):
Input image tensor.
img_size (tuple):
Original image size.
img_path (str):
Image path or identifier.
conf_thres (float, optional):
Confidence threshold for predictions. Defaults to 0.2.
id_strip (str, optional):
Characters to strip from img_id. Defaults to None.
Returns:
dict: Detection results.
"""
preds = self.model(img.unsqueeze(0).to(self.device))[0]
preds = torch.cat(non_max_suppression(prediction=preds, conf_thres=conf_thres), dim=0)
preds[:, :4] = scale_coords([self.IMAGE_SIZE] * 2, preds[:, :4], img_size).round()
return self.results_generation(preds.cpu().numpy(), img_path, id_strip)
def batch_image_detection(self, dataloader, conf_thres=0.2, id_strip=None):
"""
Perform detection on a batch of images.
Args:
dataloader (DataLoader):
DataLoader containing image batches.
conf_thres (float, optional):
Confidence threshold for predictions. Defaults to 0.2.
id_strip (str, optional):
Characters to strip from img_id. Defaults to None.
Returns:
list: List of detection results for all images.
"""
results = []
total_preds = []
total_paths = []
total_img_sizes = []
with tqdm(total=len(dataloader)) as pbar:
for batch in dataloader:
imgs, paths, sizes = batch
imgs = imgs.to(self.device)
total_preds.append(self.model(imgs)[0])
total_paths.append(paths)
total_img_sizes.append(sizes)
pbar.update(1)
total_preds = [
non_max_suppression(prediction=pred.unsqueeze(0), conf_thres=conf_thres)[0].numpy()
for pred in torch.cat(total_preds, dim=0).cpu()
]
total_paths = np.concatenate(total_paths, axis=0)
total_img_sizes = np.concatenate(total_img_sizes, axis=0)
# Input images may differ in size, so scale each prediction individually rather than as one matrix operation
for pred, size, path in zip(total_preds, total_img_sizes, total_paths):
pred[:, :4] = scale_coords([self.IMAGE_SIZE] * 2, pred[:, :4], size).round()
results.append(self.results_generation(pred, path, id_strip))
return results
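A hedged batch-detection sketch combining the pieces above with the MegaDetectorV5 subclass defined in the next file (import paths and the image directory are assumptions):

```python
from torch.utils.data import DataLoader

from PytorchWildlife.data import datasets as pw_data           # assumed import path
from PytorchWildlife.data import transforms as pw_trans        # assumed import path
from PytorchWildlife.models import detection as pw_detection   # assumed import path

detector = pw_detection.MegaDetectorV5(device="cpu", pretrained=True)
transform = pw_trans.MegaDetector_v5_Transform(target_size=detector.IMAGE_SIZE,
                                               stride=detector.STRIDE)
dataset = pw_data.DetectionImageFolder("path/to/images", transform=transform)  # hypothetical dir
loader = DataLoader(dataset, batch_size=4, shuffle=False)
results = detector.batch_image_detection(loader, conf_thres=0.2)
```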

View File

@@ -0,0 +1,47 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
from .base_detector import YOLOV5Base
__all__ = [
'MegaDetectorV5',
]
class MegaDetectorV5(YOLOV5Base):
"""
MegaDetectorV5 is a specialized class derived from the YOLOV5Base class
that is specifically designed for detecting animals, persons, and vehicles.
Attributes:
IMAGE_SIZE (int): The standard image size used during training.
STRIDE (int): Stride value used in the detector.
CLASS_NAMES (dict): Mapping of class IDs to their respective names.
"""
IMAGE_SIZE = 1280 # image size used in training
STRIDE = 64
CLASS_NAMES = {
0: "animal",
1: "person",
2: "vehicle"
}
def __init__(self, weights=None, device="cpu", pretrained=True):
"""
Initializes the MegaDetectorV5 model with the option to load pretrained weights.
Args:
weights (str, optional): Path to the weights file.
device (str, optional): Device to load the model on (e.g., "cpu" or "cuda"). Default is "cpu".
pretrained (bool, optional): Whether to load the pretrained model. Default is True.
"""
if pretrained:
url = "https://zenodo.org/records/10023414/files/MegaDetector_v5b.0.0.pt?download=1"
else:
url = None
super(MegaDetectorV5, self).__init__(weights=weights, device=device, url=url)
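A hedged single-image sketch; it assumes `MegaDetector_v5_Transform` is imported from the transforms module shown earlier, and the file name is a placeholder.

```python
import numpy as np
from PIL import Image

detector = MegaDetectorV5(device="cpu", pretrained=True)
transform = MegaDetector_v5_Transform(target_size=MegaDetectorV5.IMAGE_SIZE,
                                      stride=MegaDetectorV5.STRIDE)
np_img = np.asarray(Image.open("camera_trap.jpg").convert("RGB"))  # hypothetical file
result = detector.single_image_detection(
    transform(np_img), np_img.shape, "camera_trap.jpg", conf_thres=0.2
)
print(result["labels"])  # e.g. ["animal 0.95", ...]
```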

View File

@@ -0,0 +1,2 @@
from .misc import *
from .post_process import *

View File

@@ -0,0 +1,51 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
""" Miscellaneous functions."""
import numpy as np
from tqdm import tqdm
from typing import Callable
from supervision import VideoInfo, VideoSink, get_video_frames_generator
__all__ = [
"process_video"
]
def process_video(
source_path: str,
target_path: str,
callback: Callable[[np.ndarray, int], np.ndarray],
target_fps: int = 1,
codec: str = "avc1"
) -> None:
"""
Process a video frame-by-frame, applying a callback function to each frame and saving the results
to a new video. This version includes a progress bar and allows codec selection.
Args:
source_path (str):
Path to the source video file.
target_path (str):
Path to save the processed video.
callback (Callable[[np.ndarray, int], np.ndarray]):
A function that takes a video frame and its index as input and returns the processed frame.
target_fps (int, optional):
Frame rate to sample at; source frames are subsampled so the output is written at roughly this rate. Default is 1.
codec (str, optional):
Codec used to encode the processed video. Default is "avc1".
"""
source_video_info = VideoInfo.from_video_path(video_path=source_path)
if source_video_info.fps > target_fps:
stride = int(source_video_info.fps / target_fps)
source_video_info.fps = target_fps
else:
stride = 1
with VideoSink(target_path=target_path, video_info=source_video_info, codec=codec) as sink:
with tqdm(total=int(source_video_info.total_frames / stride)) as pbar:
for index, frame in enumerate(
get_video_frames_generator(source_path=source_path, stride=stride)
):
result_frame = callback(frame, index)
sink.write_frame(frame=result_frame)
pbar.update(1)
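A hedged usage sketch (file names are placeholders); a real callback would run a detector on each frame and return the annotated frame.

```python
import numpy as np

def annotate_frame(frame: np.ndarray, index: int) -> np.ndarray:
    # Placeholder: run detection and draw boxes here, then return the frame.
    return frame

process_video(source_path="input.mp4", target_path="output.mp4",
              callback=annotate_frame, target_fps=1)
```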

View File

@@ -0,0 +1,148 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
""" Post-processing functions."""
import os
import numpy as np
import json
from PIL import Image
import supervision as sv
__all__ = [
"save_detection_images",
"save_crop_images",
"save_detection_json",
"save_detection_classification_json"
]
# !!! Output paths need to be optimized !!!
def save_detection_images(results, output_dir):
"""
Save detected images with bounding boxes and labels annotated.
Args:
results (list or dict):
Detection results containing image ID, detections, and labels.
output_dir (str):
Directory to save the annotated images.
"""
box_annotator = sv.BoxAnnotator(thickness=4, text_thickness=4, text_scale=2)
os.makedirs(output_dir, exist_ok=True)
with sv.ImageSink(target_dir_path=output_dir, overwrite=True) as sink:
if isinstance(results, list):
for entry in results:
annotated_img = box_annotator.annotate(
scene=np.array(Image.open(entry["img_id"])),
detections=entry["detections"],
labels=entry["labels"]
)
sink.save_image(image=annotated_img, image_name=entry["img_id"].rsplit('/', 1)[1])
else:
annotated_img = box_annotator.annotate(
scene=np.array(Image.open(results["img_id"])),
detections=results["detections"],
labels=results["labels"]
)
sink.save_image(image=annotated_img, image_name=results["img_id"].rsplit('/', 1)[1])
# !!! Output paths need to be optimized !!!
def save_crop_images(results, output_dir):
"""
Save cropped images based on the detection bounding boxes.
Args:
results (list):
Detection results containing image ID and detections.
output_dir (str):
Directory to save the cropped images.
"""
assert isinstance(results, list)
os.makedirs(output_dir, exist_ok=True)
with sv.ImageSink(target_dir_path=output_dir, overwrite=True) as sink:
for entry in results:
for i, (xyxy, _, _, cat, _) in enumerate(entry["detections"]):
cropped_img = sv.crop_image(image=np.array(Image.open(entry["img_id"])), xyxy=xyxy)
sink.save_image(
image=cropped_img,
image_name="{}_{}_{}".format(int(cat), i, entry["img_id"].rsplit('/', 1)[1])
)
def save_detection_json(results, output_dir, categories=None):
"""
Save detection results to a JSON file.
Args:
results (list):
Detection results containing image ID, bounding boxes, category, and confidence.
output_dir (str):
Path to save the output JSON file.
categories (list, optional):
List of categories for detected objects. Defaults to None.
"""
json_results = {
"annotations": [],
"categories": categories
}
with open(output_dir, 'w') as f:
for r in results:
json_results["annotations"].append({
"img_id": r["img_id"],
"bbox": r["detections"].xyxy.astype(int).tolist(),
"category": r["detections"].class_id.tolist(),
"confidence": r["detections"].confidence.tolist()
})
json.dump(json_results, f)
def save_detection_classification_json(det_results, clf_results, output_path,
det_categories=None, clf_categories=None):
"""
Save combined detection and classification results to a JSON file.
Args:
det_results (list):
Detection results containing image ID, bounding boxes, detection category, and confidence.
clf_results (list):
Classification results containing image ID, classification category, and confidence.
output_path (str):
Path to save the output JSON file.
det_categories (list, optional):
List of categories for detected objects. Defaults to None.
clf_categories (list, optional):
List of categories for classified objects. Defaults to None.
"""
json_results = {
"annotations": [],
"det_categories": det_categories,
"clf_categories": clf_categories
}
with open(output_path, 'w') as f:
counter = 0
for det_r in det_results:
img_clf_categories = []
img_clf_confidence = []
for i in range(counter, len(clf_results)):
clf_r = clf_results[i]
if clf_r["img_id"] == det_r["img_id"]:
clf_categories.append(clf_r["class_id"])
clf_confidence.append(clf_r["confidence"])
counter += 1
else:
break
json_results["annotations"].append({
"img_id": det_r["img_id"],
"bbox": det_r["detections"].xyxy.astype(int).tolist(),
"det_category": det_r["detections"].class_id.tolist(),
"det_confidence": det_r["detections"].confidence.tolist(),
"clf_category": clf_categories,
"clf_confidence": clf_confidence
})
json.dump(json_results, f)
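A hedged sketch chaining batch detection output into the helpers above (paths are placeholders; `results` is the list returned by `batch_image_detection`):

```python
save_detection_images(results, "annotated_output")
save_crop_images(results, "crops_output")
save_detection_json(results, "detections.json",
                    categories=["animal", "person", "vehicle"])  # illustrative category list
```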

View File

@@ -1,41 +0,0 @@
<!-- BEGIN MICROSOFT SECURITY.MD V0.0.8 BLOCK -->
## Security
Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/).
If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below.
## Reporting Security Issues
**Please do not report security vulnerabilities through public GitHub issues.**
Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report).
If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey).
You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc).
Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue:
* Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.)
* Full paths of source file(s) related to the manifestation of the issue
* The location of the affected source code (tag/branch/commit or direct URL)
* Any special configuration required to reproduce the issue
* Step-by-step instructions to reproduce the issue
* Proof-of-concept or exploit code (if possible)
* Impact of the issue, including how an attacker might exploit the issue
This information will help us triage your report more quickly.
If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs.
## Preferred Languages
We prefer all communications to be in English.
## Policy
Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd).
<!-- END MICROSOFT SECURITY.MD BLOCK -->

View File

@@ -1,13 +0,0 @@
# Camera Trap Image Processing APIs
Though most of our users either use the MegaDetector model directly or work with us to run MegaDetector on the cloud, we also package useful components developed in the Camera Traps project into APIs that users can operate (on the cloud or on local computers) to process camera trap images in a variety of scenarios. This folder contains the source code of the APIs and documentation on how to set them up.
### Synchronous API
This API is intended for real-time scenarios where a small number of images are processed at a time and latency is a priority. See documentation [here](synchronous).
### Batch processing API
This API runs the detector on lots of images (typically millions) and distributes the work over potentially many nodes using [Azure Batch](https://azure.microsoft.com/en-us/services/batch/). See documentation [here](batch_processing).

View File

@@ -1,313 +0,0 @@
# Camera trap batch processing API user guide
Though most of our users either use the [MegaDetector](https://github.com/ecologize/CameraTraps#megadetector) model directly or work with us to run MegaDetector on the cloud, we also offer an open-source reference implementation for an API that processes a large quantity of camera trap images, to support a variety of online scenarios. The output is most helpful for separating empty from non-empty images based on a detector confidence threshold that you select, and putting bounding boxes around animals, people, and vehicles to help manual review proceed more quickly. If you are interested in setting up an endpoint to process very small numbers of images for real-time applications (e.g. for anti-poaching applications), see the source for our [real-time camera trap image processing API](https://github.com/ecologize/CameraTraps/tree/main/api/synchronous).
With the batch processing API, you can process a batch of up to a few million images in one request to the API. If in addition you have some images that are labeled, we can evaluate the performance of the MegaDetector on your labeled images (see [Post-processing tools](#post-processing-tools)).
All references to &ldquo;container&rdquo; in this document refer to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) containers.
We have referred to one submission of images as a "request" in this documentation but as a "job" elsewhere in the source code and emails; confusingly, the endpoint for checking the status of a request/job is called `/task` and the RequestID is called `task_id`. Consider "request" and "job" interchangeable, and the `/task` endpoint a legacy issue. Note that the terms "job" and "task" mean different things in the source code (in the context of Azure Batch).
## API
### API endpoints
Once configured to run on a live instance, the endpoints of this API are available at
```
http://URL/v4/camera-trap/detection-batch
```
#### `/request_detections`
To submit a request for batch processing, make a POST call to this endpoint with a json body containing the input fields defined below. The API will quickly return a json response giving you a RequestID (UUID4 hex) that represents the request you have submitted, for example:
```json
{
"request_id": "f940ecd58c7746b1bde89bd6ba5a5202"
}
```
or an error message, if your inputs are not acceptable:
```json
{
"error": "error message."
}
```
In particular, the endpoint will return a 503 error if the queue of requests is full; please retry later in that case.
#### `/task`
Check the status of your request by calling the `/task` endpoint via a GET call, passing in your RequestID:
```http://URL/v4/camera-trap/detection-batch/task/RequestID```
This returns a json with the fields `Status`, `TaskId` (which is the `request_id` in this document), and a few others. The `Status` field is a json object with the following fields:
- `request_status`: one of `running`, `failed`, `problem`, `completed`, and `canceled`.
- The status `failed` indicates that the images have not been submitted to the cluster for processing, so you can call the `/request_detections` endpoint again, correcting your inputs according to the error message returned with the status.
- The status `problem` indicates that the images have already been submitted for processing but the API encountered an error while monitoring progress; in this case, please contact us to retrieve your results so that unnecessary processing does not continue to occupy the cluster (the `message` field will mention "please contact us").
- `canceled` if your call to the `/cancel_request` endpoint took effect.
- `message`: a longer string describing the `request_status` and any errors; when the request is completed, the URLs to the output files will also be here (see [Outputs](#23-outputs) section below).
#### `/supported_model_versions`
Check which versions of the MegaDetector are supported by this API by making a GET call to this endpoint.
#### `/default_model_version`
Check which version of the MegaDetector is used by default by making a GET call to this endpoint.
#### `/cancel_request`
If you have submitted a request by mistake, you can make a POST call to this endpoint to cancel it.
The body should contain the `caller` (see next section on _API inputs_) and `request_id` fields. You should get back a response immediately with status code 200 if the signal was successfully sent. You can verify that the request has been canceled using the `/task` endpoint.
### API inputs
| Parameter | Is required | Type | Explanation |
|--------------------------|-------------|-------|----------------------------|
| input_container_sas | Yes<sup>1</sup> | string | SAS URL with list and read permissions to the Blob Storage container where the images are stored. |
| images_requested_json_sas | No<sup>1</sup> | string | SAS URL with list and read permissions to a json file in Blob Storage. See below for explanation of the content of the json to provide. |
| image_path_prefix | No | string | Only process images whose full path starts with `image_path_prefix` (case-_sensitive_). Note that any image paths specified in `images_requested_json_sas` will need to be the full path from the root of the container, regardless of whether `image_path_prefix` is provided. |
| first_n | No | int | Only process the first `first_n` images. Order of images is not guaranteed, but is likely to be alphabetical. Set this to a small number to avoid taking time to fully list all images in the blob (about 15 minutes for 1 million images) if you just want to try this API. |
| sample_n | No | int | Randomly select `sample_n` images to process. |
| model_version | No | string | Version of the MegaDetector model to use. Default is the most updated stable version (check using the `/default_model_version` endpoint). Supported versions are available at the `/supported_model_versions` endpoint.|
| request_name | No | string | A string (letters, digits, `_`, `-` allowed, max length 92 characters) that will be appended to the output file names to help you identify the resulting files. A timestamp in UTC (`%Y%m%d%H%M%S`) of the time of submission will be appended to the resulting files automatically. |
| use_url | No | bool | Set to `true` if you are providing public image URLs. |
| caller | Yes | string | An identifier that we use to whitelist users for now. |
| country | No (but recommended) | string | Country where the majority of the images in this batch are taken. Preferably use an [ISO 3166-1 alpha-3 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3#Officially_assigned_code_elements), such as "BWA" for Botswana and "USA" for the United States. |
| organization_name | No (but recommended) | string | Organization conducting the survey. |
<sup>1</sup> There are two ways of giving the API access to your images.
1 - If you have all your images in a container in Azure Blob Storage, provide the parameter `input_container_sas` as described above. This means that your images do not have to be at publicly accessible URLs. In this case, the json pointed to by `images_requested_json_sas` should look like:
```json
[
"Season1/Location1/Camera1/image1.jpg",
"Season1/Location1/Camera1/image2.jpg"
]
```
Only images whose paths are listed here will be processed if you provide this list.
2 - If your images are stored elsewhere and you can provide a publicly accessible URL to each, you do not need to specify `input_container_sas`. Instead, list the URLs to all the images (instead of their paths) you&rsquo;d like to process in the json at `images_requested_json_sas`.
#### Attaching metadata
We can store a (short) string of metadata with each image path or URL. The json at `images_requested_json_sas` should then look like:
```json
[
["Season1/Location1/Camera1/image1.jpg", "metadata_string1"],
["Season1/Location1/Camera1/image2.jpg", "metadata_string2"]
]
```
The metadata string will be copied to the `meta` field in the image's entry in the output file (format see below).
#### Other notes and example
- Only images with file name ending in ".jpg", ".jpeg" or ".png" (case insensitive) will be processed, so please make sure the file names are compliant before you upload them to the container (you cannot rename a blob without copying it entirely once it is in Blob Storage).
- By default we process all such images in the specified container. You can choose to only process a subset of them by specifying the other input parameters. The images will be filtered out accordingly in this order:
- `images_requested_json_sas`
- `image_path_prefix`
- `first_n`
- `sample_n`
- For example, if you specified both `images_requested_json_sas` and `first_n`, only images that are in your provided list at `images_requested_json_sas` will be considered, and then we process the `first_n` of those.
Example body of the POST request:
```json
{
"input_container_sas": "https://storageaccountname.blob.core.windows.net/container-name?se=2019-04-23T01%3A30%3A00Z&sp=rl&sv=2018-03-28&sr=c&sig=A_LONG_STRING",
"images_requested_json_sas": "https://storageaccountname2.blob.core.windows.net/container-name2/possibly_in_a_folder/my_list_of_images.json?se=2019-04-19T20%3A31%3A00Z&sp=rl&sv=2018-03-28&sr=b&sig=ANOTHER_LONG_STRING",
"image_path_prefix": "2020/Alberta",
"first_n": 100000,
"request_name": "Alberta_2020",
"model_version": "4.1",
"caller": "allowlisted_user_x",
"country": "CAN",
"organization_name": "Name of Organization"
}
```
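The same request can be submitted from Python; a hedged sketch using the `requests` library, with the fields documented above and placeholder values:

```python
import requests

payload = {
    "input_container_sas": "https://storageaccountname.blob.core.windows.net/container-name?SAS_TOKEN",
    "request_name": "Alberta_2020",
    "caller": "allowlisted_user_x",
}
r = requests.post("http://URL/v4/camera-trap/detection-batch/request_detections", json=payload)
print(r.json())  # {"request_id": "..."} on success, {"error": "..."} otherwise
```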
You can manually call the API using applications such as Postman:
![Screenshot of Postman used to call the batch processing API](./images/Postman_screenshot.png)
#### How to obtain a SAS token
You can easily generate a [SAS token](https://docs.microsoft.com/en-us/azure/storage/common/storage-dotnet-shared-access-signature-part-1) to a container using the desktop app [Azure Storage Explorer](https://azure.microsoft.com/en-us/features/storage-explorer/) (available on Windows, macOS and Linux). You can also issue SAS tokens programmatically by using the [Azure Storage SDK](https://azure-storage.readthedocs.io/ref/azure.storage.blob.baseblobservice.html#azure.storage.blob.baseblobservice.BaseBlobService.generate_blob_shared_access_signature).
Using Storage Explorer, right click on the container or blob you&rsquo;d like to grant access for, and choose &ldquo;Get Shared Access Signature...&rdquo;. On the dialog window that appears,
- cross out the &ldquo;Start time&rdquo; field if you will be using the SAS token right away
- set the &ldquo;Expiry time&rdquo; to a date in the future, about a month ahead is reasonable. The SAS token needs to be valid for the duration of the batch processing request.
- make sure &ldquo;Read&rdquo; and &ldquo;List&rdquo; are checked under &ldquo;Permissions&rdquo; (see screenshot)
Click &ldquo;Create&rdquo;, and the &ldquo;URL&rdquo; field on the next screen is the value required for `input_container_sas` or `images_requested_json_sas`.
![Screenshot of Azure Storage Explorer used for generating SAS tokens with read and list permissions](./images/SAS_screenshot.png)
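Programmatic issuance can also be done with the newer `azure-storage-blob` (v12) SDK; a hedged sketch with placeholder account credentials:

```python
from datetime import datetime, timedelta
from azure.storage.blob import ContainerSasPermissions, generate_container_sas

sas_token = generate_container_sas(
    account_name="storageaccountname",   # placeholder
    container_name="container-name",     # placeholder
    account_key="ACCOUNT_KEY",           # placeholder
    permission=ContainerSasPermissions(read=True, list=True),
    expiry=datetime.utcnow() + timedelta(days=30),  # must cover the whole request
)
input_container_sas = (
    "https://storageaccountname.blob.core.windows.net/container-name?" + sas_token
)
```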
### API outputs
Once your request is submitted and parameters validated, the API divides all images into shards of about 2000 images each and sends them to an [Azure Batch](https://azure.microsoft.com/en-us/services/batch/) node pool to be scored by the model. Another process monitors how many shards have been evaluated, checking every 15 minutes, and updates the status of the request, which you can check via the `/task` endpoint.
When all shards have finished processing, the `status` returned by the `/task` endpoint will have the `request_status` field as `completed`, and the `message` field will contain a URL to the output file. The returned body looks like
```json
{
"Status": {
"request_status": "completed",
"message": {
"num_failed_shards": 0,
"output_file_urls": {
"detections": "https://cameratrap.blob.core.windows.net/async-api-internal/ee26326e-7e0d-4524-a9ea-f57a5799d4ba/ee26326e-7e0d-4524-a9ea-f57a5799d4ba_detections_4_1_on_test_images_20200709211752.json?sv=2019-02-02&sr=b&sig=key1"
}
},
"time": "2020-07-09 21:27:17"
},
"Timestamp": "2020-07-09 21:27:17",
"Endpoint": "/v3/camera-trap/detection-batch/request_detections",
"TaskId": "ea26326e-7e0d-4524-a9ea-f57a5799d4ba"
}
```
To obtain the URL of the output file:
```python
task_status = body['Status']
assert task_status['request_status'] == 'completed'
message = task_status['message']
assert message['num_failed_shards'] == 0
url_to_results_file = message['output_file_urls']['detections']
```
Note that the field `Status` in the returned body is capitalized (since July 2020).
The URL to the output file is valid for 180 days from the time the request has finished. If you neglected to retrieve them before the link expired, contact us with the RequestID and we can send the results to you.
The output file is a JSON in the format described below.
#### Batch processing API output format
The output of the detector is saved in `requestID_detections_requestName_timestamp.json`. The `classifications` fields will be added if a classifier was trained for your project and applied to the images.
If an image could not be opened or an error occurred when applying the model to it, it will still have an entry in the `images` list of the output file, but it will have a `failure` field indicating the type of error (see the last entry in the example below). However, if the API runs into problems processing an entire shard of images (usually 2000 images per shard), those images will not have entries in the results file - this should be very rare.
Example output with both detection and classification results:
```json
{
"info": {
"format_version": "1.3",
"detector": "md_v4.1.0.pb",
"detection_completion_time": "2019-05-22 02:12:19",
"classifier": "ecosystem1_v2",
"classification_completion_time": "2019-05-26 01:52:08",
"detector_metadata": {
"megadetector_version":"v4.1.0",
"typical_detection_threshold":0.8,
"conservative_detection_threshold":0.6
},
"classifier_metadata": {
"typical_classification_threshold":0.75
}
},
"detection_categories": {
"1": "animal",
"2": "person",
"3": "vehicle"
},
"classification_categories": {
"0": "fox",
"1": "elk",
"2": "wolf",
"3": "bear",
"4": "moose"
},
"images": [
{
"file": "path/from/base/dir/image_with_animal.jpg",
"meta": "optional free-text metadata",
"detections": [
{
"category": "1",
"conf": 0.926,
"bbox": [0.0, 0.2762, 0.1539, 0.2825],
"classifications": [
["3", 0.901],
["1", 0.071],
["4", 0.025]
]
},
{
"category": "1",
"conf": 0.061,
"bbox": [0.0451, 0.1849, 0.3642, 0.4636]
}
]
},
{
"file": "/path/from/base/dir/empty_image.jpg",
"meta": "",
"detections": []
},
{
"file": "/path/from/base/dir2/corrupted_image.jpg",
"failure": "Failure image access"
}
]
}
```
##### Model metadata
The 'detector' field (within the 'info' field) specifies the filename of the detector model that produced this results file. It was omitted in old files generated with run_detector_batch.py, so with extremely high probability, if this field is not present, you can assume the file was generated with MegaDetector v4.
In newer files, this should contain the filename (base name only) of the model file, which typically will be one of:
* megadetector_v4.1 (MegaDetector v4, run via the batch API)
* md_v4.1.0.pb (MegaDetector v4, run locally)
* md_v5a.0.0.pt (MegaDetector v5a)
* md_v5b.0.0.pt (MegaDetector v5b)
This string is used by some tools to choose appropriate default confidence values, which depend on the model version. If you change the name of the MegaDetector file, you will break this convention, and YMMV.
The "detector_metadata" and "classifier_metadata" fields are also optionally added as of format version 1.2. These currently contain useful default confidence values for downstream tools (particularly Timelapse), but we strongly recommend against blindly trusting these defaults; always explore your data before choosing a confidence threshold, as the optimal value can vary widely.
##### Detector outputs
The bounding box in the `bbox` field is represented as
```
[x_min, y_min, width_of_box, height_of_box]
```
where `(x_min, y_min)` is the upper-left corner of the detection bounding box, with the origin in the upper-left corner of the image. The coordinates and box width and height are *relative* to the width and height of the image. Note that this is different from the coordinate format used in the [COCO Camera Traps](data_management/README.md) databases, which are in absolute coordinates.
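For example, converting a relative bbox to absolute pixel coordinates (the image dimensions are illustrative):

```python
img_w, img_h = 2048, 1536   # hypothetical image size
x_min, y_min, w, h = 0.0, 0.2762, 0.1539, 0.2825  # bbox from the example output above
abs_bbox = [round(x_min * img_w), round(y_min * img_h),
            round(w * img_w), round(h * img_h)]   # [0, 424, 315, 434]
```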
The detection category `category` can be interpreted using the `detection_categories` dictionary.
Detection categories not listed here are allowed by this format specification, but should be treated as "no detection".
When the detector model detects no animal (or person or vehicle), the confidence `conf` is shown as 0.0 (not confident that there is an object of interest) and the `detections` field is an empty list.
##### Classifier outputs
After a classifier is applied, each tuple in a `classifications` list represents `[species, confidence]`. They are listed in order of confidence. The species categories should be interpreted using the `classification_categories` dictionary. Keys in `classification_categories` will always be nonnegative integers formatted as strings.
## Post-processing tools
The [postprocessing](postprocessing) folder contains tools for working with the output of our detector API. In particular, [postprocess_batch_results.py](postprocessing/postprocess_batch_results.py) provides visualization and accuracy assessment tools for the output of the batch processing API. A sample output for the Snapshot Serengeti data when using ground-truth annotations can be seen [here](http://dolphinvm.westus2.cloudapp.azure.com/data/snapshot_serengeti/serengeti_val_detections_from_pkl_MDv1_20190528_w_classifications_eval/).
## Integration with other tools
The [integration](integration) folder contains guidelines and postprocessing scripts for using the output of our API in other applications.

View File

@@ -1,87 +0,0 @@
# Camera trap batch processing API developer readme
## Build the Docker image for Batch node pools
We need to build a Docker image with the necessary packages (mainly TensorFlow) to run the scoring script. Azure Batch will pull this image from a private container registry, which needs to be in the same region as the Batch account.
Navigate to the subdirectory `batch_service` (otherwise you need to specify the Docker context).
Build the image from the Dockerfile in this folder:
```commandline
export IMAGE_NAME=***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3
export REGISTRY_NAME=***REMOVED***
sudo docker image build --rm --tag $IMAGE_NAME --file ./Dockerfile .
```
Test that TensorFlow can use the GPU in an interactive Python session:
```commandline
sudo docker run --gpus all -it --rm $IMAGE_NAME /bin/bash
python
import tensorflow as tf
print('tensorflow version:', tf.__version__)
print('tf.test.is_gpu_available:', tf.test.is_gpu_available())
quit()
```
You can now exit/stop the container.
Log in to the Azure Container Registry for the batch API project and push the image; you may have to `az login` first:
```commandline
sudo az acr login --name $REGISTRY_NAME
sudo docker image push $IMAGE_NAME
```
## Create a Batch node pool
We create a separate node pool for each instance of the API. For example, our `internal` instance of the API has one node pool.
Follow the notebook [api_support/create_batch_pool.ipynb](../api_support/create_batch_pool.ipynb) to create one. You should only need to do this for new instances of the API.
## Flask app
The API endpoints are in a Flask web application, which needs to be run in the conda environment `cameratraps-batch-api` specified by [environment-batch-api.yml](environment-batch-api.yml).
In addition, the API uses the `sas_blob_utils` module from the `ai4eutils` [repo](https://github.com/microsoft/ai4eutils), so that repo folder should be on the PYTHONPATH.
Make sure to update the `API_INSTANCE_NAME`, `POOL_ID`, `BATCH_ACCOUNT_NAME`, and `BATCH_ACCOUNT_URL` values in [server_api_config.py](./server_api_config.py) to reflect which instance of the API is being deployed.
To start the Flask app in development mode, first source `start_batch_api.sh` to retrieve secrets required for the various Azure services from KeyVault and export them as environment variables in the current shell:
```commandline
source start_batch_api.sh
```
You will be prompted to authenticate via AAD (you need to have access to the AI4E engineering subscription).
Set the logs directory as needed, and the name of the Flask app:
```
export LOGS_DIR=/home/otter/camtrap/batch_api_logs
export FLASK_APP=server
```
To start the app locally in debug mode:
```commandline
export FLASK_ENV=development
flask run -p 5000 --eager-loading --no-reload
```
To start the app on a VM, with external access:
```commandline
flask run -h 0.0.0.0 -p 6011 --eager-loading --no-reload |& tee -a $LOGS_DIR/log_internal_dev_20210216.txt
```
To start the app using the production server:
```commandline
gunicorn -w 1 -b 0.0.0.0:6011 --threads 4 --access-logfile $LOGS_DIR/log_internal_dev_20210218_access.txt --log-file $LOGS_DIR/log_internal_dev_20210218_error.txt --capture-output server:app --log-level=info
```
The logs will only be written to these two log files and will not show in the console.
The API should work with more than one process/Gunicorn worker, but we have not tested it.
## Send daily activity summary to Teams
Running [api_support/start_summarize_daily_activities.sh](../api_support/start_summarize_daily_activities.sh) will retrieve credentials from the KeyVault (you need to authenticate again) and run a script to send a summary of images processed on *all* instances of the API in the past day to a Teams webhook.

View File

@@ -1,443 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = 'all' # default is last_expr\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'10.0.0'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import azure.batch\n",
"azure.batch.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import json\n",
"\n",
"from azure.batch import BatchServiceClient\n",
"from azure.batch.batch_auth import SharedKeyCredentials\n",
"from azure.batch.models import *\n",
"from azure.common.credentials import ServicePrincipalCredentials"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Azure Batch\n",
"\n",
"Documentation\n",
"- https://github.com/Azure-Samples/batch-python-quickstart/blob/master/src/python_quickstart_client.py\n",
"- https://docs.microsoft.com/en-us/azure/batch/batch-docker-container-workloads#prefetch-images-for-container-configuration\n",
"\n",
"TODO\n",
"\n",
"- Turn `enable_auto_scale` on and set the appropriate `auto_scale_formula`. This way we can cap the maximum available nodes. https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling\n",
"\n",
"## Create a pool for each instance of the API\n",
"\n",
"List all Batch supported images with their \"capabilities\" (e.g. \"DockerCompatible\", \"NvidiaTeslaDriverInstalled\"):\n",
"```\n",
"az batch pool supported-images list\n",
"```\n",
"with the pool information provided in additional parameters.\n",
"\n",
"Listing all versions of a SKU of image:\n",
"```\n",
"az vm image list --all --publisher microsoft-dsvm\n",
"```\n",
"\n",
"You may need to accept the terms of an image:\n",
"```\n",
"az vm image list --all --publisher <publisher>\n",
"```\n",
"to find the URN for the image you want to use, followed by:\n",
"\n",
"```\n",
"az vm image terms accept --urn <corresponding-urn>\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"os.environ['BATCH_ACCOUNT_NAME'] = ''\n",
"os.environ['BATCH_ACCOUNT_URL'] = ''\n",
"\n",
"os.environ['APP_CLIENT_ID'] = ''\n",
"os.environ['APP_CLIENT_SECRET'] = ''\n",
"os.environ['APP_TENANT_ID'] = ''\n",
"\n",
"os.environ['REGISTRY_SERVER'] = '.azurecr.io' # e.g. registryname.azurecr.io\n",
"os.environ['REGISTRY_USERNAME'] = ''\n",
"os.environ['REGISTRY_PASSWORD'] = ''\n",
"os.environ['REGISTRY_IMAGE_NAME'] = '.azurecr.io/tensorflow:1.14.0-gpu-py3' # login server/repository:tag"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"POOL_ID = 'internal_2'\n",
"assert len(POOL_ID) <= 64, 'pool_id has more than 64 characters'\n",
"\n",
"POOL_NODE_COUNT = 1\n",
"\n",
"POOL_VM_SIZE = 'Standard_NC6s_v3' # https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series\n",
"\n",
"registry_server = os.environ['REGISTRY_SERVER']\n",
"registry_username = os.environ['REGISTRY_USERNAME']\n",
"registry_password = os.environ['REGISTRY_PASSWORD']\n",
"docker_image = os.environ['REGISTRY_IMAGE_NAME']"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def print_batch_exception(batch_exception):\n",
" \"\"\"\n",
" Prints the contents of the specified Batch exception.\n",
" \"\"\"\n",
" print('-------------------------------------------')\n",
" print('Exception encountered:')\n",
" if batch_exception.error and \\\n",
" batch_exception.error.message and \\\n",
" batch_exception.error.message.value:\n",
" print(batch_exception.error.message.value)\n",
" if batch_exception.error.values:\n",
" print()\n",
" for mesg in batch_exception.error.values:\n",
" print(f'{mesg.key}:\\t{mesg.value}')\n",
" print('-------------------------------------------')\n",
" \n",
"def create_pool(batch_service_client, pool_id):\n",
" \"\"\"\n",
" Create a pool with pool_id and the Docker image specified in the env variables.\n",
" \"\"\"\n",
" image_ref = ImageReference(\n",
" publisher=\"microsoft-azure-batch\",\n",
" offer=\"ubuntu-server-container\",\n",
" sku=\"16-04-lts\",\n",
" version=\"latest\" # URN: microsoft-azure-batch:ubuntu-server-container:16-04-lts:1.1.0\n",
" # The Azure Batch container image only accepts 'latest' version\n",
" )\n",
" \n",
" # Specify a container registry\n",
" container_registry = ContainerRegistry(\n",
" registry_server=registry_server,\n",
" user_name=registry_username,\n",
" password=registry_password\n",
" )\n",
" \n",
" container_conf = ContainerConfiguration(\n",
" container_image_names = [docker_image],\n",
" container_registries =[container_registry]\n",
" )\n",
" \n",
" vm_config = VirtualMachineConfiguration(\n",
" image_reference=image_ref,\n",
" container_configuration=container_conf,\n",
" node_agent_sku_id=\"batch.node.ubuntu 16.04\"\n",
" )\n",
" \n",
" new_pool = PoolAddParameter(\n",
" id=POOL_ID,\n",
" display_name=POOL_ID,\n",
" \n",
" vm_size=POOL_VM_SIZE,\n",
" target_dedicated_nodes=POOL_NODE_COUNT, # we only used dedicated nodes\n",
" \n",
" virtual_machine_configuration=vm_config\n",
" )\n",
" batch_service_client.pool.add(new_pool)\n",
"\n",
"def create_job():\n",
" pass\n",
"\n",
"def create_task():\n",
" \"\"\"\n",
" All Tasks should be idempotent as they may need to be retried due to a recovery operation.\n",
" \"\"\"\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"account_url = os.environ['BATCH_ACCOUNT_URL']\n",
"\n",
"app_client_id = os.environ['APP_CLIENT_ID']\n",
"app_client_secret = os.environ['APP_CLIENT_SECRET']\n",
"app_tenant_id = os.environ['APP_TENANT_ID']\n",
"\n",
"credentials = ServicePrincipalCredentials(\n",
" client_id=app_client_id,\n",
" secret=app_client_secret,\n",
" tenant=app_tenant_id,\n",
" resource=\"https://batch.core.windows.net/\"\n",
")\n",
"\n",
"# if using the Batch quota system, use https://docs.microsoft.com/en-us/python/api/azure-batch/azure.batch.batch_auth.sharedkeycredentials?view=azure-python\n",
"# to authenticate instead of the service principal is also okay.\n",
"\n",
"batch_client = BatchServiceClient(credentials=credentials, batch_url=account_url)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"try:\n",
" create_pool(batch_client, POOL_ID)\n",
"except BatchErrorException as e:\n",
" print_batch_exception(e)\n",
" raise"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submitting a job\n",
"\n",
"Job is what we have been referring to as Requests. Each shard corresponds to a Task.\n",
"\n",
"The Azure Batch service sets these environment variables on the compute nodes:\n",
"\n",
"- AZ_BATCH_JOB_ID\n",
"\n",
"- AZ_BATCH_TASK_ID\n",
"- AZ_BATCH_TASK_DIR\n",
"- AZ_BATCH_TASK_WORKING_DIR - currently running task has read/write access to this directory"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"job_id = 'test_docker0'"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# job id is the request id in the old API context\n",
"\n",
"job = JobAddParameter(\n",
" id=job_id,\n",
" pool_info=PoolInformation(pool_id=POOL_ID),\n",
")\n",
"\n",
"batch_client.job.add(job)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Submit tasks to the job (the shards)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"command = \"\"\"/bin/sh -c \"python /app/score.py\" \"\"\"\n",
"\n",
"task = TaskAddParameter(\n",
" id='task_{}'.format(0),\n",
" command_line=command,\n",
" container_settings=TaskContainerSettings(\n",
" image_name=docker_image,\n",
" working_directory='taskWorkingDirectory'\n",
" )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"azure.batch.models._models_py3.TaskAddParameter"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(task)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"batch_client.task.add(job_id, task)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Monitoring a job\n",
"\n",
"Optimization: remember which tasks have already Completed so that we do not repeatedly query for their status.\n",
"\n",
"Documentation: https://docs.microsoft.com/en-us/azure/batch/batch-efficient-list-queries"
]
},
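{
 "cell_type": "code",
 "execution_count": null,
 "metadata": {},
 "outputs": [],
 "source": [
  "# A sketch (not in the original notebook): let the service filter with an efficient\n",
  "# list query so only completed Tasks are returned, instead of filtering client-side.\n",
  "completed_tasks = batch_client.task.list(\n",
  "    job_id,\n",
  "    task_list_options=TaskListOptions(filter=\"state eq 'completed'\", select='id'))\n",
  "completed_ids = {t.id for t in completed_tasks}\n",
  "completed_ids"
 ]
},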
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"tasks = batch_client.task.list(job_id)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"tasks = [task for task in tasks]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"azure.batch.models._models_py3.CloudTask"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(tasks[0])"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"incomplete_tasks = [task for task in tasks if\n",
" task.state != TaskState.completed]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"incomplete_tasks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:cameratraps-batch-api]",
"language": "python",
"name": "conda-env-cameratraps-batch-api-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}


@@ -1,5 +0,0 @@
FROM tensorflow/tensorflow:1.14.0-gpu-py3
# Python version is 3.6.8
RUN pip install --upgrade pip
RUN pip install azure-storage-blob==12.7.1 pillow numpy requests


@@ -1,439 +0,0 @@
import io
import json
import math
import os
import sys
from datetime import datetime
from io import BytesIO
from typing import Union
from PIL import Image
import numpy as np
import requests
import tensorflow as tf
from azure.storage.blob import ContainerClient
print('score.py, tensorflow version:', tf.__version__)
print('score.py, tf.test.is_gpu_available:', tf.test.is_gpu_available())
PRINT_EVERY = 500
#%% Helper functions *copied* from ct_utils.py and visualization/visualization_utils.py
IMAGE_ROTATIONS = {
3: 180,
6: 270,
8: 90
}
def truncate_float(x, precision=3):
"""
Function for truncating a float scalar to the defined precision.
For example: truncate_float(0.0003214884) --> 0.000321
This function is primarily used to achieve a certain float representation
before exporting to JSON
Args:
x (float) Scalar to truncate
precision (int) The number of significant digits to preserve, should be
        greater than or equal to 1
"""
assert precision > 0
if np.isclose(x, 0):
return 0
else:
# Determine the factor, which shifts the decimal point of x
# just behind the last significant digit
factor = math.pow(10, precision - 1 - math.floor(math.log10(abs(x))))
        # Shift the decimal point by multiplication with factor, flooring, and
# division by factor
return math.floor(x * factor)/factor
def open_image(input_file: Union[str, BytesIO]) -> Image.Image:
"""Opens an image in binary format using PIL.Image and converts to RGB mode.
This operation is lazy; image will not be actually loaded until the first
operation that needs to load it (for example, resizing), so file opening
errors can show up later.
Args:
input_file: str or BytesIO, either a path to an image file (anything
that PIL can open), or an image as a stream of bytes
Returns:
        a PIL Image object in RGB mode
"""
if (isinstance(input_file, str)
and input_file.startswith(('http://', 'https://'))):
try:
response = requests.get(input_file)
image = Image.open(BytesIO(response.content))
except Exception as e:
print(f'Error opening image {input_file}: {e}')
raise
else:
image = Image.open(input_file)
if image.mode not in ('RGBA', 'RGB', 'L'):
raise AttributeError(f'Image {input_file} uses unsupported mode {image.mode}')
if image.mode == 'RGBA' or image.mode == 'L':
# PIL.Image.convert() returns a converted copy of this image
image = image.convert(mode='RGB')
# alter orientation as needed according to EXIF tag 0x112 (274) for Orientation
# https://gist.github.com/dangtrinhnt/a577ece4cbe5364aad28
# https://www.media.mit.edu/pia/Research/deepview/exif.html
try:
exif = image._getexif()
orientation: int = exif.get(274, None) # 274 is the key for the Orientation field
if orientation is not None and orientation in IMAGE_ROTATIONS:
image = image.rotate(IMAGE_ROTATIONS[orientation], expand=True) # returns a rotated copy
except Exception:
pass
return image
def load_image(input_file: Union[str, BytesIO]) -> Image.Image:
"""Loads the image at input_file as a PIL Image into memory.
Image.open() used in open_image() is lazy and errors will occur downstream
if not explicitly loaded.
Args:
input_file: str or BytesIO, either a path to an image file (anything
that PIL can open), or an image as a stream of bytes
Returns: PIL.Image.Image, in RGB mode
"""
image = open_image(input_file)
image.load()
return image
#%% TFDetector class, an unmodified *copy* of the class in detection/tf_detector.py,
# so we do not have to import the packages required by run_detector.py
class TFDetector:
"""
A detector model loaded at the time of initialization. It is intended to be used with
the MegaDetector (TF). The inference batch size is set to 1; code needs to be modified
to support larger batch sizes, including resizing appropriately.
"""
# Number of decimal places to round to for confidence and bbox coordinates
CONF_DIGITS = 3
COORD_DIGITS = 4
# MegaDetector was trained with batch size of 1, and the resizing function is a part
# of the inference graph
BATCH_SIZE = 1
# An enumeration of failure reasons
FAILURE_TF_INFER = 'Failure TF inference'
FAILURE_IMAGE_OPEN = 'Failure image access'
DEFAULT_RENDERING_CONFIDENCE_THRESHOLD = 0.85 # to render bounding boxes
DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD = 0.1 # to include in the output json file
DEFAULT_DETECTOR_LABEL_MAP = {
'1': 'animal',
'2': 'person',
'3': 'vehicle' # available in megadetector v4+
}
NUM_DETECTOR_CATEGORIES = 4 # animal, person, group, vehicle - for color assignment
def __init__(self, model_path):
"""Loads model from model_path and starts a tf.Session with this graph. Obtains
input and output tensor handles."""
detection_graph = TFDetector.__load_model(model_path)
self.tf_session = tf.Session(graph=detection_graph)
self.image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
self.box_tensor = detection_graph.get_tensor_by_name('detection_boxes:0')
self.score_tensor = detection_graph.get_tensor_by_name('detection_scores:0')
self.class_tensor = detection_graph.get_tensor_by_name('detection_classes:0')
@staticmethod
def round_and_make_float(d, precision=4):
return truncate_float(float(d), precision=precision)
@staticmethod
def __convert_coords(tf_coords):
"""Converts coordinates from the model's output format [y1, x1, y2, x2] to the
format used by our API and MegaDB: [x1, y1, width, height]. All coordinates
(including model outputs) are normalized in the range [0, 1].
Args:
tf_coords: np.array of predicted bounding box coordinates from the TF detector,
has format [y1, x1, y2, x2]
Returns: list of Python float, predicted bounding box coordinates [x1, y1, width, height]
"""
# change from [y1, x1, y2, x2] to [x1, y1, width, height]
width = tf_coords[3] - tf_coords[1]
height = tf_coords[2] - tf_coords[0]
new = [tf_coords[1], tf_coords[0], width, height] # must be a list instead of np.array
# convert numpy floats to Python floats
for i, d in enumerate(new):
new[i] = TFDetector.round_and_make_float(d, precision=TFDetector.COORD_DIGITS)
return new
@staticmethod
def convert_to_tf_coords(array):
"""From [x1, y1, width, height] to [y1, x1, y2, x2], where x1 is x_min, x2 is x_max
        This is an extraneous step: the model outputs [y1, x1, y2, x2], which we converted to the API
        output format; this function converts back only to keep the interface of the sync API.
"""
x1 = array[0]
y1 = array[1]
width = array[2]
height = array[3]
x2 = x1 + width
y2 = y1 + height
return [y1, x1, y2, x2]
@staticmethod
def __load_model(model_path):
"""Loads a detection model (i.e., create a graph) from a .pb file.
Args:
model_path: .pb file of the model.
Returns: the loaded graph.
"""
print('TFDetector: Loading graph...')
detection_graph = tf.Graph()
with detection_graph.as_default():
od_graph_def = tf.GraphDef()
with tf.gfile.GFile(model_path, 'rb') as fid:
serialized_graph = fid.read()
od_graph_def.ParseFromString(serialized_graph)
tf.import_graph_def(od_graph_def, name='')
print('TFDetector: Detection graph loaded.')
return detection_graph
def _generate_detections_one_image(self, image):
np_im = np.asarray(image, np.uint8)
im_w_batch_dim = np.expand_dims(np_im, axis=0)
# need to change the above line to the following if supporting a batch size > 1 and resizing to the same size
# np_images = [np.asarray(image, np.uint8) for image in images]
# images_stacked = np.stack(np_images, axis=0) if len(images) > 1 else np.expand_dims(np_images[0], axis=0)
# performs inference
(box_tensor_out, score_tensor_out, class_tensor_out) = self.tf_session.run(
[self.box_tensor, self.score_tensor, self.class_tensor],
feed_dict={self.image_tensor: im_w_batch_dim})
return box_tensor_out, score_tensor_out, class_tensor_out
def generate_detections_one_image(self, image, image_id,
detection_threshold=DEFAULT_OUTPUT_CONFIDENCE_THRESHOLD):
"""Apply the detector to an image.
Args:
image: the PIL Image object
image_id: a path to identify the image; will be in the "file" field of the output object
detection_threshold: confidence above which to include the detection proposal
Returns:
A dict with the following fields, see the 'images' key in https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#batch-processing-api-output-format
- 'file' (always present)
- 'max_detection_conf'
- 'detections', which is a list of detection objects containing keys 'category', 'conf' and 'bbox'
- 'failure'
"""
result = {
'file': image_id
}
try:
b_box, b_score, b_class = self._generate_detections_one_image(image)
# our batch size is 1; need to loop the batch dim if supporting batch size > 1
boxes, scores, classes = b_box[0], b_score[0], b_class[0]
detections_cur_image = [] # will be empty for an image with no confident detections
max_detection_conf = 0.0
for b, s, c in zip(boxes, scores, classes):
if s > detection_threshold:
detection_entry = {
'category': str(int(c)), # use string type for the numerical class label, not int
'conf': truncate_float(float(s), # cast to float for json serialization
precision=TFDetector.CONF_DIGITS),
'bbox': TFDetector.__convert_coords(b)
}
detections_cur_image.append(detection_entry)
if s > max_detection_conf:
max_detection_conf = s
result['max_detection_conf'] = truncate_float(float(max_detection_conf),
precision=TFDetector.CONF_DIGITS)
result['detections'] = detections_cur_image
except Exception as e:
result['failure'] = TFDetector.FAILURE_TF_INFER
print('TFDetector: image {} failed during inference: {}'.format(image_id, str(e)))
return result
#%% Scoring script
class BatchScorer:
"""
Coordinates scoring images in this Task.
    A possible optimization: have a synchronized queue that download tasks enqueue into and the
    scoring function dequeues from, with a bounded size. We do not want to write the image to disk
    and then load it again in the scoring function.
"""
def __init__(self, **kwargs):
print('score.py BatchScorer, __init__()')
detector_path = kwargs.get('detector_path')
self.detector = TFDetector(detector_path)
self.use_url = kwargs.get('use_url')
if not self.use_url:
input_container_sas = kwargs.get('input_container_sas')
self.input_container_client = ContainerClient.from_container_url(input_container_sas)
self.detection_threshold = kwargs.get('detection_threshold')
self.image_ids_to_score = kwargs.get('image_ids_to_score')
# determine if there is metadata attached to each image_id
        self.metadata_available = isinstance(self.image_ids_to_score[0], list)
    def _download_image(self, image_file) -> Image.Image:
"""
Args:
image_file: Public URL if use_url, else the full path from container root
Returns:
PIL image loaded
"""
if not self.use_url:
downloader = self.input_container_client.download_blob(image_file)
image_file = io.BytesIO()
blob_props = downloader.download_to_stream(image_file)
image = open_image(image_file)
return image
def score_images(self) -> list:
detections = []
for i in self.image_ids_to_score:
if self.metadata_available:
image_id = i[0]
image_metadata = i[1]
else:
image_id = i
try:
image = self._download_image(image_id)
except Exception as e:
print(f'score.py BatchScorer, score_images, download_image exception: {e}')
result = {
'file': image_id,
'failure': TFDetector.FAILURE_IMAGE_OPEN
}
else:
result = self.detector.generate_detections_one_image(
image, image_id, detection_threshold=self.detection_threshold)
if self.metadata_available:
result['meta'] = image_metadata
detections.append(result)
if len(detections) % PRINT_EVERY == 0:
print(f'scored {len(detections)} images')
return detections
def main():
print('score.py, main()')
# information to determine input and output locations
api_instance_name = os.environ['API_INSTANCE_NAME']
job_id = os.environ['AZ_BATCH_JOB_ID']
task_id = os.environ['AZ_BATCH_TASK_ID']
mount_point = os.environ['AZ_BATCH_NODE_MOUNTS_DIR']
# other parameters for the task
begin_index = int(os.environ['TASK_BEGIN_INDEX'])
end_index = int(os.environ['TASK_END_INDEX'])
input_container_sas = os.environ.get('JOB_CONTAINER_SAS', None) # could be None if use_url
use_url = os.environ.get('JOB_USE_URL', None)
if use_url and use_url.lower() == 'true': # bool of any non-empty string is True
use_url = True
else:
use_url = False
detection_threshold = float(os.environ['DETECTION_CONF_THRESHOLD'])
print(f'score.py, main(), api_instance_name: {api_instance_name}, job_id: {job_id}, task_id: {task_id}, '
f'mount_point: {mount_point}, begin_index: {begin_index}, end_index: {end_index}, '
          f'input_container_sas: {input_container_sas}, use_url (parsed): {use_url}, '
f'detection_threshold: {detection_threshold}')
job_folder_mounted = os.path.join(mount_point, 'batch-api', f'api_{api_instance_name}', f'job_{job_id}')
task_out_dir = os.path.join(job_folder_mounted, 'task_outputs')
os.makedirs(task_out_dir, exist_ok=True)
task_output_path = os.path.join(task_out_dir, f'job_{job_id}_task_{task_id}.json')
# test that we can write to output path; also in case there is no image to process
with open(task_output_path, 'w') as f:
json.dump([], f)
# list images to process
list_images_path = os.path.join(job_folder_mounted, f'{job_id}_images.json')
with open(list_images_path) as f:
list_images = json.load(f)
print(f'score.py, main(), length of list_images: {len(list_images)}')
if (not isinstance(list_images, list)) or len(list_images) == 0:
print('score.py, main(), zero images in specified overall list, exiting...')
sys.exit(0)
# items in this list can be strings or [image_id, metadata]
list_images = list_images[begin_index: end_index]
if len(list_images) == 0:
print('score.py, main(), zero images in the shard, exiting')
sys.exit(0)
print(f'score.py, main(), processing {len(list_images)} images in this Task')
# model path
# Path to .pb TensorFlow detector model file, relative to the
# models/megadetector_copies folder in mounted container
detector_model_rel_path = os.environ['DETECTOR_REL_PATH']
detector_path = os.path.join(mount_point, 'models', 'megadetector_copies', detector_model_rel_path)
assert os.path.exists(detector_path), f'detector is not found at the specified path: {detector_path}'
# score the images
scorer = BatchScorer(
detector_path=detector_path,
use_url=use_url,
input_container_sas=input_container_sas,
detection_threshold=detection_threshold,
image_ids_to_score=list_images
)
try:
tick = datetime.now()
detections = scorer.score_images()
duration = datetime.now() - tick
print(f'score.py, main(), score_images() duration: {duration}')
except Exception as e:
raise RuntimeError(f'score.py, main(), exception in score_images(): {e}')
with open(task_output_path, 'w', encoding='utf-8') as f:
json.dump(detections, f, ensure_ascii=False)
if __name__ == '__main__':
main()


@@ -1,22 +0,0 @@
# Conda environment file for running the batch API.
#
# conda activate cameratraps-batch-api
#
# conda env update --name cameratraps-batch-api --file environment-batch-api.yml --prune
name: cameratraps-batch-api
dependencies:
- python>=3.6
- nb_conda_kernels
- humanfriendly
- requests
- flask>=1.1.0 # known-compatible Flask 1.1.2, Werkzeug 1.0.1
- gunicorn
- tqdm # required by sas_blob_utils.py
- pip
- pip:
- azure-storage-blob>=12.3.0 # known-compatible 12.7.1
- azure-batch>=4.0.0 # known-compatible 10.0.0
- azure-cosmos # known-compatible 4.2.0
- azure-appconfiguration # known-compatible 1.1.1


@@ -1,294 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
import string
import uuid
import threading
from datetime import timedelta
import sas_blob_utils # from ai4eutils
from flask import Flask, request, jsonify
import server_api_config as api_config
from server_app_config import AppConfig
from server_batch_job_manager import BatchJobManager
from server_orchestration import create_batch_job, monitor_batch_job
from server_job_status_table import JobStatusTable
from server_utils import *
# %% Flask app
app = Flask(__name__)
# reference: https://trstringer.com/logging-flask-gunicorn-the-manageable-way/
if __name__ != '__main__':
gunicorn_logger = logging.getLogger('gunicorn.error')
app.logger.handlers = gunicorn_logger.handlers
app.logger.setLevel(gunicorn_logger.level)
API_PREFIX = api_config.API_PREFIX
app.logger.info('server, created Flask application...')
# %% Helper classes
app_config = AppConfig()
job_status_table = JobStatusTable()
batch_job_manager = BatchJobManager()
app.logger.info('server, finished instantiating helper classes')
# %% Flask endpoints
@app.route(f'{API_PREFIX}/')
def hello():
return f'Camera traps batch processing API. Instance: {api_config.API_INSTANCE_NAME}'
@app.route(f'{API_PREFIX}/request_detections', methods=['POST'])
def request_detections():
"""
Checks that the input parameters to this endpoint are valid, starts a thread
    to launch the batch processing job, and returns the job_id/request_id to the user.
"""
if not request.is_json:
msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
return make_error(415, msg)
try:
post_body = request.get_json()
except Exception as e:
return make_error(415, f'Error occurred reading POST request body: {e}.')
app.logger.info(f'server, request_detections, post_body: {post_body}')
# required params
caller_id = post_body.get('caller', None)
if caller_id is None or caller_id not in app_config.get_allowlist():
msg = ('Parameter caller is not supplied or is not on our allowlist. '
'Please email cameratraps@lila.science to request access.')
return make_error(401, msg)
use_url = post_body.get('use_url', False)
if use_url and isinstance(use_url, str): # in case it is included but is intended to be False
if use_url.lower() in ['false', 'f', 'no', 'n']:
use_url = False
input_container_sas = post_body.get('input_container_sas', None)
if not input_container_sas and not use_url:
msg = ('input_container_sas with read and list access is a required '
'field when not using image URLs.')
return make_error(400, msg)
if input_container_sas is not None:
if not sas_blob_utils.is_container_uri(input_container_sas):
return make_error(400, 'input_container_sas provided is not for a container.')
result = check_data_container_sas(input_container_sas)
if result is not None:
return make_error(result[0], result[1])
    # can be a URL to a file not hosted in an Azure blob storage container
images_requested_json_sas = post_body.get('images_requested_json_sas', None)
if images_requested_json_sas is not None:
if not images_requested_json_sas.startswith(('http://', 'https://')):
            return make_error(400, 'images_requested_json_sas needs to be a URL.')
# if use_url, then images_requested_json_sas is required
if use_url and images_requested_json_sas is None:
return make_error(400, 'images_requested_json_sas is required since use_url is true.')
# optional params
# check model_version is among the available model versions
model_version = post_body.get('model_version', '')
if model_version != '':
model_version = str(model_version) # in case user used an int
if model_version not in api_config.MD_VERSIONS_TO_REL_PATH: # TODO use AppConfig to store model version info
return make_error(400, f'model_version {model_version} is not supported.')
# check request_name has only allowed characters
request_name = post_body.get('request_name', '')
if request_name != '':
if len(request_name) > 92:
return make_error(400, 'request_name is longer than 92 characters.')
allowed = set(string.ascii_letters + string.digits + '_' + '-')
if not set(request_name) <= allowed:
msg = ('request_name contains invalid characters (only letters, '
'digits, - and _ are allowed).')
return make_error(400, msg)
# optional params for telemetry collection - logged to status table for now as part of call_params
country = post_body.get('country', None)
organization_name = post_body.get('organization_name', None)
# All API instances / node pools share a quota on total number of active Jobs;
# we cannot accept new Job submissions if we are at the quota
try:
num_active_jobs = batch_job_manager.get_num_active_jobs()
if num_active_jobs >= api_config.MAX_BATCH_ACCOUNT_ACTIVE_JOBS:
            return make_error(503, 'Too many active jobs, please try again later.')
except Exception as e:
return make_error(500, f'Error checking number of active jobs: {e}')
try:
job_id = uuid.uuid4().hex
job_status_table.create_job_status(
job_id=job_id,
            status=get_job_status('created', 'Request received. Listing images next...'),
call_params=post_body
)
except Exception as e:
return make_error(500, f'Error creating a job status entry: {e}')
try:
thread = threading.Thread(
target=create_batch_job,
name=f'job_{job_id}',
kwargs={'job_id': job_id, 'body': post_body}
)
thread.start()
except Exception as e:
return make_error(500, f'Error creating or starting the batch processing thread: {e}')
return {'request_id': job_id}
@app.route(f'{API_PREFIX}/cancel_request', methods=['POST'])
def cancel_request():
"""
Cancels a request/job given the job_id and caller_id
"""
if not request.is_json:
msg = 'Body needs to have a JSON mimetype (e.g., application/json).'
return make_error(415, msg)
try:
post_body = request.get_json()
except Exception as e:
return make_error(415, f'Error occurred reading POST request body: {e}.')
app.logger.info(f'server, cancel_request received, body: {post_body}')
# required fields
job_id = post_body.get('request_id', None)
if job_id is None:
return make_error(400, 'request_id is a required field.')
caller_id = post_body.get('caller', None)
if caller_id is None or caller_id not in app_config.get_allowlist():
return make_error(401, 'Parameter caller is not supplied or is not on our allowlist.')
item_read = job_status_table.read_job_status(job_id)
if item_read is None:
return make_error(404, 'Task is not found.')
if 'status' not in item_read:
return make_error(404, 'Something went wrong. This task does not have a status field.')
request_status = item_read['status']['request_status']
if request_status not in ['running', 'problem']:
# request_status is either completed or failed
        return make_error(400, f'Task has status {request_status} and cannot be canceled.')
try:
batch_job_manager.cancel_batch_job(job_id)
# the create_batch_job thread will stop when it wakes up the next time
except Exception as e:
return make_error(500, f'Error when canceling the request: {e}')
else:
job_status_table.update_job_status(job_id, {
'request_status': 'canceled',
'message': 'Request has been canceled by the user.'
})
return 'Canceling signal has been sent. You can verify the status at the /task endpoint'
@app.route(f'{API_PREFIX}/task/<job_id>')
def retrieve_job_status(job_id: str):
"""
Does not require the "caller" field to avoid checking the allowlist in App Configurations.
Retains the /task endpoint name to be compatible with previous versions.
"""
# Fix for Zooniverse - deleting any "-" characters in the job_id
job_id = job_id.replace('-', '')
item_read = job_status_table.read_job_status(job_id) # just what the monitoring thread wrote to the DB
if item_read is None:
return make_error(404, 'Task is not found.')
if 'status' not in item_read or 'last_updated' not in item_read or 'call_params' not in item_read:
return make_error(404, 'Something went wrong. This task does not have a valid status.')
# If the status is running, it could be a Job submitted before the last restart of this
# API instance. If that is the case, we should start to monitor its progress again.
status = item_read['status']
last_updated = datetime.fromisoformat(item_read['last_updated'][:-1]) # get rid of "Z" (required by Cosmos DB)
time_passed = datetime.utcnow() - last_updated
    job_is_unmonitored = time_passed > timedelta(minutes=(api_config.MONITOR_PERIOD_MINUTES + 1))
if isinstance(status, dict) and \
'request_status' in status and \
status['request_status'] in ['running', 'problem'] and \
'num_tasks' in status and \
job_id not in get_thread_names() and \
job_is_unmonitored:
# WARNING model_version could be wrong (a newer version number gets written to the output file) around
# the time that the model is updated, if this request was submitted before the model update
# and the API restart; this should be quite rare
model_version = item_read['call_params'].get('model_version', api_config.DEFAULT_MD_VERSION)
num_tasks = status['num_tasks']
job_name = item_read['call_params'].get('request_name', '')
job_submission_timestamp = item_read.get('job_submission_time', '')
thread = threading.Thread(
target=monitor_batch_job,
name=f'job_{job_id}',
kwargs={
'job_id': job_id,
'num_tasks': num_tasks,
'model_version': model_version,
'job_name': job_name,
'job_submission_timestamp': job_submission_timestamp
}
)
thread.start()
app.logger.info(f'server, started a new thread to monitor job {job_id}')
# conform to previous schemes
if 'num_tasks' in status:
del status['num_tasks']
item_to_return = {
'Status': status,
'Endpoint': f'{API_PREFIX}/request_detections',
'TaskId': job_id,
'Timestamp': item_read['last_updated']
}
return item_to_return
@app.route(f'{API_PREFIX}/default_model_version')
def get_default_model_version() -> str:
return api_config.DEFAULT_MD_VERSION
@app.route(f'{API_PREFIX}/supported_model_versions')
def get_supported_model_versions() -> str:
return jsonify(sorted(list(api_config.MD_VERSIONS_TO_REL_PATH.keys())))
# %% undocumented endpoints
def get_thread_names() -> list:
thread_names = []
for thread in threading.enumerate():
if thread.name.startswith('job_'):
thread_names.append(thread.name.split('_')[1])
return sorted(thread_names)
@app.route(f'{API_PREFIX}/all_jobs')
def get_all_jobs():
"""List all Jobs being monitored since this API instance started"""
thread_names = get_thread_names()
return jsonify(thread_names)


@@ -1,98 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
A module to hold the configurations specific to an instance of the API.
"""
import os
#%% instance-specific API settings
# you likely need to modify these when deploying a new instance of the API
API_INSTANCE_NAME = 'cm' # 'internal', 'cm', 'camelot', 'zooniverse'
POOL_ID = 'cm_1' # name of the Batch pool created for this API instance
MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB = 4 * 1000 * 1000 # inclusive
# Azure Batch for batch processing
BATCH_ACCOUNT_NAME = 'cameratrapssc'
BATCH_ACCOUNT_URL = 'https://cameratrapssc.southcentralus.batch.azure.com'
#%% general API settings
API_PREFIX = '/v4/camera-trap/detection-batch' # URL to root is http://127.0.0.1:5000/v4/camera-trap/detection-batch/
MONITOR_PERIOD_MINUTES = 10
# if this number of times the thread wakes up to check is exceeded, stop the monitoring thread
MAX_MONITOR_CYCLES = 4 * 7 * int((60 * 24) / MONITOR_PERIOD_MINUTES) # 4 weeks
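# e.g. with MONITOR_PERIOD_MINUTES = 10, this is 4 * 7 * 144 = 4032 monitoring cycles, i.e. ~4 weeks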
IMAGE_SUFFIXES_ACCEPTED = ('.jpg', '.jpeg', '.png') # case-insensitive
assert isinstance(IMAGE_SUFFIXES_ACCEPTED, tuple)
OUTPUT_FORMAT_VERSION = '1.1'
NUM_IMAGES_PER_TASK = 2000
OUTPUT_SAS_EXPIRATION_DAYS = 180
# quota of active Jobs in our Batch account, which all node pools (i.e. API instances) share;
# cannot accept job submissions if there are this many active Jobs already
MAX_BATCH_ACCOUNT_ACTIVE_JOBS = 300
#%% MegaDetector info
DETECTION_CONF_THRESHOLD = 0.1
# relative to the `megadetector_copies` folder in the container `models`
# TODO add MD versions info to AppConfig
MD_VERSIONS_TO_REL_PATH = {
'4.1': 'megadetector_v4_1/md_v4.1.0.pb',
'3': 'megadetector_v3/megadetector_v3_tf19.pb',
'2': 'megadetector_v2/frozen_inference_graph.pb'
}
DEFAULT_MD_VERSION = '4.1'
assert DEFAULT_MD_VERSION in MD_VERSIONS_TO_REL_PATH
# copied from TFDetector class in detection/run_detector.py
DETECTOR_LABEL_MAP = {
'1': 'animal',
'2': 'person',
'3': 'vehicle'
}
#%% Azure Batch settings
NUM_TASKS_PER_SUBMISSION = 20 # max for the Python SDK without extension is 100
NUM_TASKS_PER_RESUBMISSION = 5
#%% env variables for service credentials, and info related to these services
# Cosmos DB `batch-api-jobs` table for job status
COSMOS_ENDPOINT = os.environ['COSMOS_ENDPOINT']
COSMOS_WRITE_KEY = os.environ['COSMOS_WRITE_KEY']
# Service principal of this "application", authorized to use Azure Batch
APP_TENANT_ID = os.environ['APP_TENANT_ID']
APP_CLIENT_ID = os.environ['APP_CLIENT_ID']
APP_CLIENT_SECRET = os.environ['APP_CLIENT_SECRET']
# Blob storage account for storing Batch tasks' outputs and scoring script
STORAGE_ACCOUNT_NAME = os.environ['STORAGE_ACCOUNT_NAME']
STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']
# STORAGE_CONTAINER_MODELS = 'models' # names of the two containers supporting Batch
STORAGE_CONTAINER_API = 'batch-api'
# Azure Container Registry for Docker image used by our Batch node pools
REGISTRY_SERVER = os.environ['REGISTRY_SERVER']
REGISTRY_PASSWORD = os.environ['REGISTRY_PASSWORD']
CONTAINER_IMAGE_NAME = '***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3'
# Azure App Configuration instance to get configurations specific to
# this instance of the API
APP_CONFIG_CONNECTION_STR = os.environ['APP_CONFIG_CONNECTION_STR']


@@ -1,55 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
A class wrapping the Azure App Configuration client to get configurations
for each instance of the API.
"""
import logging
import os
from server_api_config import APP_CONFIG_CONNECTION_STR, API_INSTANCE_NAME
from azure.appconfiguration import AzureAppConfigurationClient
log = logging.getLogger(os.environ['FLASK_APP'])
class AppConfig:
"""Wrapper around the Azure App Configuration client"""
def __init__(self):
self.client = AzureAppConfigurationClient.from_connection_string(APP_CONFIG_CONNECTION_STR)
self.api_instance = API_INSTANCE_NAME
# sentinel should change if new configurations are available
self.sentinel = self._get_sentinel() # get initial sentinel and allowlist values
self.allowlist = self._get_allowlist()
def _get_sentinel(self):
return self.client.get_configuration_setting(key='batch_api:sentinel').value
def _get_allowlist(self):
filtered_listed = self.client.list_configuration_settings(key_filter='batch_api_allow:*')
allowlist = []
for item in filtered_listed:
if item.value == self.api_instance:
allowlist.append(item.key.split('batch_api_allow:')[1])
return allowlist
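    # Convention (key name illustrative): a setting with key 'batch_api_allow:some_caller'
    # and value 'cm' puts 'some_caller' on the allowlist of the 'cm' API instance.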
def get_allowlist(self):
try:
cur_sentinel = self._get_sentinel()
if cur_sentinel == self.sentinel:
# configs have not changed
return self.allowlist
else:
self.sentinel = cur_sentinel
self.allowlist = self._get_allowlist()
return self.allowlist
except Exception as e:
log.error(f'AppConfig, get_allowlist, exception so using old allowlist: {e}')
return self.allowlist


@@ -1,220 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
A class wrapping the Azure Batch client.
"""
import logging
import os
import math
from typing import Tuple
from datetime import datetime, timedelta
import sas_blob_utils # from ai4eutils
from azure.storage.blob import ContainerClient, ContainerSasPermissions, generate_container_sas
from azure.batch import BatchServiceClient
from azure.batch.models import *
from azure.common.credentials import ServicePrincipalCredentials
import server_api_config as api_config
# Gunicorn logger handler will get attached if needed in server.py
log = logging.getLogger(os.environ['FLASK_APP'])
class BatchJobManager:
"""Wrapper around the Azure App Configuration client"""
def __init__(self):
credentials = ServicePrincipalCredentials(
client_id=api_config.APP_CLIENT_ID,
secret=api_config.APP_CLIENT_SECRET,
tenant=api_config.APP_TENANT_ID,
resource='https://batch.core.windows.net/'
)
self.batch_client = BatchServiceClient(credentials=credentials,
batch_url=api_config.BATCH_ACCOUNT_URL)
def create_job(self, job_id: str, detector_model_rel_path: str,
input_container_sas: str, use_url: bool):
log.info(f'BatchJobManager, create_job, job_id: {job_id}')
job = JobAddParameter(
id=job_id,
pool_info=PoolInformation(pool_id=api_config.POOL_ID),
# set for all tasks in the job
common_environment_settings=[
EnvironmentSetting(name='DETECTOR_REL_PATH', value=detector_model_rel_path),
EnvironmentSetting(name='API_INSTANCE_NAME', value=api_config.API_INSTANCE_NAME),
EnvironmentSetting(name='JOB_CONTAINER_SAS', value=input_container_sas),
EnvironmentSetting(name='JOB_USE_URL', value=str(use_url)),
EnvironmentSetting(name='DETECTION_CONF_THRESHOLD', value=api_config.DETECTION_CONF_THRESHOLD)
]
)
self.batch_client.job.add(job)
def submit_tasks(self, job_id: str, num_images: int) -> Tuple[int, list]:
"""
Shard the images and submit each shard as a Task under the Job pointed to by this job_id
Args:
job_id: ID of the Batch Job to submit the tasks to
num_images: total number of images to be processed in this Job
Returns:
num_task: total number of Tasks that should be in this Job
task_ids_failed_to_submit: which Tasks from the above failed to be submitted
"""
log.info('BatchJobManager, submit_tasks')
# cannot execute the scoring script that is in the mounted directory; has to be copied to cwd
        # no luck passing the command-line arguments via a formatted string - set as env vars instead
score_command = '/bin/bash -c \"cp $AZ_BATCH_NODE_MOUNTS_DIR/batch-api/scripts/score.py . && python score.py\" '
num_images_per_task = api_config.NUM_IMAGES_PER_TASK
# form shards of images and assign each shard to a Task
num_tasks = math.ceil(num_images / num_images_per_task)
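        # e.g. 4500 images with NUM_IMAGES_PER_TASK = 2000 results in ceil(4500 / 2000) = 3 Tasks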
# for persisting stdout and stderr
permissions = ContainerSasPermissions(read=True, write=True, list=True)
access_duration_hrs = api_config.MONITOR_PERIOD_MINUTES * api_config.MAX_MONITOR_CYCLES / 60
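        # with the defaults this is 10 min * 4032 cycles / 60 = 672 hours, i.e. 4 weeks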
container_sas_token = generate_container_sas(
account_name=api_config.STORAGE_ACCOUNT_NAME,
container_name=api_config.STORAGE_CONTAINER_API,
account_key=api_config.STORAGE_ACCOUNT_KEY,
permission=permissions,
expiry=datetime.utcnow() + timedelta(hours=access_duration_hrs))
container_sas_url = sas_blob_utils.build_azure_storage_uri(
account=api_config.STORAGE_ACCOUNT_NAME,
container=api_config.STORAGE_CONTAINER_API,
sas_token=container_sas_token)
tasks = []
for task_id in range(num_tasks):
begin_index = task_id * num_images_per_task
end_index = begin_index + num_images_per_task
# persist stdout and stderr (will be removed when node removed)
# paths are relative to the Task working directory
stderr_destination = OutputFileDestination(
container=OutputFileBlobContainerDestination(
container_url=container_sas_url,
path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stderr.txt'
)
)
stdout_destination = OutputFileDestination(
container=OutputFileBlobContainerDestination(
container_url=container_sas_url,
path=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_logs/job_{job_id}_task_{task_id}_stdout.txt'
)
)
std_err_and_out = [
OutputFile(
file_pattern='../stderr.txt', # stderr.txt is at the same level as wd
destination=stderr_destination,
upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
# can also just upload on failure
),
OutputFile(
file_pattern='../stdout.txt',
destination=stdout_destination,
upload_options=OutputFileUploadOptions(upload_condition=OutputFileUploadCondition.task_completion)
)
]
task = TaskAddParameter(
id=str(task_id),
command_line=score_command,
container_settings=TaskContainerSettings(
image_name=api_config.CONTAINER_IMAGE_NAME,
working_directory='taskWorkingDirectory'
),
environment_settings=[
EnvironmentSetting(name='TASK_BEGIN_INDEX', value=begin_index),
EnvironmentSetting(name='TASK_END_INDEX', value=end_index),
],
output_files=std_err_and_out
)
tasks.append(task)
# first try submitting Tasks
task_ids_failed_to_submit = self._create_tasks(job_id, tasks, api_config.NUM_TASKS_PER_SUBMISSION, 1)
# retry submitting Tasks
if len(task_ids_failed_to_submit) > 0:
task_ids_failed_to_submit_set = set(task_ids_failed_to_submit)
tasks_to_retry = [t for t in tasks if t.id in task_ids_failed_to_submit_set]
task_ids_failed_to_submit = self._create_tasks(job_id,
tasks_to_retry,
api_config.NUM_TASKS_PER_RESUBMISSION,
2)
if len(task_ids_failed_to_submit) > 0:
log.info('BatchJobManager, submit_tasks, after retry, '
f'len of task_ids_failed_to_submit: {len(task_ids_failed_to_submit)}')
else:
log.info('BatchJobManager, submit_tasks, after retry, all Tasks submitted')
else:
log.info('BatchJobManager, submit_tasks, all Tasks submitted after first try')
# Change the Job's on_all_tasks_complete option to 'terminateJob' so the Job's status changes automatically
# after all submitted tasks are done
# This is so that we do not take up the quota for active Jobs in the Batch account.
job_patch_params = JobPatchParameter(
on_all_tasks_complete=OnAllTasksComplete.terminate_job
)
self.batch_client.job.patch(job_id, job_patch_params)
return num_tasks, task_ids_failed_to_submit
def _create_tasks(self, job_id, tasks, num_tasks_per_submission, n_th_try) -> list:
task_ids_failed_to_submit = []
for i in range(0, len(tasks), num_tasks_per_submission):
tasks_to_submit = tasks[i: i + num_tasks_per_submission]
# return type: TaskAddCollectionResult
collection_results = self.batch_client.task.add_collection(job_id, tasks_to_submit, threads=10)
for task_result in collection_results.value:
if task_result.status is not TaskAddStatus.success:
# actually we should probably only re-submit if it's a server_error
task_ids_failed_to_submit.append(task_result.task_id)
                    log.info(f'task {task_result.task_id} failed to submit after {n_th_try} try/tries, '
f'status: {task_result.status}, error: {task_result.error}')
return task_ids_failed_to_submit
def get_num_completed_tasks(self, job_id: str) -> Tuple[int, int]:
"""
Returns the number of completed tasks for the job of job_id, as a tuple:
        (number of succeeded tasks, number of failed tasks) - both are considered "completed".
"""
# docs: # https://docs.microsoft.com/en-us/rest/api/batchservice/odata-filters-in-batch#list-tasks
tasks = self.batch_client.task.list(job_id,
task_list_options=TaskListOptions(
filter='state eq \'completed\'',
select='id, executionInfo' # only the id field will be non-empty
))
num_succeeded, num_failed = 0, 0
for task in tasks:
exit_code: int = task.execution_info.exit_code
if exit_code == 0:
num_succeeded += 1
else:
num_failed += 1
return num_succeeded, num_failed
def cancel_batch_job(self, job_id: str):
self.batch_client.job.terminate(job_id, terminate_reason='APIUserCanceled')
def get_num_active_jobs(self) -> int:
jobs_generator = self.batch_client.job.list(
job_list_options=JobListOptions(
filter='state eq \'active\'',
select='id'
))
jobs_list = [j for j in jobs_generator]
return len(jobs_list)


@@ -1,153 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
A class to manage updating the status of an API request / Azure Batch Job using
the Cosmos DB table "batch_api_jobs".
"""
import logging
import os
import unittest
import uuid
from typing import Union, Optional
from azure.cosmos.cosmos_client import CosmosClient
from azure.cosmos.exceptions import CosmosResourceNotFoundError
from server_api_config import API_INSTANCE_NAME, COSMOS_ENDPOINT, COSMOS_WRITE_KEY
from server_utils import get_utc_time
log = logging.getLogger(os.environ['FLASK_APP'])
class JobStatusTable:
"""
A wrapper around the Cosmos DB client. Each item in the table "batch_api_jobs" represents
a request/Batch Job, and should have the following fields:
- id: this is the job_id
- api_instance
- status
- last_updated
- call_params: the dict representing the body of the POST request from the user
The 'status' field is a dict with the following fields:
- request_status
- message
- num_tasks (present after Batch Job created)
- num_images (present after Batch Job created)
"""
# a job moves from created to running/problem after the Batch Job has been submitted
allowed_statuses = ['created', 'running', 'failed', 'problem', 'completed', 'canceled']
def __init__(self, api_instance=None):
self.api_instance = api_instance if api_instance is not None else API_INSTANCE_NAME
cosmos_client = CosmosClient(COSMOS_ENDPOINT, credential=COSMOS_WRITE_KEY)
db_client = cosmos_client.get_database_client('camera-trap')
self.db_jobs_client = db_client.get_container_client('batch_api_jobs')
def create_job_status(self, job_id: str, status: Union[dict, str], call_params: dict) -> dict:
assert 'request_status' in status and 'message' in status
assert status['request_status'] in JobStatusTable.allowed_statuses
# job_id should be unique across all instances, and is also the partition key
cur_time = get_utc_time()
item = {
'id': job_id,
'api_instance': self.api_instance,
'status': status,
'job_submission_time': cur_time,
'last_updated': cur_time,
'call_params': call_params
}
created_item = self.db_jobs_client.create_item(item)
return created_item
def update_job_status(self, job_id: str, status: Union[dict, str]) -> dict:
assert 'request_status' in status and 'message' in status
assert status['request_status'] in JobStatusTable.allowed_statuses
# TODO do not read the entry first to get the call_params when the Cosmos SDK add a
# patching functionality:
# https://feedback.azure.com/forums/263030-azure-cosmos-db/suggestions/6693091-be-able-to-do-partial-updates-on-document
item_old = self.read_job_status(job_id)
if item_old is None:
raise ValueError
# need to retain other fields in 'status' to be able to restart monitoring thread
if 'status' in item_old and isinstance(item_old['status'], dict):
# retain existing fields; update as needed
for k, v in item_old['status'].items():
if k not in status:
status[k] = v
item = {
'id': job_id,
'api_instance': self.api_instance,
'status': status,
'job_submission_time': item_old['job_submission_time'],
'last_updated': get_utc_time(),
'call_params': item_old['call_params']
}
replaced_item = self.db_jobs_client.replace_item(job_id, item)
return replaced_item
def read_job_status(self, job_id) -> Optional[dict]:
"""
Read the status of the job from the Cosmos DB table of job status.
Note that it does not check the actual status of the job on Batch, and just returns what
the monitoring thread wrote to the database.
job_id is also the partition key
"""
try:
read_item = self.db_jobs_client.read_item(job_id, partition_key=job_id)
assert read_item['api_instance'] == self.api_instance, 'Job does not belong to this API instance'
except CosmosResourceNotFoundError:
return None # job_id not a key
except Exception as e:
logging.error(f'server_job_status_table, read_job_status, exception: {e}')
raise
else:
item = {k: v for k, v in read_item.items() if not k.startswith('_')}
return item
class TestJobStatusTable(unittest.TestCase):
api_instance = 'api_test'
def test_insert(self):
table = JobStatusTable(TestJobStatusTable.api_instance)
status = {
'request_status': 'running',
'message': 'this is a test'
}
job_id = uuid.uuid4().hex
item = table.create_job_status(job_id, status, {'container_sas': 'random_string'})
self.assertTrue(job_id == item['id'], 'Expect job_id to be the id of the item')
self.assertTrue(item['status']['request_status'] == 'running', 'Expect fields to be inserted correctly')
def test_update_and_read(self):
table = JobStatusTable(TestJobStatusTable.api_instance)
status = {
'request_status': 'running',
'message': 'this is a test'
}
job_id = uuid.uuid4().hex
res = table.create_job_status(job_id, status, {'container_sas': 'random_string'})
status = {
'request_status': 'completed',
'message': 'this is a test again'
}
res = table.update_job_status(job_id, status)
item_read = table.read_job_status(job_id)
self.assertTrue(item_read['status']['request_status'] == 'completed', 'Expect field to have updated')
def test_read_invalid_id(self):
table = JobStatusTable(TestJobStatusTable.api_instance)
job_id = uuid.uuid4().hex # should not be in the database
item_read = table.read_job_status(job_id)
self.assertIsNone(item_read)
if __name__ == '__main__':
unittest.main()


@@ -1,360 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Functions to submit images to the Azure Batch node pool for processing, monitor
the Job and fetch results when completed.
"""
import io
import json
import threading
import time
import logging
import os
import urllib.parse
from datetime import timedelta
from random import shuffle
import sas_blob_utils # from ai4eutils
import requests
from azure.storage.blob import ContainerClient, BlobSasPermissions, generate_blob_sas
from tqdm import tqdm
from server_utils import *
import server_api_config as api_config
from server_batch_job_manager import BatchJobManager
from server_job_status_table import JobStatusTable
# Gunicorn logger handler will get attached if needed in server.py
log = logging.getLogger(os.environ['FLASK_APP'])
def create_batch_job(job_id: str, body: dict):
"""
This is the target to be run in a thread to submit a batch processing job and monitor progress
"""
job_status_table = JobStatusTable()
try:
log.info(f'server_job, create_batch_job, job_id {job_id}, {body}')
input_container_sas = body.get('input_container_sas', None)
use_url = body.get('use_url', False)
images_requested_json_sas = body.get('images_requested_json_sas', None)
image_path_prefix = body.get('image_path_prefix', None)
first_n = body.get('first_n', None)
first_n = int(first_n) if first_n else None
sample_n = body.get('sample_n', None)
sample_n = int(sample_n) if sample_n else None
model_version = body.get('model_version', '')
if model_version == '':
model_version = api_config.DEFAULT_MD_VERSION
# request_name and request_submission_timestamp are for appending to
# output file names
job_name = body.get('request_name', '') # in earlier versions we used "request" to mean a "job"
job_submission_timestamp = get_utc_time()
# image_paths can be a list of strings (Azure blob names or public URLs)
# or a list of length-2 lists where each is a [image_id, metadata] pair
# Case 1: listing all images in the container
# - not possible to have attached metadata if listing images in a blob
if images_requested_json_sas is None:
log.info('server_job, create_batch_job, listing all images to process.')
# list all images to process
image_paths = sas_blob_utils.list_blobs_in_container(
container_uri=input_container_sas,
blob_prefix=image_path_prefix, # check will be case-sensitive
blob_suffix=api_config.IMAGE_SUFFIXES_ACCEPTED, # check will be case-insensitive
limit=api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB + 1
# + 1 so if the number of images listed > MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB
# we will know and not proceed
)
# Case 2: user supplied a list of images to process; can include metadata
else:
log.info('server_job, create_batch_job, using provided list of images.')
response = requests.get(images_requested_json_sas) # could be a file hosted anywhere
image_paths = response.json()
log.info('server_job, create_batch_job, length of image_paths provided by the user: {}'.format(
len(image_paths)))
if len(image_paths) == 0:
job_status = get_job_status(
'completed', '0 images found in provided list of images.')
job_status_table.update_job_status(job_id, job_status)
return
error, metadata_available = validate_provided_image_paths(image_paths)
if error is not None:
msg = 'image paths provided in the json are not valid: {}'.format(error)
raise ValueError(msg)
# filter down to those conforming to the provided prefix and accepted suffixes (image file types)
valid_image_paths = []
for p in image_paths:
locator = p[0] if metadata_available else p
# prefix is case-sensitive; suffix is not
if image_path_prefix is not None and not locator.startswith(image_path_prefix):
continue
# Although urlparse(p).path preserves the extension on local paths, it will not work for
            # blob file names that contain '#', which will be treated as an indication of a query.
# If the URL is generated via Azure Blob Storage, the "#" char will be properly encoded
path = urllib.parse.urlparse(locator).path if use_url else locator
if path.lower().endswith(api_config.IMAGE_SUFFIXES_ACCEPTED):
valid_image_paths.append(p)
image_paths = valid_image_paths
log.info(('server_job, create_batch_job, length of image_paths provided by user, '
f'after filtering by prefix and accepted image suffixes: {len(image_paths)}'))
# apply the first_n and sample_n filters
if first_n:
assert first_n > 0, 'parameter first_n must be greater than 0.'
# OK if first_n > total number of images
image_paths = image_paths[:first_n]
if sample_n:
assert sample_n > 0, 'parameter sample_n must be greater than 0.'
if sample_n > len(image_paths):
msg = ('parameter sample_n specifies more images than '
'available (after filtering by other provided params).')
raise ValueError(msg)
# sample by shuffling image paths and take the first sample_n images
log.info(f'First path before shuffling: {image_paths[0]}')
shuffle(image_paths)
log.info(f'First path after shuffling: {image_paths[0]}')
image_paths = image_paths[:sample_n]
num_images = len(image_paths)
log.info(f'server_job, create_batch_job, num_images after applying all filters: {num_images}')
if num_images < 1:
job_status = get_job_status('completed', (
'Zero images found in container or in provided list of images '
'after filtering with the provided parameters.'))
job_status_table.update_job_status(job_id, job_status)
return
if num_images > api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB:
job_status = get_job_status(
'failed',
(f'The number of images ({num_images}) requested for processing exceeds the maximum '
f'accepted {api_config.MAX_NUMBER_IMAGES_ACCEPTED_PER_JOB} in one call'))
job_status_table.update_job_status(job_id, job_status)
return
# upload the image list to the container, which is also mounted on all nodes
# all sharding and scoring use the uploaded list
images_list_str_as_bytes = bytes(json.dumps(image_paths, ensure_ascii=False), encoding='utf-8')
container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
container=api_config.STORAGE_CONTAINER_API)
with ContainerClient.from_container_url(container_url,
credential=api_config.STORAGE_ACCOUNT_KEY) as api_container_client:
_ = api_container_client.upload_blob(
name=f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_images.json',
data=images_list_str_as_bytes)
job_status = get_job_status('created', f'{num_images} images listed; submitting the job...')
job_status_table.update_job_status(job_id, job_status)
except Exception as e:
job_status = get_job_status('failed', f'Error occurred while preparing the Batch job: {e}')
job_status_table.update_job_status(job_id, job_status)
log.error(f'server_job, create_batch_job, Error occurred while preparing the Batch job: {e}')
return # do not start monitoring
try:
batch_job_manager = BatchJobManager()
model_rel_path = api_config.MD_VERSIONS_TO_REL_PATH[model_version]
batch_job_manager.create_job(job_id,
model_rel_path,
input_container_sas,
use_url)
num_tasks, task_ids_failed_to_submit = batch_job_manager.submit_tasks(job_id, num_images)
# now request_status moves from created to running
job_status = get_job_status('running',
(f'Submitted {num_images} images to cluster in {num_tasks} shards. '
f'Number of shards failed to be submitted: {len(task_ids_failed_to_submit)}'))
# an extra field to allow the monitoring thread to restart after an API restart: total number of tasks
job_status['num_tasks'] = num_tasks
# also record the number of images to process for reporting
job_status['num_images'] = num_images
job_status_table.update_job_status(job_id, job_status)
except Exception as e:
job_status = get_job_status('problem', f'Please contact us. Error occurred while submitting the Batch job: {e}')
job_status_table.update_job_status(job_id, job_status)
log.error(f'server_job, create_batch_job, Error occurred while submitting the Batch job: {e}')
return
# start the monitoring thread, named after the job ID
try:
thread = threading.Thread(
target=monitor_batch_job,
name=f'job_{job_id}',
kwargs={
'job_id': job_id,
'num_tasks': num_tasks,
'model_version': model_version,
'job_name': job_name,
'job_submission_timestamp': job_submission_timestamp
}
)
thread.start()
except Exception as e:
job_status = get_job_status('problem', f'Error occurred while starting the monitoring thread: {e}')
job_status_table.update_job_status(job_id, job_status)
log.error(f'server_job, create_batch_job, Error occurred while starting the monitoring thread: {e}')
return
def monitor_batch_job(job_id: str,
num_tasks: int,
model_version: str,
job_name: str,
job_submission_timestamp: str):
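"""
Polls the Batch job for the completion status of its tasks (shards) every
MONITOR_PERIOD_MINUTES, updating the job status table after each check, until
all tasks finish or MAX_MONITOR_CYCLES is exceeded; the per-shard outputs are
then aggregated into a single output file.
"""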
job_status_table = JobStatusTable()
batch_job_manager = BatchJobManager()
try:
num_checks = 0
while True:
time.sleep(api_config.MONITOR_PERIOD_MINUTES * 60)
num_checks += 1
# both succeeded and failed tasks are marked "completed" on Batch
num_tasks_succeeded, num_tasks_failed = batch_job_manager.get_num_completed_tasks(job_id)
job_status = get_job_status('running',
(f'Check number {num_checks}, '
f'{num_tasks_succeeded} out of {num_tasks} shards have completed '
f'successfully, {num_tasks_failed} shards have failed.'))
job_status_table.update_job_status(job_id, job_status)
log.info(f'job_id {job_id}. '
f'Check number {num_checks}, {num_tasks_succeeded} out of {num_tasks} shards completed, '
f'{num_tasks_failed} shards failed.')
if (num_tasks_succeeded + num_tasks_failed) >= num_tasks:
break
if num_checks > api_config.MAX_MONITOR_CYCLES:
job_status = get_job_status('problem',
(
f'Job unfinished after {num_checks} x {api_config.MONITOR_PERIOD_MINUTES} minutes, '
f'please contact us to retrieve the results. Number of succeeded shards: {num_tasks_succeeded}')
)
job_status_table.update_job_status(job_id, job_status)
log.warning('server_job, monitor_batch_job, MAX_MONITOR_CYCLES reached, ending thread')
break # still aggregate the Tasks' outputs
except Exception as e:
job_status = get_job_status('problem', f'Error occurred while monitoring the Batch job: {e}')
job_status_table.update_job_status(job_id, job_status)
log.error(f'server_job, monitor_batch_job, Error occurred while monitoring the Batch job: {e}')
return
try:
output_sas_url = aggregate_results(job_id, model_version, job_name, job_submission_timestamp)
# preserving the format from before, but SAS URLs for 'failed_images' and 'images' are no longer provided;
# failures should be contained in the output entries, indicated by an 'error' field
msg = {
'num_failed_shards': num_tasks_failed,
'output_file_urls': {
'detections': output_sas_url
}
}
job_status = get_job_status('completed', msg)
job_status_table.update_job_status(job_id, job_status)
except Exception as e:
job_status = get_job_status('problem',
f'Please contact us to retrieve the results. Error occurred while aggregating results: {e}')
job_status_table.update_job_status(job_id, job_status)
log.error(f'server_job, monitor_batch_job, Error occurred while aggregating results: {e}')
return
def aggregate_results(job_id: str,
model_version: str,
job_name: str,
job_submission_timestamp: str) -> str:
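"""
Reads the per-shard output JSONs from the job's task_outputs folder, combines
them into a single API output file, uploads it to the job folder (skipping the
aggregation if another monitoring thread already wrote it), and returns a
read-only SAS URL to the combined output file.
"""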
log.info(f'server_job, aggregate_results starting, job_id: {job_id}')
container_url = sas_blob_utils.build_azure_storage_uri(account=api_config.STORAGE_ACCOUNT_NAME,
container=api_config.STORAGE_CONTAINER_API)
# when people download this, the timestamp will have : replaced by _
output_file_path = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/{job_id}_detections_{job_name}_{job_submission_timestamp}.json'
with ContainerClient.from_container_url(container_url,
credential=api_config.STORAGE_ACCOUNT_KEY) as container_client:
# check if the result blob has already been written (could be another instance of the API / worker thread)
# and if so, skip aggregating and uploading the results, and just generate the SAS URL, which
# could still be needed if the previous request_status was `problem`.
blob_client = container_client.get_blob_client(output_file_path)
if blob_client.exists():
log.warning('The output file already exists, likely because another monitoring thread already wrote it.')
else:
task_outputs_dir = f'api_{api_config.API_INSTANCE_NAME}/job_{job_id}/task_outputs/'
generator = container_client.list_blobs(name_starts_with=task_outputs_dir)
blobs = [i for i in generator if i.name.endswith('.json')]
all_results = []
for blob_props in tqdm(blobs):
with container_client.get_blob_client(blob_props) as blob_client:
stream = io.BytesIO()
blob_client.download_blob().readinto(stream)
stream.seek(0)
task_results = json.load(stream)
all_results.extend(task_results)
api_output = {
'info': {
'detector': f'megadetector_v{model_version}',
'detection_completion_time': get_utc_time(),
'format_version': api_config.OUTPUT_FORMAT_VERSION
},
'detection_categories': api_config.DETECTOR_LABEL_MAP,
'images': all_results
}
# upload the output JSON to the Job folder
api_output_as_bytes = bytes(json.dumps(api_output, ensure_ascii=False, indent=1), encoding='utf-8')
_ = container_client.upload_blob(name=output_file_path, data=api_output_as_bytes)
output_sas = generate_blob_sas(
account_name=api_config.STORAGE_ACCOUNT_NAME,
container_name=api_config.STORAGE_CONTAINER_API,
blob_name=output_file_path,
account_key=api_config.STORAGE_ACCOUNT_KEY,
permission=BlobSasPermissions(read=True, write=False),
expiry=datetime.utcnow() + timedelta(days=api_config.OUTPUT_SAS_EXPIRATION_DAYS)
)
output_sas_url = sas_blob_utils.build_azure_storage_uri(
account=api_config.STORAGE_ACCOUNT_NAME,
container=api_config.STORAGE_CONTAINER_API,
blob=output_file_path,
sas_token=output_sas
)
log.info(f'server_job, aggregate_results done, job_id: {job_id}')
log.info(f'output_sas_url: {output_sas_url}')
return output_sas_url


@ -1,92 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Helper functions for the batch processing API.
"""
import logging
import os
from datetime import datetime
from typing import Tuple, Any, Sequence, Optional
import sas_blob_utils # from ai4eutils
log = logging.getLogger(os.environ['FLASK_APP'])
#%% helper classes and functions
def make_error(error_code: int, error_message: str) -> Tuple[dict, int]:
# TODO log exception when we have more telemetry
log.error(f'Error {error_code} - {error_message}')
return {'error': error_message}, error_code
def check_data_container_sas(input_container_sas: str) -> Optional[Tuple[int, str]]:
"""
Returns a tuple (error_code, msg) if not a usable SAS URL, else returns None
"""
# TODO check that the expiry date of input_container_sas is at least a month
# into the future
permissions = sas_blob_utils.get_permissions_from_uri(input_container_sas)
data = sas_blob_utils.get_all_query_parts(input_container_sas)
msg = ('input_container_sas provided does not have both read and list '
'permissions.')
if 'read' not in permissions or 'list' not in permissions:
if 'si' in data:
# if no permissions are specified explicitly but the SAS references a stored access policy, assume it's okay
# TODO - check based on access policy as well
return None
return 400, msg
return None
def get_utc_time() -> str:
# return current UTC time as a string in the ISO 8601 format (so we can query by
# timestamp in the Cosmos DB job status table)
# example: '2021-02-08T20:02:05.699689Z'
return datetime.utcnow().isoformat(timespec='microseconds') + 'Z'
def get_job_status(request_status: str, message: Any) -> dict:
return {
'request_status': request_status,
'message': message
}
def validate_provided_image_paths(image_paths: Sequence[Any]) -> Tuple[Optional[str], bool]:
"""Given a list of image_paths (list length at least 1), validate them and
determine if metadata is available.
Args:
image_paths: a list of string (image_id) or a list of 2-item lists
([image_id, image_metadata])
Returns:
error: None if checks passed, otherwise a string error message
metadata_available: bool, True if available
"""
# image_paths will have length at least 1, otherwise would have ended before this step
first_item = image_paths[0]
metadata_available = False
if isinstance(first_item, str):
for i in image_paths:
if not isinstance(i, str):
error = 'Not all items in image_paths are of type string.'
return error, metadata_available
return None, metadata_available
elif isinstance(first_item, list):
metadata_available = True
for i in image_paths:
if len(i) != 2: # i should be [image_id, metadata_string]
error = ('Items in image_paths are lists, but not all lists '
'are of length 2 [image locator, metadata].')
return error, metadata_available
return None, metadata_available
else:
error = 'image_paths contains items that are neither strings nor lists.'
return error, metadata_available


@ -1,63 +0,0 @@
#!/bin/sh
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Get the credentials from KeyVault
# run `source start_batch_api.sh` to persist the credentials as env variables in the
# current shell for easy debugging by launching the Flask app separately.
# need "-o tsv" for the Azure CLI queries to get rid of quote marks
SUBSCRIPTION=74d91980-e5b4-4fd9-adb6-263b8f90ec5b
KEY_VAULT_NAME=cameratraps
# A URL and a code to use for logging in on the browser will be displayed
echo Log in to your Azure account via the CLI. You should be prompted to authenticate shortly...
az login
# service principal to authenticate with Azure Batch
APP_TENANT_ID=$(az keyvault secret show --name batch-api-tenant-id --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo APP_TENANT_ID read from KeyVault
export APP_TENANT_ID
APP_CLIENT_ID=$(az keyvault secret show --name batch-api-client-id --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo APP_CLIENT_ID read from KeyVault
export APP_CLIENT_ID
APP_CLIENT_SECRET=$(az keyvault secret show --name batch-api-client-secret --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo APP_CLIENT_SECRET read from KeyVault
export APP_CLIENT_SECRET
# blob storage account with containers for scripts and job outputs
export STORAGE_ACCOUNT_NAME=cameratrap
STORAGE_ACCOUNT_KEY=$(az keyvault secret show --name cameratrap-storage-account-key --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo STORAGE_ACCOUNT_KEY read from KeyVault
export STORAGE_ACCOUNT_KEY
# Azure Container Registry - Azure Batch gets the Docker image from here
export REGISTRY_SERVER=***REMOVED***.azurecr.io
REGISTRY_PASSWORD=$(az keyvault secret show --name registry-password --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo REGISTRY_PASSWORD read from KeyVault
export REGISTRY_PASSWORD
# App Configuration
APP_CONFIG_CONNECTION_STR=$(az keyvault secret show --name camera-trap-app-config-connection-str --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
echo APP_CONFIG_CONNECTION_STR read from KeyVault
export APP_CONFIG_CONNECTION_STR
# Cosmos DB for job status tracking
COSMOS_ENDPOINT=$(az keyvault secret show --name cosmos-db-endpoint --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
export COSMOS_ENDPOINT
COSMOS_WRITE_KEY=$(az keyvault secret show --name cosmos-db-read-write-key --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
export COSMOS_WRITE_KEY
echo COSMOS_ENDPOINT and COSMOS_WRITE_KEY read from KeyVault


@ -1,47 +0,0 @@
#
# If a request has been sent to AML for batch scoring but the monitoring thread of the API was
# interrupted (uncaught exception or having to re-start the API container), we could manually
# aggregate results from each shard using this script, assuming all jobs submitted to AML have finished.
#
# Need to have set environment variables STORAGE_ACCOUNT_NAME and STORAGE_ACCOUNT_KEY to those of the
# storage account backing the API. Also need to adjust the INTERNAL_CONTAINER, AML_CONTAINER and
# AML_CONFIG fields in api_core/orchestrator_api/api_config.py to match the instance of the API that this
# request was submitted to.
#
# May need to change the import statement in api_core/orchestrator_api/orchestrator.py
# "from sas_blob_utils import SasBlob" to
# "from .sas_blob_utils import SasBlob" to not confuse with the module in AI4Eutils;
# and change "import api_config" to
# "from api.batch_processing.api_core.orchestrator_api import api_config"
# Execute this script from the root of the repository. You may need to add the repository to PYTHONPATH.
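# Example invocation (hypothetical request ID and model version; substitute the
# actual path of this script):
#   python <this script> 1234abcd 4.1 university_of_arendelle-20200409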
import argparse
import json
from api.batch_processing.api_core.orchestrator_api.orchestrator import AMLMonitor
def main():
parser = argparse.ArgumentParser()
parser.add_argument('shortened_request_id', type=str,
help='the request ID to restart monitoring')
parser.add_argument('model_version', type=str, help='version of megadetector used; this is used to fill in the meta info section of the output file')
parser.add_argument('request_name', type=str, nargs='?', default='', help='easy-to-remember name for the job, optional')
args = parser.parse_args()
# list_jobs_submitted cannot be serialized ("can't pickle _thread.RLock objects"), but
# we do not need it for aggregating results
aml_monitor = AMLMonitor(request_id=args.shortened_request_id,
list_jobs_submitted=None,
request_name=args.request_name,
request_submission_timestamp='',
model_version=args.model_version)
output_file_urls = aml_monitor.aggregate_results()
output_file_urls_str = json.dumps(output_file_urls)
print(output_file_urls_str)
if __name__ == '__main__':
main()


@ -1,383 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = 'all' # default is last_expr\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'10.0.0'"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import azure.batch\n",
"azure.batch.__version__"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from azure.batch import BatchServiceClient\n",
"from azure.batch.models import *\n",
"from azure.common.credentials import ServicePrincipalCredentials"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Set up an instance of the batch processing API\n",
"\n",
"We create one Azure Batch Pool for each instance of the batch processing API.\n",
"\n",
"The limit for the number of Pools in our Batch account is 100."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 1: Create an Azure Batch Pool"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# MODIFY THIS CELL\n",
"\n",
"# POOL_ID should start with the name of the API instance this pool will be used for\n",
"\n",
"POOL_ID = 'internal_1'\n",
"assert len(POOL_ID) <= 64, 'pool_id has more than 64 characters'\n",
"\n",
"# choose the account in East US or South Central US\n",
"BATCH_ACCOUNT_URL = 'https://cameratrapssc.southcentralus.batch.azure.com' "
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# secrets read from environment variables\n",
"REGISTRY_PASSWORD = os.environ['REGISTRY_PASSWORD']\n",
"STORAGE_ACCOUNT_KEY = os.environ['STORAGE_ACCOUNT_KEY']\n",
"\n",
"# authenticate with Batch account using the service principle \"camera-trap-async-api\" in our AAD\n",
"APP_CLIENT_ID = os.environ['APP_CLIENT_ID']\n",
"APP_CLIENT_SECRET = os.environ['APP_CLIENT_SECRET']\n",
"APP_TENANT_ID = os.environ['APP_TENANT_ID']\n",
"\n",
"\n",
"# other configuration info\n",
"\n",
"# Docker image\n",
"REGISTRY_SERVER = '***REMOVED***.azurecr.io'\n",
"REGISTRY_USERNAME = REGISTRY_SERVER.split('.')[0]\n",
"\n",
"CONTAINER_IMAGE_NAME = '***REMOVED***.azurecr.io/tensorflow:1.14.0-gpu-py3' # login server/repository:tag\n",
"\n",
"# storage\n",
"STORAGE_ACCOUNT_NAME = 'cameratrap' # in the engineering subscription\n",
"\n",
"# names of two containers supporting the API instances in the above storage account\n",
"STORAGE_CONTAINER_MODELS = 'models'\n",
"STORAGE_CONTAINER_API = 'batch-api'\n",
"\n",
"# Azure Batch node pool VM type\n",
"POOL_VM_SIZE = 'Standard_NC6s_v3' # https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series\n",
"\n",
"# auto-scale formula - can be set manually in Azure portal\n",
"# last statement makes sure that nodes aren't removed until their tasks are finished\n",
"# docs: https://docs.microsoft.com/en-us/azure/batch/batch-automatic-scaling\n",
"\n",
"# MODIFY the \"cappedPoolSize\" if it should be other than 16 dedicated nodes\n",
"POOL_AUTO_SCALE_FORMULA = \"\"\"\n",
"// In this formula, the pool size is adjusted based on the number of tasks in the queue. \n",
"// Note that both comments and line breaks are acceptable in formula strings.\n",
"\n",
"// Get pending tasks for the past 15 minutes.\n",
"$samples = $ActiveTasks.GetSamplePercent(TimeInterval_Minute * 15);\n",
"\n",
"// If we have fewer than 70 percent data points, we use the last sample point, otherwise we use the maximum of last sample point and the history average.\n",
"$tasks = $samples < 70 ? max(0, $ActiveTasks.GetSample(1)) : \n",
"max( $ActiveTasks.GetSample(1), avg($ActiveTasks.GetSample(TimeInterval_Minute * 15)));\n",
"\n",
"// If number of pending tasks is not 0, set targetVM to pending tasks, otherwise set to 0, since there is usually long intervals between job submissions.\n",
"$targetVMs = $tasks > 0 ? $tasks : 0;\n",
"\n",
"// The pool size is capped at 16, if target VM value is more than that, set it to 16.\n",
"cappedPoolSize = 16;\n",
"$TargetDedicatedNodes = max(0, min($targetVMs, cappedPoolSize));\n",
"\n",
"// Set node deallocation mode - keep nodes active only until tasks finish\n",
"$NodeDeallocationOption = taskcompletion;\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def print_batch_exception(batch_exception):\n",
" \"\"\"\n",
" Prints the contents of the specified Batch exception.\n",
" \"\"\"\n",
" print('-------------------------------------------')\n",
" print('Exception encountered:')\n",
" if batch_exception.error and \\\n",
" batch_exception.error.message and \\\n",
" batch_exception.error.message.value:\n",
" print(batch_exception.error.message.value)\n",
" if batch_exception.error.values:\n",
" print()\n",
" for msg in batch_exception.error.values:\n",
" print(f'{msg.key}:\\t{msg.value}')\n",
" print('-------------------------------------------')\n",
"\n",
"def create_pool(batch_service_client, pool_id):\n",
" \"\"\"\n",
" Create a pool with pool_id and the Docker image specified by constants in above cells\n",
" \"\"\"\n",
" # we have to use VM images supporting GPU access *and* Docker\n",
" # this VM image will run our custom container\n",
" image_ref = ImageReference(\n",
" publisher='microsoft-azure-batch',\n",
" offer='ubuntu-server-container',\n",
" sku='20-04-lts',\n",
" version='latest' # URN: microsoft-azure-batch:ubuntu-server-container:16-04-lts:1.1.0\n",
" # The Azure Batch container image only accepts 'latest' version\n",
" )\n",
"\n",
" # specify a container registry from which to pull the custom container\n",
" # see the `batch_service` folder on instructions for building the container image\n",
" container_registry = ContainerRegistry(\n",
" registry_server=REGISTRY_SERVER,\n",
" user_name=REGISTRY_USERNAME,\n",
" password=REGISTRY_PASSWORD\n",
" )\n",
"\n",
" container_conf = ContainerConfiguration(\n",
" container_image_names = [CONTAINER_IMAGE_NAME],\n",
" container_registries =[container_registry]\n",
" )\n",
"\n",
" vm_config = VirtualMachineConfiguration(\n",
" image_reference=image_ref,\n",
" container_configuration=container_conf,\n",
" node_agent_sku_id='batch.node.ubuntu 20.04'\n",
" )\n",
"\n",
" # mount the `models` and the `batch-api` blob containers\n",
" container_models = MountConfiguration(\n",
" azure_blob_file_system_configuration=AzureBlobFileSystemConfiguration(\n",
" account_name=STORAGE_ACCOUNT_NAME,\n",
" container_name=STORAGE_CONTAINER_MODELS,\n",
" relative_mount_path=STORAGE_CONTAINER_MODELS, # use container name as relative path\n",
" account_key=STORAGE_ACCOUNT_KEY,\n",
" blobfuse_options='-o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 -o allow_other'\n",
" )\n",
" )\n",
" container_batch_api = MountConfiguration(\n",
" azure_blob_file_system_configuration=AzureBlobFileSystemConfiguration(\n",
" account_name=STORAGE_ACCOUNT_NAME,\n",
" container_name=STORAGE_CONTAINER_API,\n",
" relative_mount_path=STORAGE_CONTAINER_API, # use container name as relative path\n",
" account_key=STORAGE_ACCOUNT_KEY,\n",
" # allow_other needs to be flagged - task running inside container needs to access this blob container\n",
" blobfuse_options='-o attr_timeout=240 -o entry_timeout=240 -o negative_timeout=120 -o allow_other'\n",
" )\n",
" )\n",
"\n",
" new_pool = PoolAddParameter(\n",
" id=POOL_ID,\n",
" display_name=POOL_ID,\n",
"\n",
" vm_size=POOL_VM_SIZE,\n",
" \n",
" enable_auto_scale=True,\n",
" auto_scale_formula=POOL_AUTO_SCALE_FORMULA,\n",
"\n",
" virtual_machine_configuration=vm_config,\n",
"\n",
" # default is 1; each task occupies the entire GPU so we can only run one task at a time on a node\n",
" task_slots_per_node=1,\n",
"\n",
" mount_configuration=[container_models, container_batch_api],\n",
" )\n",
" batch_service_client.pool.add(new_pool)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"credentials = ServicePrincipalCredentials(\n",
" client_id=APP_CLIENT_ID,\n",
" secret=APP_CLIENT_SECRET,\n",
" tenant=APP_TENANT_ID,\n",
" resource='https://batch.core.windows.net/'\n",
")\n",
"\n",
"# if using the Batch quota system, use https://docs.microsoft.com/en-us/python/api/azure-batch/azure.batch.batch_auth.sharedkeycredentials?view=azure-python\n",
"# to authenticate instead of the service principal is also okay.\n",
"\n",
"batch_client = BatchServiceClient(credentials=credentials, batch_url=BATCH_ACCOUNT_URL)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 29 ms, sys: 3.54 ms, total: 32.5 ms\n",
"Wall time: 1.01 s\n"
]
}
],
"source": [
"%%time\n",
"# pool creation should finish in about a minute\n",
"\n",
"try:\n",
" create_pool(batch_client, POOL_ID)\n",
"except BatchErrorException as e:\n",
" print_batch_exception(e)\n",
" raise"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Step 2: Upload the scoring script\n",
"\n",
"Note that all instances share this scoring script!"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# MODIFY THIS CELL\n",
"\n",
"# path to the scoring script; modify if cwd is not `api_core`\n",
"path_scoring_script = 'batch_service/score.py'\n",
"\n",
"# SAS with write permission for uploading output JSONs\n",
"sas_query_str = '' # get a write-enabled SAS for the container below\n",
"\n",
"output_container_url = f'https://cameratrap.blob.core.windows.net/batch-api{sas_query_str}'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# upload the scoring script to the container above; Batch Tasks will retrieve the script from there\n",
"\n",
"output_container_client = ContainerClient.from_container_url(output_container_url)\n",
"\n",
"with open(path_scoring_script, 'rb') as f:\n",
" script_blob_client = output_container_client.upload_blob(name='scripts/score.py', data=f, overwrite=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Useful CLI commands for using Docker images with Batch\n",
"\n",
"List all Batch supported images with their \"capabilities\" (e.g. \"DockerCompatible\", \"NvidiaTeslaDriverInstalled\"):\n",
"```\n",
"az batch pool supported-images list\n",
"```\n",
"with the pool information provided in additional parameters.\n",
"\n",
"Listing all versions of a SKU of image:\n",
"```\n",
"az vm image list --all --publisher microsoft-dsvm\n",
"```\n",
"\n",
"You may need to accept the terms of an image:\n",
"```\n",
"az vm image list --all --publisher <publisher>\n",
"```\n",
"to find the URN for the image you want to use, followed by:\n",
"\n",
"```\n",
"az vm image terms accept --urn <corresponding-urn>\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:cameratraps-batch-api]",
"language": "python",
"name": "conda-env-cameratraps-batch-api-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}


@ -1,31 +0,0 @@
#!/bin/sh
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# Get the credentials from KeyVault and run summarize_daily_activity.py
SUBSCRIPTION=74d91980-e5b4-4fd9-adb6-263b8f90ec5b
KEY_VAULT_NAME=cameratraps
# A URL and a code to use for logging in on the browser will be displayed
echo Log in to your Azure account via the CLI. You should be prompted to authenticate shortly...
az login
# Cosmos DB for job status checking
COSMOS_ENDPOINT=$(az keyvault secret show --name cosmos-db-endpoint --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
export COSMOS_ENDPOINT
COSMOS_READ_KEY=$(az keyvault secret show --name cosmos-db-read-only-key --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
export COSMOS_READ_KEY
echo COSMOS_ENDPOINT and COSMOS_READ_KEY read from KeyVault
# Teams webhook
TEAMS_WEBHOOK=$(az keyvault secret show --name teams-webhook-cicd --subscription $SUBSCRIPTION --vault-name $KEY_VAULT_NAME --query value -o tsv)
export TEAMS_WEBHOOK
echo TEAMS_WEBHOOK read from KeyVault
python summarize_daily_activity.py


@ -1,153 +0,0 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
This script can be run in a separate process to monitor all instances of the batch API.
It sends a digest of submissions within the past day to a Teams channel webhook.
It requires the environment variables TEAMS_WEBHOOK, COSMOS_ENDPOINT and COSMOS_READ_KEY to be set.
"""
import time
import os
import json
from datetime import datetime, timezone, timedelta
from collections import defaultdict
import requests
from azure.cosmos.cosmos_client import CosmosClient
# Cosmos DB `batch-api-jobs` table for job status
COSMOS_ENDPOINT = os.environ['COSMOS_ENDPOINT']
COSMOS_READ_KEY = os.environ['COSMOS_READ_KEY']
TEAMS_WEBHOOK = os.environ['TEAMS_WEBHOOK']
def send_message():
cosmos_client = CosmosClient(COSMOS_ENDPOINT, credential=COSMOS_READ_KEY)
db_client = cosmos_client.get_database_client('camera-trap')
db_jobs_client = db_client.get_container_client('batch_api_jobs')
yesterday = datetime.now(timezone.utc).date() - timedelta(days=1)
query = f'''
SELECT *
FROM job
WHERE job.job_submission_time >= "{yesterday.isoformat()}T00:00:00Z"
'''
result_iterable = db_jobs_client.query_items(query=query,
enable_cross_partition_query=True)
# aggregate the number of images, country and organization names info from each job
# submitted during yesterday (UTC time)
instance_num_images = defaultdict(lambda: defaultdict(int))
instance_countries = defaultdict(set)
instance_orgs = defaultdict(set)
total_images_received = 0
for job in result_iterable:
api_instance = job['api_instance']
status = job['status']
call_params = job['call_params']
if status['request_status'] == 'completed':
instance_num_images[api_instance]['num_images_completed'] += status.get('num_images', 0)
instance_num_images[api_instance]['num_images_total'] += status.get('num_images', 0)
total_images_received += status.get('num_images', 0)
instance_countries[api_instance].add(call_params.get('country', 'unknown'))
instance_orgs[api_instance].add(call_params.get('organization_name', 'unknown'))
print(f'send_message, number of images received yesterday: {total_images_received}')
if total_images_received < 1:
print('send_message, no images submitted yesterday, not sending a summary')
print('')
return
# create the card
sections = []
for instance_name, num_images in instance_num_images.items():
entry = {
'activityTitle': f'API instance: {instance_name}',
'facts': [
{
'name': 'Total images',
'value': '{:,}'.format(num_images['num_images_total'])
},
{
'name': 'Images completed',
'value': '{:,}'.format(num_images['num_images_completed'])
},
{
'name': 'Countries',
'value': ', '.join(sorted(list(instance_countries[instance_name])))
},
{
'name': 'Organizations',
'value': ', '.join(sorted(list(instance_orgs[instance_name])))
}
]
}
sections.append(entry)
card = {
'@type': 'MessageCard',
'@context': 'http://schema.org/extensions',
'themeColor': 'ffcdb2',
'summary': 'Digest of batch API activities over the past 24 hours',
'title': f'Camera traps batch API activities on {yesterday.strftime("%b %d, %Y")}',
'sections': sections,
'potentialAction': [
{
'@type': 'OpenUri',
'name': 'View Batch account in Azure Portal',
'targets': [
{
'os': 'default',
'uri': 'https://ms.portal.azure.com/#@microsoft.onmicrosoft.com/resource/subscriptions/74d91980-e5b4-4fd9-adb6-263b8f90ec5b/resourcegroups/camera_trap_api_rg/providers/Microsoft.Batch/batchAccounts/cameratrapssc/accountOverview'
}
]
}
]
}
response = requests.post(TEAMS_WEBHOOK, data=json.dumps(card))
print(f'send_message, card to send:')
print(json.dumps(card, indent=4))
print(f'send_message, sent summary to webhook, response code: {response.status_code}')
print('')
def main():
"""
Wake up at 5 minutes past midnight UTC to send a summary of yesterday's activities if there were any.
Then goes in a loop to wake up and send a summary every 24 hours.
"""
current = datetime.utcnow()
future = current.replace(hour=0, minute=5, second=0, microsecond=0) + timedelta(days=1)
current = datetime.utcnow()
duration = future - current
duration_hours = duration.seconds / (60 * 60)
print(f'Current time: {current}')
print(f'Will wake up at {future}, in {duration_hours} hours')
print('')
time.sleep(duration.seconds)
while True:
print(f'Woke up at {datetime.utcnow()}')
send_message()
time.sleep(24 * 60 * 60)
if __name__ == '__main__':
main()


@ -1,159 +0,0 @@
# Managing camera trap API tasks
## Overview
This document describes the process for running partner data through our <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/">MegaDetector Batch Processing API</a>. It assumes that the target data is in a single blob container to which we have a read-write SAS token.
Write permission is only needed for intermediate files and API output, so it would take a very small set of code changes to handle the case where we have read-only access to the source container; for now, we're assuming r/w access.
The major steps covered here are:
* Enumerating the files that need processing and generating API input
* Calling the API, including any necessary resubmissions due to failed shards
* Postprocessing to generate preview files
* Repeat detection elimination
Repeat detection elimination is a manual step that we do in ~30% of cases, and we typically tune the process so this step takes around 20 minutes of hands-on time. Without this, this whole process should take around five minutes of hands-on time, plus the time required to run the task (which can be anywhere from minutes to days). I know this looks like a lot of steps, but once you get the hang of it, it's really fast. If it's your third time doing this and you find that it's taking more than five minutes of human intervention time &ndash; including generating SAS tokens and uploading results for preview &ndash; email cameratraps@lila.science to let us know!
This document is written 98% for internal use, so you will see some instructions that only make sense internally (like "ask Dan to create a password for blah blah"). But if you find it useful externally, let us know!
## Magic strings you need before following this guide
* Storage account and container name for the data container
* API endpoint URL and required "caller" token... for this document, we'll use "blah.endpoint.com" and "caller", respectively.
* Read-only and read-write SAS tokens for the data container... for this document, we'll use "?st=sas_token"
* Credentials for the VM where we host previews and output data... for this document, we'll use "datavm.com".
* A password for the specific folder you will post the results to on that VM
* Possibly a set of specific folders to process as separate tasks within the target container
## Setting up your environment (one time only)
* Unless otherwise stated, you will want to work on a VM in South Central US. You will not be moving substantial volumes of images, so it's OK to work outside of Azure, but a few steps will be slightly faster with low-latency access. These instructions will also assume you have a graphical/interactive IDE (Spyder, PyCharm, or VS Code) and that you can run a browser on the same machine where you're running Python.
* Probably install <a href="https://www.postman.com/">Postman</a> for task submission
* If you're working on Windows, probably install <a href="https://www.irfanview.com/">IrfanView</a> for repeat detection elimination (the semi-automated step that will require you to look at lots of images).
* If you're working on Windows, probably install <a href="https://www.bitvise.com/">Bitvise</a> for SCP'ing the results to our Web server VM
* Clone the following repos, and be on master/latest on both:
* <a href="https://github.com/ecologize/CameraTraps">github.com/ecologize/CameraTraps</a>
* <a href="https://github.com/microsoft/ai4eutils">github.com/microsoft/ai4eutils</a>
* Put the roots of both of the above repos on your PYTHONPATH; see <a href="https://github.com/ecologize/CameraTraps/#other-notes">instructions on the CameraTraps repo</a> re: setting your PYTHONPATH.
* If you're into using conda environments, cd to the root of the CameraTraps repo and run:
`conda env create --file environment-api-task-management.yml`
## Stuff you do for each task
### Forking the template script
* Make a copy of <a href="https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/data_preparation/manage_api_submission.py">manage_api_submission.py</a>, <i>outside</i> of the CameraTraps repo. You may or may not end up with credentials in this file, so your working copy should <i>not be on GitHub</i>. Name this file as `organization-YYYYMMDD.py`.
* Fill in all the constants in the "constants I set per task" cell (a hypothetical filled-in example follows this list). Specifically:
* storage_account_name
* container_name
* task_set_name, formatted as `organization-YYYYMMDD` (same as the file name)
* base_output_folder_name (this is a local folder... I recommend maintaining a local folder like c:\camera_trap_tasks and putting all task data in subfolders named according to the organization, e.g. c:\camera_trap_tasks\university_of_arendelle, but this isn't critical)
* read_only_sas_token
* read_write_sas_token
* caller
* endpoint_base
If applicable (but usually not applicable):
* container_prefix (restricts image enumeration to specific prefixes in the source container)
* folder_names (splits the overall task up into multiple sub-tasks, typically corresponding to folders that are meaningful to the organization, e.g. "Summer_2018")
* additional_task_args (typically used to specify a model version)
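A filled-in cell might look like the following sketch; all values below are placeholders, reusing the magic strings from earlier in this guide:

```python
# constants I set per task (hypothetical example)
storage_account_name = 'accountname'
container_name = 'containername'
task_set_name = 'university_of_arendelle-20200409'
base_output_folder_name = r'c:\camera_trap_tasks\university_of_arendelle'
read_only_sas_token = '?st=sas_token'
read_write_sas_token = '?st=sas_token'
caller = 'caller'
endpoint_base = 'http://blah.endpoint.com'

# usually not applicable
container_prefix = None       # e.g. 'images/2020/'
folder_names = []             # e.g. ['Summer_2018']
additional_task_args = {}     # e.g. {'model_version': '4.1'}
```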
### Preparing the task(s)
I use this file like a notebook, typically running all cells interactively. The cell notation in this file is friendly to Spyder, VS Code, and PyCharm (professional). To prepare the task, run all the cells through "generate API calls for each task".
At this point, you have the json-formatted API string for each task (typically just one, unless you used the "folder_names" feature to create multiple tasks), and you're ready to submit.
### Submitting the task(s)
The next cell is called "run the tasks", and though it doesn't actually work, I don't recommend programmatic submission anyway. You are about to spin up sixteen expensive and power-hungry GPUs, and IMO it's better to do this manually so you can triple-quadruple check that you really want to start a task. I do this through Postman; see <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#other-notes-and-example">here</a> for an example. If you are running multiple tasks, you should run them separately in Postman.
You will get back a task ID for each task, enter these in the "manually define task groups" cell in the format indicated in the template code. A "task group" is a logical task; the reason we use a <i>list</i> of task IDs for each task group is that (1) we split tasks over 1M images into multiple tasks, and (2) sometimes shards fail and we resubmit some images later as part of the same task, so we will extend those lists as necessary.
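As a sketch (task IDs below are made up), the "manually define task groups" cell might end up looking like:

```python
# one list of task IDs per logical task ("task group"); a list grows if we
# later resubmit failed shards as part of the same task
task_groups = [
    ['6529'],           # first task
    ['6530', '6541'],   # second task, including one resubmission
]
```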
I then typically run the "estimate total time" cell. For very small tasks, this isn't meaningful, since it doesn't include spin-up time. This tells me when I should check back again. I then typically run the "status check" cell to confirm the task is in progress.
### Time passes...
Do other work, watch Netflix (Last Kingdom Season 4 just came out!), go to bed, wake up...
When you're back, run the "status check" cell again, and if it doesn't show "completed", wait longer. If it's been suspiciously long, check in with us.
### Check for failures
Run the "look for failed shards" cell. Most of the time it will say "no resubmissions necessary". If it shows some required resubmissions, look carefully at the "missing images" printout. If it's actually just a small number (but still slightly larger than the `max_tolerable_missing_images` constant, otherwise you wouldn't get this printout), consider just raising the `max_tolerable_missing_images` constant. This is subjective and project-specific.
If you do have to resubmit tasks, the API calls will be in your console. Run them, and see the "Resubmit tasks for failed shards" cell, where you need to add the task IDs for the resubmissions to the appropriate task groups.
Theoretically you could have to do all this again if your resubmissions fail, thinking through this is outside the scope of this README. I've never had this happen.
### Post-processing
Run the next two cells, which should uneventfully pull results and combine results from resubmitted tasks into single .json files.
Now the excitement starts again with the "post-processing" cell: running this will take a minute or two, and browser tabs should open with previews for each task. I typically decide two things here, both subjective:
1. Do we need to adjust the confidence threshold from the 80% default?
2. Do we need to do the repeat detection elimination step?
The latter isn't just about the results; it's about the priority of the task, the time available, the degree to which the collaborator can do this on their own, etc. Guidance for these two decisions is beyond the scope of this document.
### Repeat detection elimination (not typically necessary)
Before reading this, I recommend skimming the <a href="https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/postprocessing/repeat_detection_elimination">public documentation on the repeat detection elimination (RDE) process</a>, to get a sense of what it's about.
OK, you're back... I run RDE in the following steps:
1. Run the "repeat detection elimination, phase 1" cell.
2. Before actually starting the manual step, get a rough sense of how many images you have in the target folder. If it's more than you have time to deal with (I typically aim for no more than ~2k), adjust parameters and re-run this cell. Also, if you quickly see that the output folder contains lots of actual true positives with boxes (i.e., lots of animals that were just sitting really still), you'll also want to adjust parameters and re-run. Parameter adjustment is beyond the scope of this document; we'll update it in the future with examples of when you might adjust each parameter.
3. Now you're ready to do the manual step, i.e. deleting all the images in the RDE folder with boxes that contain animals. Reminder: it's fine if the <i>image</i> contains an animal, we're deleting images where you see <i>boxes</i> that contain animals. <i>Not</i> deleting an image is equivalent to marking it as a false positive in this process, so if you're unsure, it's always safer to delete the image from the RDE folder, which will leave the image in the final output set. There's rarely harm in deleting a few too many from the RDE folder.
4. For this step, I strongly recommend <a href="https://www.irfanview.com/">IrfanView</a>. I keep one hand on the "page-down" key and one hand on the "delete" key, and I can blast through several images a second this way.
5. OK, you're back, and you just looked at a lot of images with boxes on trees and other annoying stuff. Now run the "post-processing (post-RDE)" cell to generate a new HTML preview. You should see that the number of detections is lower than in the preview you generated earlier, since you just got rid of a bunch of detections.
### Uploading previews to our Web server
For now, ask Dan to create a login and associated folder on our Web server. If the organization associated with this task is called "university_of_arendelle", Dan will create a folder at `/datadrive/html/data/university_of_arendelle`. You should copy the postprocessing folder(s) there with SCP (I use <a href="https://www.bitvise.com/">Bitvise</a>), e.g. if your output base was:
`g:\university_of_arendelle`
...and your task set name was:
`university_of_arendelle-20200409`
You will copy a folder that looks like:
`g:\university_of_arendelle\university_of_arendelle-20200409\postprocessing\university_of_arendelle-20200409_0.800`
This will be externally visible (though password-protected) at:
`http://datavm.com/data/university_of_arendelle/university_of_arendelle-20200409_0.800`
### Uploading results to our file share
The .json results files - including the results before and after repeat detection elimination, if applicable - are generally uploaded to our AI for Earth file share when anything somewhat stable is uploaded to the Web server. This is just a placeholder to add instructions later. Note to self: we generally zip .json files if they're larger than ~50MB.



@ -1,167 +0,0 @@
#
# manage_video_batch.py
#
# Notebook-esque script to manage the process of running a local batch of videos
# through MD. Defers most of the heavy lifting to manage_local_batch.py .
#
#%% Imports and constants
import path_utils
import os
from detection import video_utils
input_folder = '/datadrive/data'
output_folder_base = '/datadrive/frames'
assert os.path.isdir(input_folder)
os.makedirs(output_folder_base,exist_ok=True)
#%% Split videos into frames
assert os.path.isdir(input_folder)
os.makedirs(output_folder_base,exist_ok=True)
recursive = True
overwrite = True
n_threads = 5
every_n_frames = 10
frame_filenames_by_video,fs_by_video,video_filenames = \
video_utils.video_folder_to_frames(input_folder=input_folder,
output_folder_base=output_folder_base,
recursive=recursive,
overwrite=overwrite,
n_threads=n_threads,
every_n_frames=every_n_frames)
#%% List frame files, break into folders
from collections import defaultdict
frame_files = path_utils.find_images(output_folder_base,True)
frame_files = [s.replace('\\','/') for s in frame_files]
print('Enumerated {} total frames'.format(len(frame_files)))
# Find unique (relative) folders
folder_to_frame_files = defaultdict(list)
# fn = frame_files[0]
for fn in frame_files:
folder_name = os.path.dirname(fn)
folder_name = os.path.relpath(folder_name,output_folder_base)
folder_to_frame_files[folder_name].append(fn)
print('Found {} folders for {} files'.format(len(folder_to_frame_files),len(frame_files)))
#%% List videos
video_filenames = video_utils.find_videos(input_folder,recursive=True)
video_filenames = [os.path.relpath(fn,input_folder) for fn in video_filenames]
print('Input folder contains {} videos'.format(len(video_filenames)))
#%% Check for videos that are missing entirely
# list(folder_to_frame_files.keys())[0]
# video_filenames[0]
missing_videos = []
# fn = video_filenames[0]
for relative_fn in video_filenames:
if relative_fn not in folder_to_frame_files:
missing_videos.append(relative_fn)
print('{} of {} videos are missing frames entirely'.format(len(missing_videos),
len(video_filenames)))
#%% Check for videos with very few frames
min_frames_for_valid_video = 10
low_frame_videos = []
for folder_name in folder_to_frame_files.keys():
frame_files = folder_to_frame_files[folder_name]
if len(frame_files) < min_frames_for_valid_video:
low_frame_videos.append(folder_name)
print('{} of {} folders have fewer than {} frames'.format(
len(low_frame_videos),len(video_filenames),min_frames_for_valid_video))
#%% Print the list of videos that are problematic
print('Videos that could not be decoded:\n')
for fn in missing_videos:
print(fn)
print('\nVideos with fewer than {} decoded frames:\n'.format(min_frames_for_valid_video))
for fn in low_frame_videos:
print(fn)
#%% Process images like we would for any other camera trap job
# ...typically using manage_local_batch.py, but do this however you like, as long
# as you get a results file at the end.
#
# If you do RDE, remember to use the second folder from the bottom, rather than the
# bottom-most folder.
#%% Convert frame results to video results
from detection.video_utils import frame_results_to_video_results
filtered_output_filename = '/results/organization/stuff.json'
video_output_filename = filtered_output_filename.replace('.json','_aggregated.json')
frame_results_to_video_results(filtered_output_filename,video_output_filename)
#%% Confirm that the videos in the .json file are what we expect them to be
import json
with open(video_output_filename,'r') as f:
video_results = json.load(f)
video_filenames_set = set(video_filenames)
filenames_in_video_results_set = set([im['file'] for im in video_results['images']])
for fn in filenames_in_video_results_set:
assert fn in video_filenames_set
#%% Scrap
if False:
pass
#%% Test a possibly-broken video
fn = '/datadrive/tmp/video.AVI'
fs = video_utils.get_video_fs(fn)
print(fs)
tmpfolder = '/home/user/tmp/frametmp'
os.makedirs(tmpfolder,exist_ok=True)
video_utils.video_to_frames(fn, tmpfolder, verbose=True, every_n_frames=10)
#%% List videos in a folder
input_folder = '/datadrive/tmp/organization/data'
video_filenames = video_utils.find_videos(input_folder,recursive=True)


@ -1,565 +0,0 @@
"""
prepare_api_submission.py
This module defines the Task class and helper methods that are useful for
submitting tasks to the AI for Earth Camera Trap Batch Detection API.
Here's the stuff we usually do before submitting a task:
1) Upload images to Azure Blob Storage... we do this with azcopy, not addressed
in this script.
2) List the files you want the API to process.
ai4eutils.ai4e_azure_utils.enumerate_blobs_to_file()
3) Divide that list into chunks that will become individual API submissions.
divide_files_into_tasks()
4) Put each .json file in a blob container and get a read-only SAS URL for it.
Task.upload_images_list()
5) Generate the API query(ies) you'll submit to the API.
Task.generate_api_request()
6) Submit the API query. This can be done manually with Postman as well.
Task.submit()
7) Monitor task status
Task.check_status()
8) Combine multiple API outputs
9) We're now into what we really call "postprocessing", rather than
"data_preparation", but... possibly do some amount of partner-specific
renaming, folder manipulation, etc. This is very partner-specific, but
generally done via:
find_repeat_detections.py
subset_json_detector_output.py
postprocess_batch_results.py
"""
#%% Imports
from enum import Enum
import json
import os
import posixpath
import string
from typing import Any, ClassVar, Dict, List, Optional, Sequence, Tuple
import urllib
import requests
import ai4e_azure_utils # from ai4eutils
import path_utils # from ai4eutils
#%% Constants
MAX_FILES_PER_API_TASK = 1_000_000
IMAGES_PER_SHARD = 2000
VALID_REQUEST_NAME_CHARS = f'-_{string.ascii_letters}{string.digits}'
REQUEST_NAME_CHAR_LIMIT = 92
#%% Classes
class BatchAPISubmissionError(Exception):
pass
class BatchAPIResponseError(Exception):
pass
class TaskStatus(str, Enum):
RUNNING = 'running'
FAILED = 'failed'
PROBLEM = 'problem'
COMPLETED = 'completed'
class Task:
"""
Represents a Batch Detection API task.
Given the Batch Detection API URL, assumes that the endpoints are:
/request_detections
for submitting tasks
/task/<task.id>
for checking on task status
"""
# class variables
request_endpoint: ClassVar[str] = 'request_detections' # submit tasks
task_status_endpoint: ClassVar[str] = 'task' # check task status
# instance variables, in order of when they are typically set
name: str
api_url: str
local_images_list_path: str
remote_images_list_url: str # includes SAS token if uploaded with one
api_request: Dict[str, Any] # request object before JSON serialization
id: str
response: Dict[str, Any] # decoded response JSON
status: TaskStatus
bypass_status_check: bool # set when we manually complete a task
def __init__(self, name: str, task_id: Optional[str] = None,
images_list_path: Optional[str] = None,
validate: bool = True, api_url: Optional[str] = None):
"""
Initializes a Task.
If desired, validates that the images list does not exceed the maximum
length and that all files in the images list are actually images.
Args:
name: str, name of the request
task_id: optional str, ID of submitted task
images_list_path: str, path or URL to a JSON file containing a list
of image paths, must start with 'http' if a URL
validate: bool, whether to validate the given images list,
only used if images_list_path is not None
api_url: optional str, Batch Detection API URL,
defaults to environment variable BATCH_DETECTION_API_URL
Raises:
requests.HTTPError: if images_list_path is a URL but an error
occurred trying to fetch it
ValueError: if images_list_path is given, but the file contains more
than MAX_FILES_PER_API_TASK entries, or if one of the entries
is not a supported image file type
"""
self.bypass_status_check = False
clean_name = clean_request_name(name)
if name != clean_name:
print('Warning: renamed {} to {}'.format(name,clean_name))
self.name = clean_name
if api_url is None:
api_url = os.environ['BATCH_DETECTION_API_URL']
assert api_url is not None and api_url != ''
self.api_url = api_url
if task_id is not None:
self.id = task_id
if images_list_path is not None:
if images_list_path.startswith('http'):
self.remote_images_list_url = images_list_path
else:
self.local_images_list_path = images_list_path
if validate:
if images_list_path.startswith('http'):
images_list = requests.get(images_list_path).json()
else:
with open(images_list_path, 'r') as f:
images_list = json.load(f)
if len(images_list) > MAX_FILES_PER_API_TASK:
raise ValueError('Images list has too many files')
# Leaving this commented out to remind us that we don't want this check here; let
# the API fail on these images. It's a huge hassle to remove non-image
# files.
#
# for path_or_url in images_list:
# if not is_image_file_or_url(path_or_url):
# raise ValueError('{} is not an image'.format(path_or_url))
def __repr__(self) -> str:
return 'Task(name={name}, id={id})'.format(
name=self.name,
id=getattr(self, 'id', None))
# Commented out as a reminder: don't check task status (which is a rest API call)
# in __repr__; require the caller to explicitly request status
# status=getattr(self, 'status', None))
def upload_images_list(self, account: str, container: str, sas_token: str,
blob_name: Optional[str] = None, overwrite: bool=False) -> None:
"""
Uploads the local images list to an Azure Blob Storage container.
Sets self.remote_images_list_url to the blob URL of the uploaded file.
Args:
account: str, Azure Storage account name
container: str, Azure Blob Storage container name
sas_token: str, Shared Access Signature (SAS) with write permission,
does not start with '?'
blob_name: optional str, defaults to basename of
self.local_images_list_path if blob_name is not given
"""
if blob_name is None:
blob_name = os.path.basename(self.local_images_list_path)
self.remote_images_list_url = ai4e_azure_utils.upload_file_to_blob(
account_name=account, container_name=container,
local_path=self.local_images_list_path, blob_name=blob_name,
sas_token=sas_token, overwrite=overwrite)
def generate_api_request(self,
caller: str,
input_container_url: Optional[str] = None,
image_path_prefix: Optional[str] = None,
**kwargs: Any
) -> Dict[str, Any]:
"""
Generate API request JSON.
Sets self.api_request to the request JSON. For complete list of API
input parameters, see:
https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing#api-inputs
Args:
caller: str
input_container_url: optional str, URL to Azure Blob Storage
container where images are stored. URL must include SAS token
with read and list permissions if the container is not public.
Only provide this parameter when the image paths in
self.remote_images_list_url are relative to a container.
image_path_prefix: optional str, TODO
kwargs: additional API input parameters
Returns: dict, represents the JSON request to be submitted
"""
request = kwargs
request.update({
'request_name': self.name,
'caller': caller,
'images_requested_json_sas': self.remote_images_list_url
})
if input_container_url is None:
request['use_url'] = True
else:
request['input_container_sas'] = input_container_url
if image_path_prefix is not None:
request['image_path_prefix'] = image_path_prefix
self.api_request = request
return request
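# Illustrative shape of the resulting request dict (values are
# placeholders, not real names or tokens):
#
# {
#     'request_name': 'example_request',
#     'caller': 'someone@example.com',
#     'images_requested_json_sas': 'https://<account>.blob.core.windows.net/.../list.json?<sas>',
#     'input_container_sas': 'https://<account>.blob.core.windows.net/<container>?<sas>'
# }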
def submit(self) -> str:
"""
Submit this task to the Batch Detection API.
Sets self.id to the returned request ID. Only run this method after
generate_api_request().
Returns: str, task ID
Raises:
requests.HTTPError, if an HTTP error occurred
BatchAPISubmissionError, if request returns an error
"""
request_endpoint = posixpath.join(self.api_url, self.request_endpoint)
r = requests.post(request_endpoint, json=self.api_request)
r.raise_for_status()
assert r.status_code == requests.codes.ok
response = r.json()
if 'error' in response:
raise BatchAPISubmissionError(response['error'])
if 'request_id' not in response:
raise BatchAPISubmissionError(
'"request_id" not in API response: {}'.format(response))
self.id = response['request_id']
return self.id
def check_status(self) -> Dict[str, Any]:
"""
Checks the task status.
Sets self.response and self.status.
Returns: dict, contains fields ['Status', 'TaskId'] and possibly others
Raises:
requests.HTTPError, if an HTTP error occurred
BatchAPIResponseError, if response task ID does not match self.id
"""
if self.bypass_status_check:
return self.response
url = posixpath.join(self.api_url, self.task_status_endpoint, self.id)
r = requests.get(url)
r.raise_for_status()
assert r.status_code == requests.codes.ok
self.response = r.json()
if self.response['TaskId'] != self.id:
raise BatchAPIResponseError(
f'Response task ID {self.response["TaskId"]} does not match '
f'expected task ID {self.id}.')
try:
self.status = TaskStatus(self.response['Status']['request_status'])
except Exception as e:
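# Note: on a parse failure, self.status ends up holding this error
# string rather than a TaskStatus value, so callers comparing against
# TaskStatus members should be prepared for that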
self.status = 'Exception error: {}'.format(str(e))
return self.response
def force_completion(self, response: Dict[str, Any]) -> None:
"""
Simulate completion of a task by passing a manually-created response
string.
"""
self.response = response
self.status = TaskStatus(self.response['Status']['request_status'])
self.bypass_status_check = True
def get_output_file_urls(self, verbose: bool = False) -> Dict[str, str]:
"""
Retrieves the dictionary of URLs for the three output files for this task
"""
assert self.status == TaskStatus.COMPLETED
message = self.response['Status']['message']
output_file_urls = message['output_file_urls']
return output_file_urls
def get_missing_images(self, submitted_images: Sequence[str], verbose: bool = False) -> List[str]:
"""
Compares the submitted and processed images lists to find missing
images.
"missing": an image from the submitted list that was not processed,
for whatever reason
"failed": a missing image explicitly marked as 'failed' by the
batch detection API
Only run this method when task.status == TaskStatus.COMPLETED.
Returns: list of str, sorted list of missing image paths
Ignores non-image filenames.
"""
assert self.status == TaskStatus.COMPLETED
message = self.response['Status']['message']
# estimate # of failed images from failed shards
if 'num_failed_shards' in message:
n_failed_shards = message['num_failed_shards']
else:
n_failed_shards = 0
# Download all three JSON urls to memory
output_file_urls = message['output_file_urls']
for url in output_file_urls.values():
if self.id not in url:
raise BatchAPIResponseError(
'Task ID missing from output URL: {}'.format(url))
detections = requests.get(output_file_urls['detections']).json()
return get_missing_images_from_json(submitted_images,detections,n_failed_shards,verbose)
def create_response_message(n_failed_shards: int, detections_url: str, task_id: str) -> Dict[str, Any]:
"""
Manually create a response message in the format of the batch API. Used when tasks hang or fail
and we need to simulate their completion by directly pulling the results from the AML output.
"""
output_file_urls = {
'detections':detections_url
}
message = {'num_failed_shards':n_failed_shards,'output_file_urls':output_file_urls}
status = {'message':message,'request_status':str(TaskStatus.COMPLETED.value)}
response = {}
response['Status'] = status
response['request_id'] = task_id
return response
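# Hypothetical pairing with Task.force_completion() for a hung task
# (the URL and ID are placeholders):
#
# response = create_response_message(0, detections_url, task.id)
# task.force_completion(response)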
def get_missing_images_from_json(submitted_images: Sequence[str],
detections: Dict[str, Any],
n_failed_shards: int,
verbose: bool = False) -> List[str]:
"""
Given the json-encoded results for the lists of submitted images and detections,
find and return the list of images missing in the list of detections. Ignores
non-image filenames.
"""
# Remove files that were submitted but don't appear to be images
# assert all(is_image_file_or_url(s) for s in submitted_images)
non_image_files_submitted = [s for s in submitted_images if not is_image_file_or_url(s)]
if len(non_image_files_submitted) > 0:
print('Warning, {} non-image files submitted:\n'.format(len(non_image_files_submitted)))
for k in range(0,min(10,len(non_image_files_submitted))):
print(non_image_files_submitted[k])
print('...\n')
submitted_images = [s for s in submitted_images if is_image_file_or_url(s)]
# Diff submitted and processed images
processed_images = [d['file'] for d in detections['images']]
missing_images = sorted(set(submitted_images) - set(processed_images))
if verbose:
estimated_failed_shard_images = n_failed_shards * IMAGES_PER_SHARD
print('Submitted {} images'.format(len(submitted_images)))
print('Received results for {} images'.format(len(processed_images)))
print(f'{n_failed_shards} failed shards '
f'(approx. {estimated_failed_shard_images} images)')
print('{} images not in results'.format(len(missing_images)))
# Confirm that the processed images are a subset of the submitted images
assert set(processed_images) <= set(submitted_images), (
'Processed images should be a subset of submitted images')
return missing_images
def divide_chunks(l: Sequence[Any], n: int) -> List[Sequence[Any]]:
"""
Divide list *l* into chunks of size *n*, with the last chunk containing
<= n items.
"""
# https://www.geeksforgeeks.org/break-list-chunks-size-n-python/
chunks = [l[i * n:(i + 1) * n] for i in range((len(l) + n - 1) // n)]
return chunks
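# For example: divide_chunks([1, 2, 3, 4, 5], 2) returns [[1, 2], [3, 4], [5]]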
def divide_list_into_tasks(file_list: Sequence[str],
save_path: str,
n_files_per_task: int = MAX_FILES_PER_API_TASK
) -> Tuple[List[str], List[Sequence[Any]]]:
"""
Divides a list of filenames into a set of JSON files, each containing a
list of length *n_files_per_task* (the last file will contain <=
*n_files_per_task* files).
Output JSON files are saved to *save_path* except the extension is replaced
with `*.chunkXXX.json`. For example, if *save_path* is `blah.json`, output
files will be `blah.chunk000.json`, `blah.chunk001.json`, etc.
Args:
file_list: list of str, filenames to split across multiple JSON files
save_path: str, base path to save the chunked lists
n_files_per_task: int, max number of files to include in each API task
Returns:
output_files: list of str, output JSON file names
chunks: list of list of str, chunks[i] is the content of output_files[i]
"""
chunks = divide_chunks(file_list, n_files_per_task)
output_files = []
for i_chunk, chunk in enumerate(chunks):
chunk_id = 'chunk{:0>3d}'.format(i_chunk)
output_file = path_utils.insert_before_extension(
save_path, chunk_id)
output_files.append(output_file)
with open(output_file, 'w') as f:
json.dump(chunk, f, indent=1)
return output_files, chunks
def divide_files_into_tasks(file_list_json: str,
n_files_per_task: int = MAX_FILES_PER_API_TASK
) -> Tuple[List[str], List[Sequence[Any]]]:
"""
Convenience wrapper around divide_list_into_tasks() when the file_list
itself is already saved as a JSON file.
"""
with open(file_list_json) as f:
file_list = json.load(f)
return divide_list_into_tasks(file_list, save_path=file_list_json,
n_files_per_task=n_files_per_task)
def clean_request_name(request_name: str,
whitelist: str = VALID_REQUEST_NAME_CHARS,
char_limit: int = REQUEST_NAME_CHAR_LIMIT) -> str:
"""
Removes invalid characters from an API request name.
"""
return path_utils.clean_filename(
filename=request_name, whitelist=whitelist, char_limit=char_limit).replace(':','_')
def download_url(url: str, save_path: str, verbose: bool = False) -> None:
"""
Download a URL to a local file.
"""
if verbose:
print('Downloading {} to {}'.format(url,save_path))
urllib.request.urlretrieve(url, save_path)
assert os.path.isfile(save_path)
def is_image_file_or_url(path_or_url: str) -> bool:
"""
Checks (via file extension) whether a file path or URL is an image.
If path_or_url is a URL, strip away any query strings '?...'. This should
have no adverse effect on local paths.
"""
stripped_path_or_url = urllib.parse.urlparse(path_or_url).path
return path_utils.is_image_file(stripped_path_or_url)
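# For example (illustrative, assuming path_utils.is_image_file() keys on
# common image extensions):
#
# is_image_file_or_url('cam1/img001.jpg?st=2020')  -> True
# is_image_file_or_url('cam1/notes.txt')           -> False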
#%% Interactive driver
if False:
#%%
account_name = ''
sas_token = 'st=...'
container_name = ''
rsearch = None # '^Y53'
output_file = r'output.json'
blobs = ai4e_azure_utils.enumerate_blobs_to_file(
output_file=output_file,
account_name=account_name,
sas_token=sas_token,
container_name=container_name,
rsearch=rsearch)
#%%
file_list_json = r"D:\temp\idfg_20190801-hddrop_image_list.json"
task_files = divide_files_into_tasks(file_list_json)
#%%
file_list_sas_urls = [
'','',''
]
input_container_sas_url = ''
request_name_base = ''
caller = 'blah@blah.com'
request_strings,request_dicts = generate_api_queries(
input_container_sas_url,
file_list_sas_urls,
request_name_base,
caller)
for s in request_strings:
print(s)
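#%%
# Hedged end-to-end sketch of the Task workflow, assuming the placeholders
# above have been filled in, BATCH_DETECTION_API_URL is set, and
# 'image_list.chunk000.json' is one of the chunk files produced by
# divide_files_into_tasks(); all names here are illustrative
task = Task(name=request_name_base, images_list_path='image_list.chunk000.json')
task.upload_images_list(account=account_name, container=container_name,
sas_token=sas_token)
task.generate_api_request(caller=caller,
input_container_url=input_container_sas_url)
task.submit()
task.check_status()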

Binary file not shown (before: 109 KiB).

Binary data: api/batch_processing/images/SAS_screenshot.png
Binary file not shown (before: 95 KiB).

Binary file not shown (before: 29 KiB).


@ -1,23 +0,0 @@
## MegaDetector batch processing workflow integration
This folder contains information about ways to use MegaDetector output files in various workflows. Specifically...
### Timelapse2
[Timelapse2](http://saul.cpsc.ucalgary.ca/timelapse/) can read the results produced by the [MegaDetector batch processing API](https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing) and/or [run_tf_detector_batch.py](https://github.com/ecologize/CameraTraps/blob/master/detection/run_tf_detector_batch.py), as well as the species classification results produced by our [classification pipeline](https://github.com/ecologize/CameraTraps/tree/master/classification). For information about how to use this feature, see [timelapse.md](timelapse.md), but mostly see the section in the Timelapse manual called "Automatic Image Recognition". If you're a Timelapse user, you may also want to check out our [guide to configuring Azure virtual machines](remote_desktop.md) to run Timelapse in the cloud, which can make it easier to split annotation workloads across your team.
### eMammal
A [standalone application](https://github.com/ecologize/CameraTraps/tree/master/api/batch_processing/integration/eMammal) is available to transfer MegaDetector results from our .json results format into the [eMammal desktop client](https://emammal.si.edu/eyes-wildlife/content/downloading-desktop-application). Many eMammal users also work with our results by splitting images into separate folders for animal/empty/vehicle/person using [this script](https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/postprocessing/separate_detections_into_folders.py), then either discarding the blanks or creating separate deployments for animal/empty/human.
### digiKam
[Python tools](digiKam/README.md) (which can be run with a GUI) to transfer MegaDetector results from our .json results format into XMP image metadata, specifically for use with [digiKam](https://www.digikam.org/).
### Data preparation
For any of these use cases, you may also want to check out our [Camera Trap JSON Manager App](https://github.com/ecologize/CameraTraps/blob/master/api/batch_processing/postprocessing/CameraTrapJsonManagerApp.md), which can help you split/modify our .json results files to break them into smaller projects, adjust relative paths, etc.
If you use any of these tools &ndash; or if we're missing an important one &ndash; <a href="mailto:cameratraps@lila.science">email us</a>!


@ -1 +0,0 @@
.spyproject/


@ -1,59 +0,0 @@
# MegaDetector integration with digiKam
This folder contains a Python tool to transfer annotations produced by MegaDetector to the hierarchicalSubject field of XMP data in JPG images, to support the ingestion and review of those results in <a href="https://www.digikam.org/">digiKam</a>.
The tool can be run from the command line or as a GUI-based application.
## Running the command-line tool
Run the script as:
`python xmp_integration.py --input_file [input_file] --image_folder [image_folder] --remove_path [remove_path]`
* `input_file` is the .json file produced by the MegaDetector batch API or by run_tf_detector_batch.py
* `image_folder` is the root folder where your images are
* `remove_path` (optional) is a string that should be removed from the head of all the image paths in the .json file. For example, let's say you ran MegaDetector on paths that looked like:
`images_for_megadetector/camera1/image01.jpg`
...but now your images look like:
`c:\my_images\camera1\image01.jpg`
In this case, you would want to specify `images_for_megadetector/` for `remove_path` and `c:\my_images` for `image_folder`.
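Putting those together, the full invocation would look like this (paths are illustrative):

`python xmp_integration.py --input_file c:\my_images\detections.json --image_folder c:\my_images --remove_path images_for_megadetector/`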
## Running the GUI-based tool
Run the script as:
`python xmp_integration.py --gui`
* Select the folder that contains the images
* Select the .json file from the MegaDetector API
* Optionally specify a leading string to remove from image paths (see above)
* Click "Submit"
![](images/screenshot.png)
## Validating the XMP data in digiKam
* Open <a href="https://www.digikam.org/">digiKam</a>
* Load the images folder into the album
* Click on an image to view the XMP metadata
* Click on the `Metadata` tab and then the `XMP` tab in the right-side panel
* You should see the `hierarchicalSubject` field in `XMP metadata`, populated with your MegaDetector outputs
![](images/digikam.png)
## Compiling to an .exe
If you want to compile the tool to an executable (e.g. to make it easier to distribute within your organization), run:
`python setup.py build`
This will create a `build/exe.win-amd64-3.7` folder, in which you'll find `xmp_integration.exe`.

Binary file not shown (before: 7.5 KiB).

Binary file not shown (before: 160 KiB).

Binary file not shown (before: 76 KiB).

Binary file not shown (before: 295 KiB).


@ -1,6 +0,0 @@
from cx_Freeze import setup, Executable
setup(name = "XMP Integration" ,
version = "3.0" ,
description = "XMP metadata writer" ,
executables = [Executable("xmp_integration.py")])


@ -1,466 +0,0 @@
#
# xmp_integration.py
#
# Tools for loading MegaDetector batch API results into XMP metadata, specifically
# for consumption in digiKam:
#
# https://cran.r-project.org/web/packages/camtrapR/vignettes/camtrapr2.html
#
#%% Imports and constants
import argparse
import tkinter
from tkinter import ttk, messagebox, filedialog
import inspect
import os
import sys
import json
import pyexiv2
import ntpath
import threading
import traceback
from tqdm import tqdm
from multiprocessing import Pool
from multiprocessing.pool import ThreadPool
from functools import partial
category_mapping = {'person': 'Human', 'animal': 'Animal', 'vehicle': 'Vehicle'}
#%% Class definitions
class xmp_gui:
root = None
textarea_min_threshold = None
textarea_status = None
textarea_remove_path = None
textarea_rename_conf = None
textarea_rename_cats = None
num_threads = 1
class xmp_integration_options:
# Folder where images are stored
image_folder = None
# .json file containing MegaDetector output
input_file = None
# String to remove from all path names, typically representing a
# prefix that was added during MegaDetector processing
remove_path = None
# Optionally *rename* (not copy) all images that have no detections
# above [rename_conf] for the categories in rename_cats, from x.jpg to
# x_check.jpg
rename_conf = None
# Comma-delimited list of category names (or "all") to apply the rename_conf
# behavior to.
rename_cats = None
# Minimum detection threshold (applies to all classes; defaults to None,
# i.e. 0.0)
min_threshold = None
num_threads = 1
xmp_gui = None
#%% Functions
def write_status(options,s):
if options.xmp_gui is None:
return
options.xmp_gui.textarea_status.configure(state="normal")
options.xmp_gui.textarea_status.insert(tkinter.END, s + '\n')
options.xmp_gui.textarea_status.configure(state="disabled")
n_images_processed = 0
def update_xmp_metadata(categories, options, rename_cats, n_images, image):
"""
Update the XMP metadata for a single image
"""
# Relative image path
filename = ''
# Absolute image path
img_path = ''
global n_images_processed
try:
filename = image['file']
if options.remove_path != None and len(options.remove_path) > 0:
filename = filename.replace(options.remove_path, '')
img_path = os.path.join(options.image_folder, filename)
assert os.path.isfile(img_path), 'Image {} not found'.format(img_path)
# List of categories to write to XMP metadata
image_categories = []
# Categories with above-threshold detections present for
# this image
original_image_cats = []
# Maximum confidence for each category
original_image_cats_conf = {}
for detection in image['detections']:
cat = category_mapping[categories[detection['category']]]
# Have we already added this to the list of categories to
# write out to this image?
if cat not in image_categories:
# If we're supposed to compare to a threshold...
if options.min_threshold is not None and \
len(options.min_threshold) > 0:
if float(detection['conf']) > float(options.min_threshold):
image_categories.append(cat)
original_image_cats.append(
categories[detection['category']])
# Else we treat *any* detection as valid...
else:
image_categories.append(cat)
original_image_cats.append(categories[detection['category']])
# Keep track of the highest-confidence detection for this class
if options.min_threshold != None and \
len(options.min_threshold) > 0 and \
detection['conf'] > \
original_image_cats_conf.get(
categories[detection['category']], 0):
original_image_cats_conf[categories[detection['category']]] = \
detection['conf']
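# Write the accumulated category names into the Lightroom hierarchical
# keywords field, which digiKam reads as tags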
img = pyexiv2.Image(r'{0}'.format(img_path))
img.modify_xmp({'Xmp.lr.hierarchicalSubject': image_categories})
# If we're doing the rename/.check behavior...
if not (options.rename_conf is None and options.rename_cats is None):
matching_cats = set(rename_cats).intersection(set(original_image_cats))
is_conf_low = False
if options.min_threshold != None and len(options.min_threshold) > 0:
for matching_cat in matching_cats:
if original_image_cats_conf[matching_cat] < float(options.rename_conf):
is_conf_low = True
if (options.min_threshold != None and \
len(options.min_threshold) > 0 and \
len(image['detections']) == 0) or \
(options.rename_conf is not None and \
len(options.rename_conf) > 0 and \
is_conf_low is True and \
len(matching_cats) > 0):
parent_folder = os.path.dirname(img_path)
file_name = ntpath.basename(img_path)
base, ext = os.path.splitext(file_name)
manual_file_name = base + '_check' + ext
os.rename(img_path, os.path.join(parent_folder, manual_file_name))
if options.xmp_gui is not None:
n_images_processed += 1
percentage = round((n_images_processed)/n_images*100)
options.xmp_gui.progress_bar['value'] = percentage
options.xmp_gui.root.update_idletasks()
options.xmp_gui.style.configure('text.Horizontal.Tprogress_bar',
text='{:g} %'.format(percentage))
except Exception as e:
s = 'Error processing image {}: {}'.format(filename,str(e))
print(s)
traceback.print_exc()
write_status(options,s)
if False:
# Legacy code to rename files where XMP writing failed
parent_folder = os.path.dirname(img_path)
file_name = ntpath.basename(img_path)
failed_file_name = file_name.split('.')[0]+'_failed' + '.' + file_name.split('.')[1]
os.rename(img_path, os.path.join(
parent_folder, failed_file_name))
def process_input_data(options):
"""
Main function to loop over images and modify XMP data
"""
if options.xmp_gui is not None:
if (options.image_folder is None) or (len(options.image_folder) == 0):
tkinter.messagebox.showerror(title='Error', message='Image folder is not selected')
sys.exit()
if (options.input_file is None) or (len(options.input_file) == 0):
tkinter.messagebox.showerror(
title='Error', message='No MegaDetector .json file selected')
sys.exit()
options.remove_path = options.xmp_gui.textarea_remove_path.get()
options.rename_conf = options.xmp_gui.textarea_rename_conf.get()
options.rename_cats = options.xmp_gui.textarea_rename_cats.get()
options.num_threads = options.xmp_gui.textarea_num_threads.get()
options.min_threshold = options.xmp_gui.textarea_min_threshold.get()
try:
with open(options.input_file, 'r') as f:
data = f.read()
data = json.loads(data)
categories = data['detection_categories']
images = data['images']
n_images = len(images)
# Only parse rename_cats when it's actually provided (it may be None
# from the command line or '' from the GUI)
if options.rename_cats is not None and len(options.rename_cats) > 0:
rename_cats = options.rename_cats.split(",")
if rename_cats[0] == 'all':
rename_cats = list(category_mapping.keys())
else:
rename_cats = []
# num_threads may arrive as an int (command line) or a string (GUI)
num_threads = int(options.num_threads) if options.num_threads else 1
print('Using {} threads'.format(num_threads))
if options.xmp_gui is None:
func = partial(update_xmp_metadata, categories, options, rename_cats, n_images)
with Pool(num_threads) as p:
with tqdm(total=n_images) as pbar:
for i, _ in enumerate(p.imap_unordered(func, images)):
pbar.update()
else:
func = partial(update_xmp_metadata, categories, options, rename_cats, n_images)
with ThreadPool(num_threads) as p:
p.map(func, images)
s = 'Successfully processed {} images'.format(n_images)
print(s)
write_status(options,s)
except Exception as e:
print('Error processing input data: {}'.format(str(e)))
traceback.print_exc()
if options.xmp_gui is not None:
tkinter.messagebox.showerror(title='Error',
message='Make sure you selected the proper image folder and JSON file')
sys.exit()
def start_input_processing(options):
t = threading.Thread(target=lambda: process_input_data(options))
t.start()
def browse_folder(options,folder_path_var):
filename = tkinter.filedialog.askdirectory()
options.image_folder = r'{0}'.format(filename)
folder_path_var.set(filename)
def browse_file(options,file_path_var):
filename = tkinter.filedialog.askopenfilename()
options.input_file = r'{0}'.format(filename)
file_path_var.set(filename)
def create_gui(options):
root = tkinter.Tk()
root.resizable(False, False)
root.configure(background='white')
root.title('DigiKam Integration')
group = tkinter.LabelFrame(root, padx=5, pady=5)
group.configure(background = 'white')
group.pack(padx=10, pady=10, fill='both', expand='yes')
canvas = tkinter.Canvas(group, width = 800, height = 150)
canvas.configure(background = 'white')
canvas.pack()
img1 = tkinter.PhotoImage(file='images/aiforearth.png')
canvas.create_image(0,0, anchor=tkinter.NW, image=img1)
img2 = tkinter.PhotoImage(file='images/bg.png')
canvas.create_image(0,20, anchor=tkinter.NW, image=img2)
frame = tkinter.Frame(root)
frame.configure(background='white')
frame.pack()
l1 = tkinter.Label(frame, text='Folder containing images')
l1.configure(background='white')
l1.grid(row=0, column=0)
folder_path_var = tkinter.StringVar()
e1 = tkinter.Entry(frame, width=50, textvariable=folder_path_var, highlightthickness=1)
e1.configure(highlightbackground='grey', highlightcolor='grey')
e1.grid(row=0, column=2)
b1 = tkinter.Button(frame, text='Browse', fg='blue', command=lambda: browse_folder(options,folder_path_var))
b1.grid(row=0, column=5, padx=10)
l2 = tkinter.Label(frame, text='Path to MegaDetector output .json file')
l2.configure(background='white')
l2.grid(row=1, column=0)
file_path_var = tkinter.StringVar()
e2 = tkinter.Entry(frame, width=50, textvariable=file_path_var, highlightthickness=1)
e2.configure(highlightbackground='grey', highlightcolor='grey')
e2.grid(row=1, column=2)
b2 = tkinter.Button(frame, text='Browse', fg='blue', command=lambda: browse_file(options,file_path_var))
b2.grid(row=1, column=5, padx=10)
l6 = tkinter.Label(frame, text='Minimum confidence to consider a category')
l6.configure(background='white')
l6.grid(row=2, column=0)
textarea_min_threshold = tkinter.Entry(frame, width=50, highlightthickness=1)
textarea_min_threshold.configure(highlightbackground='grey', highlightcolor='grey')
textarea_min_threshold.grid(row=2, column=2)
l3 = tkinter.Label(frame, text='Prefix to remove from image paths (optional)')
l3.configure(background='white')
l3.grid(row=3, column=0)
textarea_remove_path = tkinter.Entry(frame, width=50, highlightthickness=1)
textarea_remove_path.configure(highlightbackground='grey', highlightcolor='grey')
textarea_remove_path.grid(row=3, column=2)
l4 = tkinter.Label(frame, text='Confidence level below which images are renamed for manual check (optional)')
l4.configure(background='white')
l4.grid(row=4, column=0)
textarea_rename_conf = tkinter.Entry(frame, width=50, highlightthickness=1)
textarea_rename_conf.configure(highlightbackground='grey', highlightcolor='grey')
textarea_rename_conf.grid(row=4, column=2)
l5 = tkinter.Label(frame, text='Categories to check for the confidence (optional)')
l5.configure(background='white')
l5.grid(row=5, column=0)
textarea_rename_cats = tkinter.Entry(frame, width=50, highlightthickness=1)
textarea_rename_cats.configure(highlightbackground='grey', highlightcolor='grey')
textarea_rename_cats.grid(row=5, column=2)
l6 = tkinter.Label(frame, text='Number of threads to run (optional)')
l6.configure(background='white')
l6.grid(row=6, column=0)
textarea_num_threads = tkinter.Entry(frame, width=50, highlightthickness=1)
textarea_num_threads.configure(highlightbackground='grey', highlightcolor='grey')
textarea_num_threads.grid(row=6, column=2)
sb = tkinter.Button(frame, text='Submit', fg='black',
command=lambda: start_input_processing(options), padx=10)
sb.grid(row=7, column=2, padx=10, pady=10)
style = tkinter.ttk.Style(root)
style.layout('text.Horizontal.Tprogress_bar',
[('Horizontal.progress_bar.trough',
{'children': [('Horizontal.progress_bar.pbar',
{'side': 'left', 'sticky': 'ns'})],
'sticky': 'nswe'}),
('Horizontal.progress_bar.label', {'sticky': ''})])
style.configure('text.Horizontal.Tprogress_bar', text='0 %')
progress_bar = tkinter.ttk.Progressbar(root, style='text.Horizontal.Tprogress_bar', length=700,
maximum=100, value=0, mode='determinate')
progress_bar.pack(pady=10)
group2 = tkinter.LabelFrame(root, text='Status', padx=5, pady=5)
group2.pack(padx=10, pady=10, fill='both', expand='yes')
textarea_status = tkinter.Text(group2, height=10, width=100)
textarea_status.configure(state="disabled")
textarea_status.pack()
options.xmp_gui = xmp_gui()
options.xmp_gui.root = root
options.xmp_gui.textarea_min_threshold = textarea_min_threshold
options.xmp_gui.textarea_remove_path = textarea_remove_path
options.xmp_gui.textarea_rename_conf = textarea_rename_conf
options.xmp_gui.textarea_rename_cats = textarea_rename_cats
options.xmp_gui.textarea_num_threads = textarea_num_threads
options.xmp_gui.textarea_status = textarea_status
options.xmp_gui.progress_bar = progress_bar
options.xmp_gui.style = style
root.mainloop()
#%% Interactive/test driver
if False:
#%%
options = xmp_integration_options()
options.input_file = r"C:\temp\demo_images\ssmini_xmp_test_orig\ssmini.mdv4.json"
options.image_folder = r"C:\temp\demo_images\ssmini_xmp_test"
options.remove_path = 'my_images/'
process_input_data(options)
#%% Command-line driver
def args_to_object(args,obj):
"""
Copy all fields from the argparse table "args" to the object "obj"
"""
for n, v in inspect.getmembers(args):
if not n.startswith('_'):
setattr(obj, n, v)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--input_file', help = 'Path to the MegaDetector .json file', default=None)
parser.add_argument('--image_folder', help = 'Path to the folder containing images', default=None)
parser.add_argument('--min_threshold', help = 'Minimum detection confidence that will be treated as a detection event', default=None)
parser.add_argument('--remove_path', help = 'Prefix to remove from image paths in the .json file', default=None)
parser.add_argument('--rename_conf', help = 'Below this confidence level, images will be renamed for manual check', default=None)
parser.add_argument('--rename_cats', help = 'Category (or comma-delimited categories) to apply renaming behavior to', default=None)
parser.add_argument('--num_threads', help = 'Number of threads to use for image processing', default=1)
parser.add_argument('--gui', help = 'Run in GUI mode', action='store_true')
options = xmp_integration_options()
args = parser.parse_args()
args_to_object(args,options)
if options.gui:
assert options.input_file is None, 'Command-line argument specified in GUI mode'
assert options.image_folder is None, 'Command-line argument specified in GUI mode'
assert options.min_threshold is None, 'Command-line argument specified in GUI mode'
assert options.remove_path is None, 'Command-line argument specified in GUI mode'
assert options.rename_conf is None, 'Command-line argument specified in GUI mode'
assert options.rename_cats is None, 'Command-line argument specified in GUI mode'
assert options.num_threads == 1, 'Command-line argument specified in GUI mode'
create_gui(options)
else:
process_input_data(options)
if __name__ == '__main__':
main()


@ -1,70 +0,0 @@
# MegaDetector/eMammal integration app: introduction
This app takes as input an output file from the AI for Earth MegaDetector batch API and transfers those annotations to the eMammal desktop app running on the local machine. This is very very very very beta, so if you're interested in trying this out, we recommend that you <a href="mailto:cameratraps@lila.science">email us</a>!
We have also worked with a number of eMammal users to use MegaDetector in a somewhat less elegant way, specifically to process all their images before they ever get to the eMammal desktop app, and use the MegaDetector results to move all the empty images into a separate folder, which users can then either upload to a separate deployment or not upload at all. We can do the same with human/vehicle images. If you're interested in trying this approach out, also <a href="mailto:cameratraps@lila.science">email us</a>!
# Downloading the eMammal integration app
Download from <a href="https://lilablobssc.blob.core.windows.net/models/apps/megadetector-eMammal-integration-app.1.00.zip">here</a>.
# Downloading the eMammal desktop app
If you're reading this, you probably already have the eMammal desktop app, and we're not going to post a link here, but the installer you downloaded should look something like "EMammal_Desktop-0.9.32.exe". 0.9.32 is the most recent version we've tested our integration tool against.
User-facing documentation for the eMammal desktop app (for a slightly different version, but close) is available here:
* <https://emammal.si.edu/content/emammal-training><br/>
* <https://www.youtube.com/watch?v=3x4JwHEMtFg&feature=emb_logo>
If you face any issues during installation of the eMammal desktop app, refer to [eMammal-Client-App-Troubleshooting.md](eMammal-Client-App-Troubleshooting.md).
# Using the eMammal integration app
## Run the eMammal desktop app and load images
1. Run the eMammal desktop app, click "Load New Photos", then select the project, sub-project, and deployment. It doesn't matter which sub-project/deployment you select; you won't be pushing anything to the cloud during testing.
2. Load the images into the eMammal desktop app by selecting a folder, which eMammal will recursively search for images. eMammal will remember the path of each image relative to the base folder you pick, and these relative paths need to match the .json file. So if your .json file has images like:
`a/b/c/d/image0001.jpg`
You should make sure that the "a" folder is somewhere all by itself, and select the folder above it, even if all the images are way down in the "c" folder. Of course you can also manipulate the .json file to match, but one way or another they need to line up.
3. Leave the app running.
## Run the AI for Earth eMammal integration app
1. Run the AI for Earth eMammal integration app.
2. Select the .json detection file for the images you loaded above.
3. Select the eMammal project and deployment you selected above.
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="images/eMammal-integration-app-project-details.jpg" width="500"><br/>
4. Click "next", then in the next screen for <i>Category Mapping</i>, select the eMammal categories from the drop-down list to which you want to match each of the four MegaDetector categories ("Animal", "Person", "Vehicle", and "Blank"). These will by default be mapped to the eMammal categories "Unknown Animal", "Homo sapiens", "Vehicle", and "No Animal", respectively.
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="images/eMammal-integration-app-category-mapping.jpg" width="500"><br/>
5. Click "next" to add annotations to the eMammal database. Once all the annotations have been added to the database, you will see a message confirming the successful deployment, asking you to close and re-open the eMammal desktop app (step (1) below).
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="images/successful-deployment.jpg" width="500"></br>
6. Once you've closed and re-opened the eMammal desktop app, you can click the "verify" button to confirm that all of the image assignments worked correctly:
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="images/verified-push.jpg" width="500"></br>
## View annotations in the eMammal desktop app
1. Close and re-open the eMammal desktop app
2. Sign in and click "Load New Photos"
3. Select the same project and deployment you selected above
4. Click "continue", then in the next window click "Save Deployment Info". In the main window of the eMammal app, annotations for the images should appear reflecting the maximum confidence value for each image in a sequence.
5. Play around with the annotations, but - if you're an AI for Earth person reading this during development - <b>do not click "upload"</b>; we have been asked not to push annotations to the test deployment. It won't be catastrophic if you do, but we said we wouldn't. If you're an eMammal user, by all means, upload away!


@ -1,9 +0,0 @@
<Application x:Class="eMammal_integration_application.App"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:local="clr-namespace:eMammal_integration_application"
StartupUri="eMammalIntegrationWindow.xaml">
<Application.Resources>
</Application.Resources>
</Application>


@ -1,17 +0,0 @@
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.Linq;
using System.Threading.Tasks;
using System.Windows;
namespace eMammal_integration_application
{
/// <summary>
/// Interaction logic for App.xaml
/// </summary>
public partial class App : Application
{
}
}


@ -1,16 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace eMammal_integration_application
{
public class Category
{
public int blank { get; set; }
public int animal { get; set; }
public int person { get; set; }
public int vehicle { get; set; }
}
}


@ -1,131 +0,0 @@
using System;
using System.Drawing;
using System.Threading;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Media;
using System.Windows.Threading;
namespace eMammal_integration_application
{
public class Common
{
public static void CheckConnection(eMammalIntegrationWindow window, bool loadProject = false)
{
eMammalMySQLOps db = new eMammalMySQLOps();
bool isConnectionOpen = false;
while (isConnectionOpen == false)
{
Thread.Sleep(200);
isConnectionOpen = db.OpenConnectionIfNotOpen(true);
}
window.Dispatcher.BeginInvoke(new Action(() =>
{
Common.SetMessage(window, Constants.DATABASE_AVAILABLE, false, false);
window.Tab.Visibility = Visibility.Visible;
if (loadProject)
window.Loadproject();
window.Tab.SelectedIndex = 0;
window.Tab.IsEnabled = true;
window.IsEnabled = true;
window.ButtonBack.Visibility = Visibility.Hidden;
window.ReactivateButton(window.ButtonNext);
window.ReactivateButton(window.ButtonBrowse);
}));
}
public static void SetMessage(eMammalIntegrationWindow window, string msg, bool isError = false, bool showMessageBox = true)
{
//window.Visibility = Visibility.Visible;
//window.TextBlockInfo.Text = msg;
//TextBlock.Text = msg;
window.TextBlockInfo.Dispatcher.Invoke(() => window.TextBlockInfo.Visibility = Visibility.Visible, DispatcherPriority.Background);
window.TextBlockInfo.Dispatcher.Invoke(() => window.TextBlockInfo.Text = msg, DispatcherPriority.Normal);
if (isError)
window.Foreground = new SolidColorBrush(Colors.Red);
else
window.Foreground = new SolidColorBrush(Colors.Blue);
if (showMessageBox)
SetMessageBox(msg, isError);
}
public static void SetMessageBox(string msg, bool error = false)
{
//CustomMessageBox w = new CustomMessageBox();
//w.LabelInfo.Content = msg;
//w.ShowDialog();
if (error)
MessageBox.Show(msg, "", MessageBoxButton.OK,
MessageBoxImage.Error,
MessageBoxResult.OK,
MessageBoxOptions.DefaultDesktopOnly);
else
MessageBox.Show(msg, "", MessageBoxButton.OK,
MessageBoxImage.Information,
MessageBoxResult.OK,
MessageBoxOptions.DefaultDesktopOnly);
}
public static void ShowProgress(eMammalIntegrationWindow window, string msg, int progressCount,
bool isLast = true, bool showProgressBar = true)
{
window.LabelProgress.Content = msg;
window.LabelProgress.Dispatcher.Invoke(() =>
window.LabelProgress.Visibility = Visibility.Visible, DispatcherPriority.Background);
if (showProgressBar)
{
window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
window.ProgressbarUpdateProgress.Visibility = Visibility.Visible, DispatcherPriority.Background);
}
if (isLast)
window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
window.ProgressbarUpdateProgress.Value = progressCount, DispatcherPriority.Normal);
else
window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
window.ProgressbarUpdateProgress.Value = progressCount, DispatcherPriority.Background);
}
public static void HideProgress(eMammalIntegrationWindow window)
{
window.LabelProgress.Content = "";
window.LabelProgress.Visibility = Visibility.Hidden;
window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
window.ProgressbarUpdateProgress.Visibility = Visibility.Hidden, DispatcherPriority.Normal);
window.ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
window.ProgressbarUpdateProgress.Value = 0, DispatcherPriority.Background);
}
public static void delay(int maxCount = 1000000)
{
int count = 0;
while (count < maxCount)
count++;
}
public static int GetShowProgressCount(int showProgressCount, int totalImages)
{
if (totalImages < 10)
showProgressCount = 1;
else if (totalImages > 1000 && totalImages < 100000)
showProgressCount = 100;
else if (totalImages > 100000)
showProgressCount = 1000;
return showProgressCount;
}
}
}


@ -1,65 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.RightsManagement;
using System.Text;
using System.Threading.Tasks;
namespace eMammal_integration_application
{
static class Constants
{
// Category constants
public const string animal = "1";
public const string person = "2";
public const string vehicle = "3";
// Message constants
public const string DATABASE_CONNECTION_ERROR = "Cannot connect to the eMammal database. Please ensure that you have opened the eMammal app and logged in. " +
"Once you have opened the eMammal application, this application will automatically refresh.";
//public const string DATABASE_CONNECTION_ERROR = "Cannot connect to the eMammal database. Please ensure that you have opened the eMammal app and you have logged into the app. ";
public const string NO_JSON_FILE_ERROR = "Please select a JSON detections file";
public const string DATABASE_AVAILABLE = "Application is now able to connect to the eMammal database";
//log messages
public const string LOG_MESSAGE_APP_CONNECTED_TO_DATABASE = "App successfully connected to the eMammal database";
public const string LOG_MESSAGE_PROJECT_LOADED = "Projects loaded";
public const string LOG_APP_COULD_NOT_CONNECT_TO_DATABASE = "App could not connect to the eMammal database";
public const string LOG_APP_CLOSING = "App Closing";
public const string LOG_CLOSING_OPEN_DATABASE_CONNECTION = "Closing open database connection";
public const string LOG_DATABASE_CONNECTION_NOT_OPEN = "Database connection not open";
public const string LOG_ERROR_WHILE_CLOSING_DATABASE_CONNECTION = "Error occurred while trying to close database connection";
public const string LOG_OPEN_CLOSED_DATABASE_CONNECTION = "Opening closed connection";
public const string LOG_OPENING_CLOSED_DATABASE_CONNECTION_SUCCESSFULL = "Opening closed database connection was successful";
public const string LOG_ERROR_WHILE_OPENING_DATABASE_CONNECTION = "Error occurred while opening database connection";
public const string LOG_ADDING_UNIQUE_KEY_CONSTRAINT = "Adding unique key constraint";
public const string LOG_CHECKING_IF_UNIQUE_KEY_ALREADY_EXISTS = "Checking if unique key already exists in the database";
public const string LOG_UNIQUE_KEY_ALREADY_EXISTS = "Unique key already exists in the database";
public const string LOG_START_PROCESSING_IMAGES = "Starting image processing";
public const string LOG_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB = "Getting image sequence data from the database";
public const string LOG_COULD_NOT_RETRIEVE_IMAGE_SEQUENCES_FROM_DATABASE = "Could not retrieve image sequences from the database";
public const string LOG_NUM_IMAGE_SEQUENCES = "Number of image sequences returned from DB: ";
public const string LOG_ITERATING_IMAGES_IN_JSON_FILE = "Iterating through the images in the JSON file";
//Progress messages
public const string PROCESSING_IMAGES = "Processing images...";
public const string PROGRESS_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB = "Getting image sequence data from the database";
public const string PROGRESS_UPDATING_ANNOTATIONS_IN_DB = "Updating annotations in the database";
public const string PROGRESS_CONTINUING_WITH_NEXT_IMAGE = "Continuing with next image";
//Log and Progress messages
public const string INSERTING_DETECTIONS = "Inserting detections";
public const string INSERTING_REMAINING_DETECTIONS = "Inserting remaining detections";
public const string ANNOTATIONS_ADDED_FOR_ALL_IMAGES = "Annotations added for all images in eMammal database";
//Error messages
public const string ERROR_WHILE_VERIFYING_ANNOTATIONS_IN_DB = "Error occurred while verifying annotations in eMammal database";
}
}


@ -1,17 +0,0 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace eMammal_integration_application
{
/// <summary>
/// Class for generating image sequence annotation list
/// </summary>
public class ImageTaxa
{
public int sequenceId { get; set; }
public int projectTaxaId { get; set; }
}
}


@ -1,52 +0,0 @@
using System;
using System.Collections.Generic;
using System.Text;
using Newtonsoft.Json;
namespace eMammal_integration_application
{
public class JsonData
{
public Info info { get; set; }
public Dictionary<string, string> detection_categories { get; set; }
public Classification_Categories classification_categories { get; set; }
public List<Image> images { get; set; }
}
public class Info
{
public string detector { get; set; }
public string detection_completion_time { get; set; }
public string format_version { get; set; }
}
public class Detection_Categories
{
public string _1 { get; set; }
public string _2 { get; set; }
}
public class Classification_Categories
{
}
public class Image
{
public Detection[] detections { get; set; }
public string file { get; set; }
public dynamic max_detection_conf { get; set; }
}
public class Detection
{
[JsonProperty(Order = 1)]
public string category { get; set; }
[JsonProperty(Order = 2)]
public dynamic conf { get; set; }
[JsonProperty(Order = 3)]
public float[] bbox { get; set; }
}
}


@ -1,55 +0,0 @@
using System.Reflection;
using System.Resources;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Windows;
// General Information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with an assembly.
[assembly: AssemblyTitle("eMammal-integration-application")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("eMammal-integration-application")]
[assembly: AssemblyCopyright("Copyright © 2020")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Setting ComVisible to false makes the types in this assembly not visible
// to COM components. If you need to access a type in this assembly from
// COM, set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
//In order to begin building localizable applications, set
//<UICulture>CultureYouAreCodingWith</UICulture> in your .csproj file
//inside a <PropertyGroup>. For example, if you are using US english
//in your source files, set the <UICulture> to en-US. Then uncomment
//the NeutralResourceLanguage attribute below. Update the "en-US" in
//the line below to match the UICulture setting in the project file.
//[assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.Satellite)]
[assembly: ThemeInfo(
ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located
//(used if a resource is not found in the page,
// or application resource dictionaries)
ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located
//(used if a resource is not found in the page,
// app, or any theme specific resource dictionaries)
)]
// Version information for an assembly consists of the following four values:
//
// Major Version
// Minor Version
// Build Number
// Revision
//
// You can specify all the values or you can default the Build and Revision Numbers
// by using the '*' as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]


@ -1,71 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace eMammal_integration_application.Properties
{
/// <summary>
/// A strongly-typed resource class, for looking up localized strings, etc.
/// </summary>
// This class was auto-generated by the StronglyTypedResourceBuilder
// class via a tool like ResGen or Visual Studio.
// To add or remove a member, edit your .ResX file then rerun ResGen
// with the /str option, or rebuild your VS project.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources
{
private static global::System.Resources.ResourceManager resourceMan;
private static global::System.Globalization.CultureInfo resourceCulture;
[global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
internal Resources()
{
}
/// <summary>
/// Returns the cached ResourceManager instance used by this class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Resources.ResourceManager ResourceManager
{
get
{
if ((resourceMan == null))
{
global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("eMammal_integration_application.Properties.Resources", typeof(Resources).Assembly);
resourceMan = temp;
}
return resourceMan;
}
}
/// <summary>
/// Overrides the current thread's CurrentUICulture property for all
/// resource lookups using this strongly typed resource class.
/// </summary>
[global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
internal static global::System.Globalization.CultureInfo Culture
{
get
{
return resourceCulture;
}
set
{
resourceCulture = value;
}
}
}
}


@ -1,117 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<root>
<!--
Microsoft ResX Schema
Version 2.0
The primary goals of this format is to allow a simple XML format
that is mostly human readable. The generation and parsing of the
various data types are done through the TypeConverter classes
associated with the data types.
Example:
... ado.net/XML headers & schema ...
<resheader name="resmimetype">text/microsoft-resx</resheader>
<resheader name="version">2.0</resheader>
<resheader name="reader">System.Resources.ResXResourceReader, System.Windows.Forms, ...</resheader>
<resheader name="writer">System.Resources.ResXResourceWriter, System.Windows.Forms, ...</resheader>
<data name="Name1"><value>this is my long string</value><comment>this is a comment</comment></data>
<data name="Color1" type="System.Drawing.Color, System.Drawing">Blue</data>
<data name="Bitmap1" mimetype="application/x-microsoft.net.object.binary.base64">
<value>[base64 mime encoded serialized .NET Framework object]</value>
</data>
<data name="Icon1" type="System.Drawing.Icon, System.Drawing" mimetype="application/x-microsoft.net.object.bytearray.base64">
<value>[base64 mime encoded string representing a byte array form of the .NET Framework object]</value>
<comment>This is a comment</comment>
</data>
There are any number of "resheader" rows that contain simple
name/value pairs.
Each data row contains a name, and value. The row also contains a
type or mimetype. Type corresponds to a .NET class that support
text/value conversion through the TypeConverter architecture.
Classes that don't support this are serialized and stored with the
mimetype set.
The mimetype is used for serialized objects, and tells the
ResXResourceReader how to depersist the object. This is currently not
extensible. For a given mimetype the value must be set accordingly:
Note - application/x-microsoft.net.object.binary.base64 is the format
that the ResXResourceWriter will generate, however the reader can
read any of the formats listed below.
mimetype: application/x-microsoft.net.object.binary.base64
value : The object must be serialized with
: System.Serialization.Formatters.Binary.BinaryFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.soap.base64
value : The object must be serialized with
: System.Runtime.Serialization.Formatters.Soap.SoapFormatter
: and then encoded with base64 encoding.
mimetype: application/x-microsoft.net.object.bytearray.base64
value : The object must be serialized into a byte array
: using a System.ComponentModel.TypeConverter
: and then encoded with base64 encoding.
-->
<xsd:schema id="root" xmlns="" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:msdata="urn:schemas-microsoft-com:xml-msdata">
<xsd:element name="root" msdata:IsDataSet="true">
<xsd:complexType>
<xsd:choice maxOccurs="unbounded">
<xsd:element name="metadata">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" />
<xsd:attribute name="type" type="xsd:string" />
<xsd:attribute name="mimetype" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="assembly">
<xsd:complexType>
<xsd:attribute name="alias" type="xsd:string" />
<xsd:attribute name="name" type="xsd:string" />
</xsd:complexType>
</xsd:element>
<xsd:element name="data">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
<xsd:element name="comment" type="xsd:string" minOccurs="0" msdata:Ordinal="2" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" msdata:Ordinal="1" />
<xsd:attribute name="type" type="xsd:string" msdata:Ordinal="3" />
<xsd:attribute name="mimetype" type="xsd:string" msdata:Ordinal="4" />
</xsd:complexType>
</xsd:element>
<xsd:element name="resheader">
<xsd:complexType>
<xsd:sequence>
<xsd:element name="value" type="xsd:string" minOccurs="0" msdata:Ordinal="1" />
</xsd:sequence>
<xsd:attribute name="name" type="xsd:string" use="required" />
</xsd:complexType>
</xsd:element>
</xsd:choice>
</xsd:complexType>
</xsd:element>
</xsd:schema>
<resheader name="resmimetype">
<value>text/microsoft-resx</value>
</resheader>
<resheader name="version">
<value>2.0</value>
</resheader>
<resheader name="reader">
<value>System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
<resheader name="writer">
<value>System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089</value>
</resheader>
</root>


@ -1,30 +0,0 @@
//------------------------------------------------------------------------------
// <auto-generated>
// This code was generated by a tool.
// Runtime Version:4.0.30319.42000
//
// Changes to this file may cause incorrect behavior and will be lost if
// the code is regenerated.
// </auto-generated>
//------------------------------------------------------------------------------
namespace eMammal_integration_application.Properties
{
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "11.0.0.0")]
internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase
{
private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings())));
public static Settings Default
{
get
{
return defaultInstance;
}
}
}
}


@ -1,7 +0,0 @@
<?xml version='1.0' encoding='utf-8'?>
<SettingsFile xmlns="uri:settings" CurrentProfile="(Default)">
<Profiles>
<Profile Name="(Default)" />
</Profiles>
<Settings />
</SettingsFile>


@ -1,34 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<configuration>
<startup>
<supportedRuntime version="v4.0" sku=".NETFramework,Version=v4.7.2" />
</startup>
<appSettings configProtectionProvider="RsaProtectedConfigurationProvider">
<EncryptedData Type="http://www.w3.org/2001/04/xmlenc#Element"
xmlns="http://www.w3.org/2001/04/xmlenc#">
<EncryptionMethod Algorithm="http://www.w3.org/2001/04/xmlenc#aes256-cbc" />
<KeyInfo xmlns="http://www.w3.org/2000/09/xmldsig#">
<EncryptedKey xmlns="http://www.w3.org/2001/04/xmlenc#">
<EncryptionMethod Algorithm="http://www.w3.org/2001/04/xmlenc#rsa-oaep-mgf1p" />
<KeyInfo xmlns="http://www.w3.org/2000/09/xmldsig#">
<KeyName>Rsa Key</KeyName>
</KeyInfo>
<CipherData>
<CipherValue>V8N7vxvIXIVMARjoh9X2tVnXfPmUZVPLPnTIM+vwAy2R4cCM5SUBvqAma7LswAXPbrTBBHpgmUDbA5gaHhB9X0chT/SbvOgaST+OUWCV6h4T+fJbk3inh4JO+XE/jDcrYJXxkVhp3B5uNyuJVWRk/2SahliUA1Hp3AWZzBDhPlOSztgJYtqBCZIHuj9QIajstexXVC7CSpczPrfNy3Tb4ZHWt86L/xLFiIDE/r01gfxo/QadQClD6/7SQrvZRcPVTkNegjXkwgcZOz6fnVQeyj9O/yILOHk3HjO3vArfIk/RWtY5JVyxRmw4aRbV6ej1mhxopr0K8ZJSH6DwZLYWKg==</CipherValue>
</CipherData>
</EncryptedKey>
</KeyInfo>
<CipherData>
<CipherValue>Aj23BmHyt8L+8RaGZ3RmhuQ3s5+ua2pPCp1/uoNksaAePtnvvuNGBjWLX6m9p8KbJVwsyXJBjrS9jk19TPc4gJVRLJCZvXC4iVB45egUb9bIoB0E/erRykgskejMmVyjmS8lehIhO99oOezM8X8kvgXb+OMTQ/zG79T3k4GD8uUxqgfPgcWKD34Zq2rs35foZbg85mbIb8GPXM/P6I0F9h3LhItZodwq7+xtWSTmc2kSPt5wZk0UqIaGAEuh4fMrn/MKUpVsDkgkGmFiQMQoKg==</CipherValue>
</CipherData>
</EncryptedData>
</appSettings>
<runtime>
<assemblyBinding xmlns="urn:schemas-microsoft-com:asm.v1">
<dependentAssembly>
<assemblyIdentity name="System.Buffers" publicKeyToken="cc7b13ffcd2ddd51" culture="neutral" />
<bindingRedirect oldVersion="0.0.0.0-4.0.2.0" newVersion="4.0.2.0" />
</dependentAssembly>
</assemblyBinding>
</runtime>
</configuration>

View file

@ -1,182 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{D09FFD0B-7666-4974-86F3-7E130EC56E48}</ProjectGuid>
<OutputType>WinExe</OutputType>
<RootNamespace>eMammal_integration_application</RootNamespace>
<AssemblyName>eMammal-integration-application</AssemblyName>
<TargetFrameworkVersion>v4.7.2</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<ProjectTypeGuids>{60dc8134-eba5-43b8-bcc9-bb4bc16c2548};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
<WarningLevel>4</WarningLevel>
<AutoGenerateBindingRedirects>true</AutoGenerateBindingRedirects>
<Deterministic>true</Deterministic>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<PlatformTarget>AnyCPU</PlatformTarget>
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup>
<ApplicationIcon>favicon.ico</ApplicationIcon>
</PropertyGroup>
<ItemGroup>
<Reference Include="BouncyCastle.Crypto">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\BouncyCastle.Crypto.dll</HintPath>
</Reference>
<Reference Include="Google.Protobuf">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Google.Protobuf.dll</HintPath>
</Reference>
<Reference Include="K4os.Compression.LZ4">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Compression.LZ4.dll</HintPath>
</Reference>
<Reference Include="K4os.Compression.LZ4.Streams">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Compression.LZ4.Streams.dll</HintPath>
</Reference>
<Reference Include="K4os.Hash.xxHash">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\K4os.Hash.xxHash.dll</HintPath>
</Reference>
<Reference Include="MySql.Data, Version=8.0.21.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
<HintPath>packages\MySql.Data.8.0.21\lib\net452\MySql.Data.dll</HintPath>
</Reference>
<Reference Include="Newtonsoft.Json, Version=12.0.0.0, Culture=neutral, PublicKeyToken=30ad4fe6b2a6aeed, processorArchitecture=MSIL">
<HintPath>packages\Newtonsoft.Json.12.0.3\lib\net45\Newtonsoft.Json.dll</HintPath>
</Reference>
<Reference Include="NLog, Version=4.0.0.0, Culture=neutral, PublicKeyToken=5120e14c03d0593c, processorArchitecture=MSIL">
<HintPath>packages\NLog.4.7.2\lib\net45\NLog.dll</HintPath>
</Reference>
<Reference Include="Renci.SshNet">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Renci.SshNet.dll</HintPath>
</Reference>
<Reference Include="Serilog">
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\Serilog.dll</HintPath>
</Reference>
<Reference Include="System" />
<Reference Include="System.Buffers, Version=4.0.2.0, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Buffers.dll</HintPath>
</Reference>
<Reference Include="System.ComponentModel" />
<Reference Include="System.ComponentModel.DataAnnotations" />
<Reference Include="System.Configuration" />
<Reference Include="System.Configuration.Install" />
<Reference Include="System.Data" />
<Reference Include="System.Drawing" />
<Reference Include="System.Drawing.Design" />
<Reference Include="System.IO.Compression" />
<Reference Include="System.Management" />
<Reference Include="System.Memory, Version=4.0.1.1, Culture=neutral, PublicKeyToken=cc7b13ffcd2ddd51, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Memory.dll</HintPath>
</Reference>
<Reference Include="System.Numerics.Vectors, Version=4.1.3.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Numerics.Vectors.dll</HintPath>
</Reference>
<Reference Include="System.Runtime.CompilerServices.Unsafe, Version=4.0.4.1, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a, processorArchitecture=MSIL">
<SpecificVersion>False</SpecificVersion>
<HintPath>..\eMammal-integration\WPF-integration-app\bin\Debug\System.Runtime.CompilerServices.Unsafe.dll</HintPath>
</Reference>
<Reference Include="System.Runtime.Serialization" />
<Reference Include="System.ServiceModel" />
<Reference Include="System.Transactions" />
<Reference Include="System.Xml" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xaml">
<RequiredTargetFramework>4.0</RequiredTargetFramework>
</Reference>
<Reference Include="Ubiety.Dns.Core, Version=2.2.1.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
<HintPath>packages\MySql.Data.8.0.21\lib\net452\Ubiety.Dns.Core.dll</HintPath>
</Reference>
<Reference Include="WindowsBase" />
<Reference Include="PresentationCore" />
<Reference Include="PresentationFramework" />
<Reference Include="Zstandard.Net, Version=1.1.7.0, Culture=neutral, PublicKeyToken=c5687fc88969c44d, processorArchitecture=MSIL">
<HintPath>packages\MySql.Data.8.0.21\lib\net452\Zstandard.Net.dll</HintPath>
</Reference>
</ItemGroup>
<ItemGroup>
<ApplicationDefinition Include="App.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</ApplicationDefinition>
<Page Include="eMammalIntegrationWindow.xaml">
<Generator>MSBuild:Compile</Generator>
<SubType>Designer</SubType>
</Page>
<Compile Include="App.xaml.cs">
<DependentUpon>App.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
<Compile Include="Category.cs" />
<Compile Include="Common.cs" />
<Compile Include="Constants.cs" />
<Compile Include="eMammalIntegration.cs" />
<Compile Include="eMammalIntegrationWindow.xaml.cs">
<DependentUpon>eMammalIntegrationWindow.xaml</DependentUpon>
<SubType>Code</SubType>
</Compile>
</ItemGroup>
<ItemGroup>
<Compile Include="eMammalMySQLOps.cs" />
<Compile Include="ImageTaxa.cs" />
<Compile Include="JsonData.cs" />
<Compile Include="Properties\AssemblyInfo.cs">
<SubType>Code</SubType>
</Compile>
<Compile Include="Properties\Resources.Designer.cs">
<AutoGen>True</AutoGen>
<DesignTime>True</DesignTime>
<DependentUpon>Resources.resx</DependentUpon>
</Compile>
<Compile Include="Properties\Settings.Designer.cs">
<AutoGen>True</AutoGen>
<DependentUpon>Settings.settings</DependentUpon>
<DesignTimeSharedInput>True</DesignTimeSharedInput>
</Compile>
<EmbeddedResource Include="Properties\Resources.resx">
<Generator>ResXFileCodeGenerator</Generator>
<LastGenOutput>Resources.Designer.cs</LastGenOutput>
</EmbeddedResource>
<None Include="nlog.config">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="packages.config" />
<None Include="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
</None>
</ItemGroup>
<ItemGroup>
<None Include="App.config" />
</ItemGroup>
<ItemGroup>
<Resource Include="favicon.ico" />
</ItemGroup>
<ItemGroup>
<Resource Include="images\elephants-277329_1280.jpg" />
<Resource Include="images\MS-AIforEarth.JPG" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>

View file

@ -1,25 +0,0 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.30309.148
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "eMammal-integration-application", "eMammal-integration-application.csproj", "{D09FFD0B-7666-4974-86F3-7E130EC56E48}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D09FFD0B-7666-4974-86F3-7E130EC56E48}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {10B34E61-E305-42E9-B53B-F1055C67A690}
EndGlobalSection
EndGlobal

View file

@ -1,314 +0,0 @@
using NLog;
using System.Data;
using System.Linq;
using System.Text;
using System.Windows;
namespace eMammal_integration_application
{
public class eMammalIntegration
{
eMammalIntegrationWindow window;
Logger logger = LogManager.GetCurrentClassLogger();
// Category constants
const string animal = "1";
const string person = "2";
const string vehicle = "3";
eMammalMySQLOps db;
public eMammalIntegration(eMammalIntegrationWindow window)
{
this.window = window;
db = new eMammalMySQLOps(window);
}
/// <summary>
/// Builds the list of image-sequence/annotation pairs for bulk insertion,
/// calls the function that inserts or updates the annotations, and updates
/// the progress bar and message.
/// </summary>
/// <param name="data"></param>
/// <param name="deploymentId"></param>
/// <param name="deploymentName"></param>
/// <param name="eMammalCategory"></param>
public bool ProcessDetections(JsonData data, int deploymentId, string deploymentName, Category eMammalCategory)
{
StringBuilder logImages = new StringBuilder();
int totalImages = data.images.Count();
window.ProgressbarUpdateProgress.Maximum = totalImages;
logger.Info(Constants.LOG_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB);
Common.ShowProgress(window, Constants.PROGRESS_GETTING_IMAGE_SEQUENCE_DATA_FROM_DB, 1);
DataTable dtImageSequences = db.GetsequenceIDsfromDB(deploymentId);
int imageSequenceCount = -1;
if (dtImageSequences == null)
{
logger.Info(Constants.LOG_COULD_NOT_RETRIEVE_IMAGE_SEQUENCES_FROM_DATABASE);
return false;
}
else
imageSequenceCount = dtImageSequences.Rows.Count;
logger.Info(Constants.LOG_NUM_IMAGE_SEQUENCES + " " + dtImageSequences.Rows.Count.ToString());
if (imageSequenceCount == 0)
{
string msg = string.Format("The selected eMammal deployment {0} does not contain any images", deploymentName);
logger.Info(msg);
Common.SetMessage(window,msg,true);
return false;
}
int showProgressCount = 10;
showProgressCount = Common.GetShowProgressCount(showProgressCount, totalImages);
logger.Info(Constants.LOG_ITERATING_IMAGES_IN_JSON_FILE);
Common.ShowProgress(window, Constants.PROCESSING_IMAGES, 1);
// Set to true if at least one image in the JSON file matches an image
// (by name) in the eMammal database
bool foundImage = false;
int logCount = 0;
int maxBulkInsertCount = 10000;
int count = 0;
int progressCount = 1;
bool recordsAdded = false;
bool imageNotFoundProgressSet = false;
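// Accumulate VALUES tuples in a StringBuilder and flush them to the database
// in batches of maxBulkInsertCount rows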
StringBuilder sql = db.GetBulkInsertInitialString();
foreach (var image in data.images)
{
recordsAdded = false;
string filePath = image.file.Replace("/", "\\");
string imageName = System.IO.Path.GetFileName(filePath);
string imageplusLastFolderName = "";
var folders = filePath.Split(System.IO.Path.DirectorySeparatorChar);
var detections = image.detections;
float max_confidence = (float)image.max_detection_conf;
int currenteMammalCategory = eMammalCategory.blank;
logImages.Append(imageName + "\n");
logCount++;
LogProcessedImages(ref logImages, ref logCount);
if (folders.Length > 1)
imageplusLastFolderName = folders[folders.Length - 2].ToString() + "_" + imageName;
int imageSequenceId = FindSequenceId(dtImageSequences, imageName, imageplusLastFolderName);
progressCount++;
// if the image is not in the eMammal database continue to next image
if (imageSequenceId == -1)
{
Common.ShowProgress(window, string.Format("image: {0} not found in deployment {1}",
imageName, deploymentName), progressCount);
continue;
}
else
{
foundImage = true;
if (imageNotFoundProgressSet == true)
Common.ShowProgress(window, Constants.PROGRESS_CONTINUING_WITH_NEXT_IMAGE,
progressCount);
}
if (detections.Count() == 0)
{
sql.AppendFormat("('{0}', '{1}', '{2}'),", imageSequenceId, currenteMammalCategory, 1);
count++;
}
if (progressCount % showProgressCount == 0)
{
if (totalImages > imageSequenceCount)
Common.ShowProgress(window, string.Format("Processed {0} images",
progressCount.ToString()), progressCount);
else
Common.ShowProgress(window, string.Format("Processed {0} out of {1} images",
progressCount.ToString(), totalImages.ToString()), progressCount);
}
EnumerateDetections(eMammalCategory, ref count, ref sql, detections, max_confidence, ref currenteMammalCategory, imageSequenceId);
if (count >= maxBulkInsertCount)
{
logger.Info("Inserting {0} detections", maxBulkInsertCount.ToString());
count = 0;
bool success = db.BulkInsertAnnotations(sql);
if (!success)
return false;
sql = db.GetBulkInsertInitialString();
recordsAdded = true;
Common.ShowProgress(window,
string.Format("Inserting {0} detections", maxBulkInsertCount.ToString()),
progressCount, false);
}
}
if (logCount > 0)
logger.Info(logImages.ToString());
// Add remaining detections
if (!recordsAdded && foundImage)
{
Common.ShowProgress(window, Constants.PROGRESS_UPDATING_ANNOTATIONS_IN_DB, progressCount);
db.BulkInsertAnnotations(sql);
progressCount++;
if (data.images.Count < maxBulkInsertCount)
{
logger.Info(Constants.INSERTING_DETECTIONS);
Common.ShowProgress(window, Constants.INSERTING_DETECTIONS, progressCount);
}
else
{
logger.Info(Constants.INSERTING_REMAINING_DETECTIONS);
Common.ShowProgress(window, Constants.INSERTING_REMAINING_DETECTIONS, progressCount);
}
}
// The deployment does not contain any of the images listed in the provided JSON file
if (!foundImage)
{
logger.Info("No matching images found in " + deploymentName + " that match the image names in the provided JSON file");
Common.SetMessage(window, "No matching images found in " + deploymentName + " that match the image names in the provided JSON file", true);
return false;
}
logger.Info(Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES);
//ShowProgress((int)window.ProgressbarUpdateProgress.Maximum, Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES, true, true);
Common.ShowProgress(window, Constants.ANNOTATIONS_ADDED_FOR_ALL_IMAGES, (int)window.ProgressbarUpdateProgress.Maximum);
Common.delay();
db.CloseConnection();
return true;
}
private void LogProcessedImages(ref StringBuilder logImages, ref int logCount)
{
if (logCount > 100)
{
logger.Info(logImages.ToString());
logImages = new StringBuilder();
logCount = 0;
}
}
/// <summary>
/// Enumerate detections and update the SQL query
/// </summary>
/// <param name="eMammalCategory"></param>
/// <param name="count"></param>
/// <param name="sql"></param>
/// <param name="detections"></param>
/// <param name="max_confidence"></param>
/// <param name="currenteMammalCategory"></param>
/// <param name="imageSequenceId"></param>
private static void EnumerateDetections(Category eMammalCategory, ref int count, ref StringBuilder sql,
Detection[] detections, float max_confidence, ref int currenteMammalCategory, int imageSequenceId)
{
foreach (var d in detections)
{
// TODO: confirm json file is reading in detections correctly
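// Only detections whose confidence equals max_detection_conf are written to the database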
if ((float)d.conf != max_confidence)
continue;
// map to selected eMammal categories
if (d.category == animal)
currenteMammalCategory = eMammalCategory.animal;
else if (d.category == person)
currenteMammalCategory = eMammalCategory.person;
else if (d.category == vehicle)
currenteMammalCategory = eMammalCategory.vehicle;
sql.AppendFormat("('{0}', '{1}', '{2}'),", imageSequenceId, currenteMammalCategory, 1);
count++;
}
}
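/// <summary>
/// Returns the image_sequence_id whose raw_name matches either the bare image name
/// or the lastFolderName_imageName variant; returns -1 if the image is not found.
/// </summary>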
private int FindSequenceId(DataTable imageSequences, string imageName, string lastFolderName)
{
foreach (DataRow row in imageSequences.Rows)
{
if (row["raw_name"].ToString() == imageName)
return (int)row["image_sequence_id"];
if (row["raw_name"].ToString() == lastFolderName)
return (int)row["image_sequence_id"];
}
return -1;
}
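/// <summary>
/// Reads back the annotations stored for a deployment and streams them into the
/// results box so the user can verify what was written.
/// </summary>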
public bool VerifyAnnotations(int deploymentId)
{
DataTable dt = db.GetImagesForDeployment(deploymentId);
StringBuilder logInfo = new StringBuilder();
int count = 0;
int totalImages = dt.Rows.Count;
window.ProgressbarUpdateProgress.Maximum = dt.Rows.Count;
int progressCount = 0;
int showProgressCount = 10;
window.TextBlockInfo.Visibility = Visibility.Hidden;
showProgressCount = Common.GetShowProgressCount(showProgressCount, totalImages);
foreach (DataRow row in dt.Rows)
{
progressCount++;
string annotation = row[0].ToString() + " - " + row[3].ToString();
window.RichTextBoxResults.AppendText(annotation + "\n");
logInfo.Append(annotation);
count++;
if (count > showProgressCount)
{
logger.Info(logInfo.ToString());
logInfo = new StringBuilder();
count = 0;
Common.ShowProgress(window,
string.Format("Enumerating {0} annotations out of {1}",
progressCount.ToString(),
totalImages.ToString()),
progressCount);
}
}
if (logInfo.Length > 0)
{
logger.Info(logInfo.ToString());
}
return true;
}
}
}

View file

@ -1,95 +0,0 @@
<Window x:Class="eMammal_integration_application.eMammalIntegrationWindow"
xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"
xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"
xmlns:d="http://schemas.microsoft.com/expression/blend/2008"
xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"
xmlns:local="clr-namespace:eMammal_integration_application"
mc:Ignorable="d"
Title="Microsoft AI for Earth - eMammal Integration App" Height="740" Width="795" Loaded="WindowLoaded" Initialized="WindowInitialized" ResizeMode="CanMinimize" Closing="WindowClosing">
<Grid Margin="0,0,0,0">
<Canvas Height="75" Width="710" Margin="0,0,0,330" Background="#fafafa">
<!--<Label Name="LabelError" Content="Erricror" HorizontalAlignment="Left" Margin="42,153,0,0" VerticalAlignment="Top" Foreground="red" FontSize="14" Width="700" Visibility="Hidden"></Label>-->
<TextBlock Name="TextBlockInfo" HorizontalAlignment="Left" Canvas.Left="0" Canvas.Top="5" TextWrapping="Wrap" Text="TextBlock" VerticalAlignment="Top" Foreground="Blue"
FontSize="14" Width="700" Visibility="Hidden" Padding="7"/>
<ProgressBar Name="ProgressbarUpdateProgress" Height="15" Width="690" Canvas.Top="20" Canvas.Left="5" Visibility="Hidden" />
<Label Name="LabelProgress" Content="" HorizontalAlignment="Left" Canvas.Top="30" VerticalAlignment="Top" Canvas.Left="5" Visibility="Hidden" FontSize="12"/>
</Canvas>
<Canvas Height="120" Width="790" Margin="0,0,0,570">
<Image Source="/images/MS-AIforEarth.JPG" Canvas.Left="30" Canvas.Top="12"/>
<Image Source="/images/elephants-277329_1280.jpg" Canvas.Left="35" Canvas.Top="-7" Width="710" Height="196"/>
<Button Name="ButtonVerify" Content="VERIFY" HorizontalAlignment="left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
Canvas.Left="50" Click="ButtonVerifyClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" BorderBrush="LightGray" Visibility="Hidden"/>
<Button Name="ButtonNext" Content="NEXT" HorizontalAlignment="Left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
Canvas.Left="640" Click="ButtonNextClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" BorderBrush="LightGray"/>
<Button Name="ButtonBack" Content="BACK" HorizontalAlignment="Left" VerticalAlignment="Top" Height="35" Width="100" Canvas.Top="620"
Canvas.Left="524" Click="ButtonBackClick" Foreground="#005ce6" FontWeight="Bold" FontStretch="ExtraExpanded" Visibility="Hidden" BorderBrush="LightGray"/>
<!--<Button Content="__" Canvas.Left="670" Canvas.Top="8" Background="White" Padding="10,2,10,5" BorderBrush="LightGray" Click="ButtonMinimizeClick"/>
<Button Content="X" Canvas.Left="710" Canvas.Top="8" Background="White" Padding="10,5,10,5" FontFamily="serif" BorderBrush="LightGray" Click="ButtonCloseClick"/>-->
</Canvas>
<TabControl Name="Tab" Width="690" Height="390" Margin="35,210,42,72" BorderBrush="Gray" BorderThickness="1" SelectionChanged="TabSelectionChanged">
<TabItem Name="TabDetails" Header="Details" Height="30" Width="80" BorderBrush="White" BorderThickness="2" FontWeight="DemiBold">
<Canvas>
<TextBox Name="TextBoxJsonFile" Text="" Margin="40,50,0,15" TextWrapping="Wrap" Width="480" Height="30" VerticalContentAlignment="Center" IsReadOnly="True" TextChanged="TextBoxJsonTextChanged"/>
<Label Name="LabelJsonFileError" Content="Error" Canvas.Top="77" Canvas.Left="34" Foreground="Red" Visibility="Hidden"/>
<Button Name="ButtonBrowse" Content="BROWSE" HorizontalAlignment="Left" Margin="550,50,0,0" VerticalAlignment="Top" Height="30" Width="100" Click="ButtonBrowseJsonClick"
Foreground="#005ce6" FontWeight="DemiBold" BorderBrush="LightGray"/>
<GroupBox Header="eMammal details" Height="220" Width="600" Margin="45,110,0,15">
<Canvas>
<ComboBox Name="comboBoxProject" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,30,0,0" Width="300" Height="30"
SelectionChanged="ComboBoxProjectSelectionChanged" FontWeight="DemiBold"/>
<ComboBox Name="comboBoxSubProject" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,90,0,0" Width="300" Height="30"
SelectionChanged="ComboBoxSubProjectSelectionChanged" FontWeight="DemiBold"/>
<ComboBox Name="comboBoxDeployment" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="180,150,0,0" Width="300" Height="30" FontWeight="DemiBold"/>
<Label Content="Project :" Canvas.Left="100" Canvas.Top="30"/>
<Label Content="Sub project :" Canvas.Left="90" Canvas.Top="90"/>
<Label Content="Deployment :" Canvas.Left="90" Canvas.Top="150"/>
</Canvas>
</GroupBox>
</Canvas>
</TabItem>
<TabItem Name="TabClassMapping" Header="Category Mapping" Width="140" FontWeight="DemiBold" BorderBrush="White" IsEnabled="False">
<Grid Background="White">
<Grid.ColumnDefinitions>
<ColumnDefinition Width="13*"/>
<ColumnDefinition Width="334*"/>
</Grid.ColumnDefinitions>
<Canvas Margin="20,50,0,0" Name="CanvasClassMapping">
<ComboBox Name="cmbProjectTaxaMappingAnimal" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,50,0,0" Width="150" Height="30" />
<ComboBox Name="cmbProjectTaxaMappingPerson" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,100,0,0" Width="150" Height="30"/>
<ComboBox Name="cmbProjectTaxaMappingVehicle" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,150,0,0" Width="150" Height="30"/>
<ComboBox Name="cmbProjectTaxaMappingBlank" HorizontalAlignment="Left" VerticalContentAlignment="Center" Margin="310,200,0,0" Width="150" Height="30"/>
<Label Content="Animal" Margin="170,50,0,0" Width="100" Height="30"/>
<Label Content="Person" Margin="170,100,0,0" Width="100" Height="30"/>
<Label Content="Vehicle" Margin="170,150,0,0" Width="100" Height="30"/>
<Label Content="Blank" Margin="170,200,0,0" Width="100" Height="30"/>
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,60,0,0"/>
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,110,0,0"/>
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,160,0,0"/>
<Path Stroke="Black" Data="M 0 4 L 16 4 L 10 0 M 16 4 L 10 8" Margin="250,210,0,0"/>
</Canvas>
</Grid>
</TabItem>
<TabItem Name="TabResults" Header="Results" Width="100" FontWeight="DemiBold" BorderBrush="White" IsEnabled="False"
Visibility="Hidden" Height="32" Margin="0,-2,0,0" VerticalAlignment="Top">
<Grid Background="White">
<Canvas Margin="0,0,0,0" Grid.ColumnSpan="2">
<RichTextBox Name="RichTextBoxResults" ScrollViewer.VerticalScrollBarVisibility="Auto" Grid.Column="1" HorizontalAlignment="Left" Height="350" Margin="0,0,0,0" VerticalAlignment="Top" Width="678" BorderBrush="White">
<FlowDocument>
<Paragraph>
</Paragraph>
</FlowDocument>
</RichTextBox>
</Canvas>
</Grid>
</TabItem>
</TabControl>
</Grid>
</Window>

View file

@ -1,565 +0,0 @@
using MySql.Data.MySqlClient;
using Newtonsoft.Json;
using NLog;
using System;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows;
using System.Windows.Controls;
using System.Windows.Data;
using System.Windows.Documents;
using System.Windows.Input;
using System.Windows.Media;
using System.Windows.Media.Imaging;
using System.Windows.Navigation;
using System.Windows.Shapes;
using System.Windows.Threading;
namespace eMammal_integration_application
{
/// <summary>
/// Interaction logic for eMammalIntegrationWindow.xaml
/// </summary>
public partial class eMammalIntegrationWindow : Window
{
Logger logger = LogManager.GetCurrentClassLogger();
eMammalMySQLOps db;
eMammalIntegration eMammalIntegration;
//double tabTopOriginalMargin;
//double originalHeight;
public eMammalIntegrationWindow()
{
InitializeComponent();
db = new eMammalMySQLOps(this);
//tabTopOriginalMargin = Tab.Margin.Top;
eMammalIntegration = new eMammalIntegration(this);
}
private void WindowInitialized(object sender, EventArgs e)
{
WindowStartupLocation = System.Windows.WindowStartupLocation.CenterScreen;
}
private void WindowLoaded(object sender, RoutedEventArgs e)
{
if (db.OpenConnectionIfNotOpen(true))
{
logger.Info(Constants.LOG_MESSAGE_APP_CONNECTED_TO_DATABASE);
Loadproject();
logger.Info(Constants.LOG_MESSAGE_PROJECT_LOADED);
}
else
{
logger.Info(Constants.LOG_APP_COULD_NOT_CONNECT_TO_DATABASE);
Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);
this.IsEnabled = false;
DisableButton(ButtonNext);
DisableButton(ButtonBack);
DisableButton(ButtonBrowse);
this.Activate();
Thread thread = new Thread(() => Common.CheckConnection(this, true));
thread.Start();
}
}
private void WindowClosing(object sender, System.ComponentModel.CancelEventArgs e)
{
logger.Info(Constants.LOG_APP_CLOSING);
db.CloseConnection();
}
private void TabSelectionChanged(object sender, SelectionChangedEventArgs e)
{
if (Tab.SelectedIndex == 0 || Tab.SelectedIndex == 1)
{
TabResults.Visibility = Visibility.Hidden;
}
if (Tab.SelectedIndex == 0)
{
ButtonBack.Visibility = Visibility.Hidden;
}
if (Tab.SelectedIndex == 1)
{
ButtonBack.Visibility = Visibility.Visible;
}
}
private void ButtonNextClick(object sender, RoutedEventArgs e)
{
try
{
//this.Tab.Margin = new Thickness(Tab.Margin.Left, tabTopOriginalMargin, Tab.Margin.Right, Tab.Margin.Bottom);
TabResults.Visibility = Visibility.Hidden;
ResetControlsAfterProcessing();
TextBlockInfo.Text = "";
TextBlockInfo.Visibility = Visibility.Hidden;
if (Tab.SelectedIndex == 0)
{
if (String.IsNullOrEmpty(TextBoxJsonFile.Text))
{
SetInvalidJsonError(Constants.NO_JSON_FILE_ERROR);
return;
}
else
{
if (!IsJsonFile())
return;
}
TabClassMapping.IsEnabled = true;
Tab.SelectedIndex = 1;
LoadCategoryMappings();
ButtonBack.Visibility = Visibility.Visible;
}
else
{
TabClassMapping.IsEnabled = false;
CanvasClassMapping.IsEnabled = false;
TabDetails.IsEnabled = false;
ButtonBack.IsEnabled = false;
ButtonNext.IsEnabled = false;
ButtonBack.Foreground = new SolidColorBrush(Colors.Gray);
ButtonNext.Foreground = new SolidColorBrush(Colors.Gray);
// Invoking change in one element to Refresh UI with the above changes
ButtonBack.Dispatcher.Invoke(() => ButtonBack.Foreground = new SolidColorBrush(Colors.Gray), DispatcherPriority.Background);
var data = LoadJson(TextBoxJsonFile.Text);
int deploymentId = (int)comboBoxDeployment.SelectedValue;
int eMammalBlankCategory = (int)cmbProjectTaxaMappingBlank.SelectedValue;
int eMammalAnimalCategory = (int)cmbProjectTaxaMappingAnimal.SelectedValue;
int eMammalPersonCategory = (int)cmbProjectTaxaMappingPerson.SelectedValue;
int eMammalVehicleCategory = (int)cmbProjectTaxaMappingVehicle.SelectedValue;
if (ProgressbarUpdateProgress.Maximum == 0)
ProgressbarUpdateProgress.Maximum = 1;
// This makes inserts into the eMammal app much faster
db.AddUniqueKeySequenceTaxa();
logger.Info(Constants.LOG_START_PROCESSING_IMAGES);
Common.ShowProgress(this, Constants.PROCESSING_IMAGES, 1);
bool success = eMammalIntegration.ProcessDetections(data, deploymentId, comboBoxDeployment.Text, new Category()
{
blank = eMammalBlankCategory,
animal = eMammalAnimalCategory,
person = eMammalPersonCategory,
vehicle = eMammalVehicleCategory
});
if (success)
{
ButtonVerify.Visibility = Visibility.Visible;
//Tab.Margin = new Thickness(Tab.Margin.Left, Tab.Margin.Top + 50, Tab.Margin.Right, Tab.Margin.Bottom);
//Tab.Visibility = Visibility.Hidden;
TextBlockInfo.Text = "";
TextBlockInfo.Inlines.Add("Processed all images in the JSON file.");
TextBlockInfo.Inlines.Add(" Open and close the eMammal application, then in the eMammal application select ");
TextBlockInfo.Inlines.Add("project >");
Run run = new Run(comboBoxProject.Text);
run.FontWeight = FontWeights.Bold;
TextBlockInfo.Inlines.Add(run);
TextBlockInfo.Inlines.Add(" sub-project >");
run = new Run(comboBoxSubProject.Text);
run.FontWeight = FontWeights.Bold;
TextBlockInfo.Inlines.Add(run);
TextBlockInfo.Inlines.Add(" deployment > ");
run = new Run(comboBoxDeployment.Text);
run.FontWeight = FontWeights.Bold;
TextBlockInfo.Inlines.Add(run);
TextBlockInfo.Foreground = new SolidColorBrush(Colors.Blue);
TextBlockInfo.Visibility = Visibility.Visible;
ReactivateButton(ButtonNext);
ReactivateButton(ButtonBack);
DisableButton(ButtonNext);
//this.Activate();
ResetControlsAfterProcessing();
DisableButton(ButtonNext);
}
else
{
ResetControlsAfterProcessing();
DisableButton(ButtonNext);
}
}
}
catch (Exception ex)
{
Common.HideProgress(this);
HandleExceptions(ex);
}
}
public void ReactivateButton(Button button)
{
button.IsEnabled = true;
button.Foreground = new System.Windows.Media.SolidColorBrush((Color)ColorConverter.ConvertFromString("#005ce6"));
}
private void DisableButton(Button button)
{
button.IsEnabled = false;
button.Foreground = new SolidColorBrush(Colors.Gray);
}
/// <summary>
/// Remove progress bar and message, re-enable back and next buttons after processing
/// </summary>
private void ResetControlsAfterProcessing()
{
LabelProgress.Dispatcher.Invoke(() => LabelProgress.Content = "", DispatcherPriority.Background);
LabelProgress.Dispatcher.Invoke(() => LabelProgress.Visibility
= Visibility.Hidden, DispatcherPriority.Background);
ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
ProgressbarUpdateProgress.Value = 0, DispatcherPriority.Background);
ProgressbarUpdateProgress.Dispatcher.Invoke(() =>
ProgressbarUpdateProgress.Visibility = Visibility.Hidden, DispatcherPriority.Background);
TabDetails.IsEnabled = true;
TabClassMapping.IsEnabled = true;
CanvasClassMapping.IsEnabled = true;
ButtonBack.IsEnabled = true;
ButtonNext.IsEnabled = true;
ButtonBack.Foreground = new System.Windows.Media.SolidColorBrush((Color)ColorConverter.ConvertFromString("#005ce6"));
ButtonNext.Foreground = new System.Windows.Media.SolidColorBrush((Color)ColorConverter.ConvertFromString("#005ce6"));
}
/// <summary>
/// Browse button for selecting a json file
/// When the button is clicked, a file dialog opens from which the user can
/// select a JSON file
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void ButtonBrowseJsonClick(object sender, RoutedEventArgs e)
{
// TODO: change this code copied from web
Microsoft.Win32.OpenFileDialog openFileDlg = new Microsoft.Win32.OpenFileDialog();
Nullable<bool> result = openFileDlg.ShowDialog();
if (result == true)
TextBoxJsonFile.Text = openFileDlg.FileName;
}
private void ComboBoxProjectSelectionChanged(object sender, SelectionChangedEventArgs e)
{
if (IsComboBoxLoaded(sender))
LoadSubProject();
}
private void ComboBoxSubProjectSelectionChanged(object sender, SelectionChangedEventArgs e)
{
if (IsComboBoxLoaded(sender))
LoadDeployment();
}
/// <summary>
/// Switch back to details tab
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void ButtonBackClick(object sender, RoutedEventArgs e)
{
Tab.SelectedIndex = 0;
Tab.Visibility = Visibility.Visible;
ButtonNext.IsEnabled = true;
ButtonNext.Foreground = new System.Windows.Media.SolidColorBrush((Color)ColorConverter.ConvertFromString("#005ce6"));
ButtonBack.Visibility = Visibility.Hidden;
ButtonVerify.Visibility = Visibility.Hidden;
TabResults.Visibility = Visibility.Hidden;
}
/// <summary>
/// Text box changed event for json file textbox
/// Hide error message and change border of textbox from red to black
/// </summary>
/// <param name="sender"></param>
/// <param name="e"></param>
private void TextBoxJsonTextChanged(object sender, TextChangedEventArgs e)
{
LabelJsonFileError.Visibility = Visibility.Hidden;
TextBoxJsonFile.BorderBrush = Brushes.Black;
}
/// <summary>
/// Loads the eMammal project id and names
/// </summary>
public void Loadproject()
{
DataTable dt = db.GetProjectDetails();
FillDrodownLists(comboBoxProject, dt, "name", "project_id");
}
/// <summary>
/// Loads the eMammal sub project id and names
/// </summary>
private void LoadSubProject()
{
DataTable dt = db.GetSubProjectDetails(comboBoxProject.SelectedValue.ToString());
FillDrodownLists(comboBoxSubProject, dt, "name", "event_id");
}
/// <summary>
/// Loads the eMammal deployment id and names
/// </summary>
private void LoadDeployment()
{
bool success;
DataTable dt = db.GetDeploymentDetails(out success, comboBoxSubProject.SelectedValue.ToString());
FillDrodownLists(comboBoxDeployment, dt, "name", "deployment_id");
}
private void FillDrodownLists(ComboBox combobox, DataTable dt, string displayMemberPath,
string SelectedValuePath)
{
combobox.ItemsSource = dt.DefaultView;
combobox.DisplayMemberPath = displayMemberPath;
combobox.SelectedValuePath = SelectedValuePath;
combobox.SelectedIndex = 0;
}
private bool IsComboBoxLoaded(object sender)
{
var comboBox = (ComboBox)sender;
if (!comboBox.IsLoaded)
return false;
return true;
}
/// <summary>
/// Sets error message in a label
/// </summary>
/// <param name="message"></param>
private void SetInvalidJsonError(string message)
{
TextBoxJsonFile.BorderBrush = Brushes.Red;
LabelJsonFileError.Content = message;
LabelJsonFileError.Visibility = Visibility.Visible;
}
/// <summary>
/// Checks if a file provided is a JSON file
/// </summary>
/// <returns></returns>
private bool IsJsonFile()
{
string ext = System.IO.Path.GetExtension(TextBoxJsonFile.Text);
if (ext.ToLower() != ".json")
{
SetInvalidJsonError("Please select a valid JSON file");
return false;
}
return true;
}
/// <summary>
/// Loads the eMammal taxa for the selected project into the
/// category-mapping dropdown lists and preselects likely defaults.
/// </summary>
private void LoadCategoryMappings()
{
if (!cmbProjectTaxaMappingAnimal.HasItems)
{
var taxas = db.GetEmammalTaxas((int)comboBoxProject.SelectedValue);
FillDrodownLists(cmbProjectTaxaMappingAnimal, taxas, "species", "emammal_project_taxa_id");
FillDrodownLists(cmbProjectTaxaMappingPerson, taxas, "species", "emammal_project_taxa_id");
FillDrodownLists(cmbProjectTaxaMappingVehicle, taxas, "species", "emammal_project_taxa_id");
FillDrodownLists(cmbProjectTaxaMappingBlank, taxas, "species", "emammal_project_taxa_id");
// Set the initial category in the category mapping dropdown lists
SetPossibleCategory(cmbProjectTaxaMappingAnimal, "unknown animal");
SetPossibleCategory(cmbProjectTaxaMappingPerson, "homo sapiens");
SetPossibleCategory(cmbProjectTaxaMappingVehicle, "vehicle");
SetPossibleCategory(cmbProjectTaxaMappingBlank, "no animal");
}
}
///<summary>
/// Sets the initial category mapping in comboboxes
/// in the category mapping section
/// </summary>
/// <param name="comboBox"></param>
/// <param name="text"></param>
private void SetPossibleCategory(ComboBox comboBox, string text)
{
foreach (Object item in comboBox.Items)
{
DataRowView row = item as DataRowView;
if (row != null)
{
string displayValue = row["species"].ToString();
if (displayValue.ToLower() == text)
comboBox.SelectedIndex = comboBox.Items.IndexOf(item);
}
}
}
/// <summary>
/// Loads json file into JsonData object
/// </summary>
/// <param name="inputFileName"></param>
/// <returns></returns>
private JsonData LoadJson(string inputFileName)
{
string json = File.ReadAllText(inputFileName);
var data = JsonConvert.DeserializeObject<JsonData>(json);
return data;
}
private void DisableTabs()
{
TabClassMapping.IsEnabled = false;
TabDetails.IsEnabled = false;
}
private void EnableTabs()
{
TabClassMapping.IsEnabled = true;
TabDetails.IsEnabled = true;
}
private void ButtonVerifyClick(object sender, RoutedEventArgs e)
{
try
{
logger.Info("Verifying images...");
Mouse.OverrideCursor = System.Windows.Input.Cursors.Wait;
//Common.delay(100);
DisableButton(ButtonBack);
DisableButton(ButtonNext);
ButtonVerify.Visibility = Visibility.Hidden;
TabDetails.IsEnabled = false;
TabClassMapping.IsEnabled = false;
TabResults.IsEnabled = true;
int deploymentId = (int)comboBoxDeployment.SelectedValue;
RichTextBoxResults.AppendText("\n");
bool success = eMammalIntegration.VerifyAnnotations(deploymentId);
Tab.SelectedIndex = 2;
TabResults.Visibility = Visibility.Visible;
TabResults.IsEnabled = true;
ButtonVerify.Visibility = Visibility.Hidden;
ResetControlsAfterProcessing();
ReactivateButton(ButtonNext);
ReactivateButton(ButtonBack);
TabDetails.IsEnabled = true;
TabClassMapping.IsEnabled = true;
TextBlockInfo.Visibility = Visibility.Visible;
if (!success)
TabResults.Visibility = Visibility.Hidden;
}
catch (Exception ex)
{
HandleExceptions(ex);
}
finally
{
Mouse.OverrideCursor = System.Windows.Input.Cursors.Arrow;
}
}
private void HandleExceptions(Exception ex)
{
logger.Error(ex.ToString());
if (ex is MySqlException)
{
HandleSQLExceptions(ex as MySqlException);
Common.HideProgress(this);
Thread thread = new Thread(() => Common.CheckConnection(this));
thread.Start();
return;
}
else
{
Common.HideProgress(this);
}
MessageBox.Show(ex.Message);
}
private void HandleSQLExceptions(MySqlException ex)
{
Tab.IsEnabled = false;
Common.HideProgress(this);
int number = -1;
if (ex.InnerException != null && ex.InnerException is MySqlException)
{
number = ((MySqlException)ex.InnerException).Number;
logger.Error(ex.InnerException.ToString());
}
if (number == 0 || ex.Number == 1042)
Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);
else if (ex.InnerException != null)
{
// No way to get the error code from the inner exception, therefore matching on the message text
if (ex.InnerException.InnerException != null)
{
string errmsg = ex.InnerException.InnerException.Message;
if (errmsg.Contains("127.0.0.1:3307") && errmsg.Contains("No connection could be made"))
Common.SetMessage(this, Constants.DATABASE_CONNECTION_ERROR, true, true);
}
else
Common.SetMessage(this, ex.InnerException.Message, true, true);
}
else
Common.SetMessage(this, ex.Message, true, true);
}
}
}

View file

@ -1,315 +0,0 @@
using System;
using System.Configuration;
using System.Text;
using System.Data;
using MySql.Data.MySqlClient;
using NLog;
namespace eMammal_integration_application
{
public class eMammalMySQLOps
{
Logger logger = LogManager.GetCurrentClassLogger();
eMammalIntegrationWindow window;
private string mysqlConnectionstring = ConfigurationManager.AppSettings["mysqlConnectionstring"].ToString();
MySqlConnection connection = new MySqlConnection();
public eMammalMySQLOps(eMammalIntegrationWindow window)
{
this.window = window;
connection = new MySqlConnection(mysqlConnectionstring);
}
public eMammalMySQLOps()
{
connection = new MySqlConnection(mysqlConnectionstring);
}
public bool OpenConnectionIfNotOpen(bool returnOnError = false)
{
try
{
if (connection.State == ConnectionState.Closed)
{
logger.Info(Constants.LOG_OPEN_CLOSED_DATABASE_CONNECTION);
connection.Open();
}
logger.Info(Constants.LOG_OPENING_CLOSED_DATABASE_CONNECTION_SUCCESSFULL);
return true;
}
catch (Exception ex)
{
if (returnOnError)
return false;
throw;
}
}
public void CloseConnection()
{
try
{
if (connection.State == ConnectionState.Open)
{
connection.Close();
logger.Info(Constants.LOG_CLOSING_OPEN_DATABASE_CONNECTION);
}
else
logger.Info(Constants.LOG_DATABASE_CONNECTION_NOT_OPEN);
}
catch (Exception ex)
{
logger.Info(Constants.LOG_ERROR_WHILE_CLOSING_DATABASE_CONNECTION);
logger.Error(ex.ToString());
}
}
public bool IsConnectionOpen()
{
try
{
if (connection.State == ConnectionState.Open)
return true;
return false;
}
catch (Exception ex)
{
return false;
}
}
// TODO: add error checking return null on error
/// <summary>
/// This function is called for SELECT statements that return multiple rows
/// </summary>
/// <param name="query"></param>
/// <returns></returns>
public DataTable GeData(string query)
{
DataTable dt = new DataTable();
using (MySqlCommand command = new MySqlCommand(query, connection))
{
command.CommandType = CommandType.Text;
dt.Load(command.ExecuteReader());
}
return dt;
}
/// <summary>
/// This function is called for inserting or updating data in the DB
/// </summary>
/// <param name="query">SQL query string</param>
public void ExecuteQuery(string query)
{
//using (MySqlConnection connection = new MySqlConnection(mysqlConnectionstring))
//{
//connection.Open();
using (MySqlCommand command = new MySqlCommand(query, connection))
{
command.CommandType = CommandType.Text;
command.CommandText = query;
int result = command.ExecuteNonQuery();
}
}
/// <summary>
/// This function is called for returning a single value from DB
/// </summary>
/// <param name="query">SQL query string</param>
/// <returns></returns>
public object ExecuteScalar(string query)
{
OpenConnectionIfNotOpen();
using (MySqlCommand command = new MySqlCommand(query, connection))
{
command.CommandType = CommandType.Text;
command.CommandText = query;
Object result = null;
result = command.ExecuteScalar();
return result;
}
}
/// <summary>
/// Add unique for sequenceid, projecttaxaid to prevent duplicate inserts
/// </summary>
public void AddUniqueKeySequenceTaxa()
{
string sql = " SELECT constraint_name" +
" FROM information_schema.TABLE_CONSTRAINTS" +
" WHERE table_name = 'emammal_sequence_annotation'" +
" AND constraint_name = 'ai4e_unique_key'";
logger.Info(Constants.LOG_CHECKING_IF_UNIQUE_KEY_ALREADY_EXISTS);
var result = ExecuteScalar(sql);
if (result == null)
{
sql = " ALTER TABLE emammal_sequence_annotation " +
" ADD CONSTRAINT ai4e_unique_key UNIQUE KEY(sequence_id, project_taxa_id); ";
logger.Info(Constants.LOG_ADDING_UNIQUE_KEY_CONSTRAINT);
logger.Info(sql);
ExecuteQuery(sql);
}
else
{
logger.Info(Constants.LOG_UNIQUE_KEY_ALREADY_EXISTS);
}
}
/// <summary>
/// Get sequenceids for all the images in a deployment
/// </summary>
/// <param name="deploymentId"></param>
/// <returns></returns>
public DataTable GetsequenceIDsfromDB(int deploymentId)
{
string sql = string.Format(" SELECT b.raw_name, b.image_sequence_id " +
" FROM wild_ID.image_sequence a, wild_id.image b " +
" WHERE a.image_sequence_id = b.image_sequence_id " +
" AND a.deployment_id = {0}; ", deploymentId);
string mysqlConnectionstring = ConfigurationManager.AppSettings["mysqlConnectionstring"].ToString();
DataTable dt = new DataTable("imageSequences");
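// MySqlDataAdapter.Fill opens the local connection if it is closed and
// closes it again when done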
using (MySqlConnection connection = new MySqlConnection(mysqlConnectionstring))
{
OpenConnectionIfNotOpen();
using (MySqlDataAdapter adapter = new MySqlDataAdapter(sql, connection))
{
adapter.Fill(dt);
return dt;
}
}
}
public DataTable GetEmammalTaxas(int projectId)
{
string sql = string.Format(" SELECT species, emammal_project_taxa_id FROM wild_id.emammal_project_taxa " +
" WHERE project_id = {0}", projectId);
DataTable dt = GetDataTable(sql, "ProjectDetails");
return dt;
}
public DataTable GetProjectDetails()
{
// Get eMammal project name and ids
string sql = " SELECT e.project_id, " +
" CONCAT('p', '-', e.project_id, ' ', p.name ) as name " +
" FROM wild_id.project p, wild_id.emammal_project e " +
" WHERE p.project_id = e.project_id ";
DataTable dt = GetDataTable(sql, "ProjectDetails");
return dt;
}
public DataTable GetSubProjectDetails(string projectId)
{
// Get eMammal project name and ids
string sql = string.Format(" SELECT e.event_id, " +
" CONCAT('sp', '-', e.event_id, ' ', e.name ) as name " +
" FROM wild_id.event e " +
" WHERE e.project_id = {0} ", projectId);
DataTable dt = GetDataTable(sql, "SubProjectDetails");
return dt;
}
public DataTable GetDeploymentDetails(out bool success, string eventId)
{
success = false;
// Get eMammal project name and ids
string sql = string.Format(" SELECT d.deployment_id, " +
" CONCAT('d', '-', d.deployment_id, ' ', d.name ) as name " +
" FROM deployment d, emammal_deployment e " +
" WHERE d.deployment_id = e.deployment_id " +
" AND event_id = {0} ", eventId);
DataTable dt = GetDataTable(sql, "DeploymentsDetails");
return dt;
}
public DataTable GetDataTable(string sql, string type)
{
DataTable dt = new DataTable();
OpenConnectionIfNotOpen();
using (MySqlDataAdapter adapter = new MySqlDataAdapter(sql, connection))
{
adapter.Fill(dt);
return dt;
}
}
public StringBuilder GetBulkInsertInitialString()
{
StringBuilder sql = new StringBuilder("INSERT INTO wild_id.emammal_sequence_annotation(sequence_id, project_taxa_id, total_count) VALUES ");
return sql;
}
public bool BulkInsertAnnotations(StringBuilder sql)
{
string loginfo = "";
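// Drop the trailing comma left by the last appended VALUES tuple; the
// ON DUPLICATE KEY UPDATE clause (paired with the unique key added in
// AddUniqueKeySequenceTaxa) makes re-running the bulk insert idempotent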
string sqlString = sql.ToString().Remove(sql.Length - 1);
sqlString += " ON DUPLICATE KEY UPDATE " +
" sequence_id = VALUES(sequence_id)," +
" project_taxa_id = VALUES(project_taxa_id), " +
" total_count = VALUES(total_count);";
loginfo += "\n" + sqlString;
OpenConnectionIfNotOpen();
using (MySqlCommand cmd = new MySqlCommand(sql.ToString(), connection))
{
cmd.CommandType = CommandType.Text;
cmd.CommandText = sqlString;
cmd.ExecuteNonQuery();
}
return true;
}
public DataTable GetImagesForDeployment(int deploymentId)
{
logger.Info("Starting verification...");
string sql = string.Format(" SELECT b.raw_name, b.image_sequence_id, deployment_id, d.common_name " +
" FROM wild_ID.image_sequence a, wild_id.image b, " +
" wild_id.emammal_sequence_annotation c, " +
" wild_id.emammal_project_taxa d " +
" WHERE a.image_sequence_id = b.image_sequence_id " +
" AND c.sequence_id = a.image_sequence_id " +
" AND c.project_taxa_id = d.emammal_project_taxa_id " +
" AND a.deployment_id = {0} order by b.raw_name", deploymentId);
logger.Info(sql);
DataTable dt = new DataTable();
dt = GetDataTable(sql, "");
return dt;
}
}
}

Binary file not shown (before: 4.2 KiB)

Binary file not shown (before: 5.5 KiB)

Binary file not shown (before: 91 KiB)

View file

@ -1,18 +0,0 @@
<?xml version="1.0" encoding="utf-8" ?>
<nlog
xmlns="http://www.nlog-project.org/schemas/NLog.xsd"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<targets>
<target name="file" xsi:type="File"
layout="-------------- ${level} (${longdate}) --------------${newline}
${newline}
Call Site: ${callsite}${newline}
${newline}
${message}${newline}" fileName="log.log" archiveOldFileOnStartup="true" maxArchiveFiles="1"/>
</targets>
<rules>
<logger name="*" minlevel="Trace" writeTo="file" />
</rules>
</nlog>

View file

@ -1,17 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="BouncyCastle" version="1.8.3.1" targetFramework="net472" />
<package id="Google.Protobuf" version="3.15.0" targetFramework="net472" />
<package id="K4os.Compression.LZ4" version="1.1.11" targetFramework="net472" />
<package id="K4os.Compression.LZ4.Streams" version="1.1.11" targetFramework="net472" />
<package id="K4os.Hash.xxHash" version="1.0.6" targetFramework="net472" />
<package id="MySql.Data" version="8.0.21" targetFramework="net472" />
<package id="Newtonsoft.Json" version="13.0.1" targetFramework="net472" />
<package id="NLog" version="4.7.2" targetFramework="net472" />
<package id="Serilog" version="2.9.0" targetFramework="net472" />
<package id="SSH.NET" version="2020.0.2" targetFramework="net472" />
<package id="System.Buffers" version="4.5.0" targetFramework="net472" />
<package id="System.Memory" version="4.5.3" targetFramework="net472" />
<package id="System.Numerics.Vectors" version="4.4.0" targetFramework="net472" />
<package id="System.Runtime.CompilerServices.Unsafe" version="4.6.0" targetFramework="net472" />
</packages>

View file

@ -1,23 +0,0 @@
# Troubleshooting eMammal app installation errors
If you get one of the following errors during installation:
![](images/error-invoking-method.jpg)
![](images/failed-to-launch-JVM.jpg)
...try the following:
1. If the JDK is not installed on the computer, install the latest JDK version (be sure to install it as administrator).
2. If the JDK is already installed, uninstall and reinstall the JDK (be sure to install it as administrator).
3. After the JDK is installed, make sure that the JDK path is included in the system path. [This tutorial](https://javatutorial.net/set-java-home-windows-10) shows how to add the JDK path
to the system path in Windows 10. A quick check script follows this list.
4. If you have MySQL server installed on your machine, uninstall MySQL server or turn off the MySQL service. To turn off MySQL server, open
the services.msc app in Windows, find the MySQL service, right-click on the service, and click "stop".
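
As a quick sanity check for steps 1–3, here is a minimal sketch (assuming Python 3 happens to be available on the machine; the approach is illustrative and not part of the app) that prints whether `JAVA_HOME` is set and which `java` executable Windows resolves from the PATH:

```python
import os
import shutil

# JAVA_HOME should point at the JDK install directory
print("JAVA_HOME:", os.environ.get("JAVA_HOME", "<not set>"))

# shutil.which resolves 'java' the same way the command prompt does via PATH
print("java resolves to:", shutil.which("java") or "<not on PATH>")
```

If `java` does not resolve, revisit step 3.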

Binary file not shown (before: 132 KiB)

Binary file not shown (before: 76 KiB)

Binary file not shown (before: 74 KiB)

Binary file not shown (before: 105 KiB)

Binary file not shown (before: 87 KiB)

Binary file not shown (before: 11 KiB)

Binary file not shown (before: 9.5 KiB)

Binary file not shown (before: 111 KiB)

Binary file not shown (before: 70 KiB)

Binary file not shown (before: 91 KiB)

Binary file not shown (before: 131 KiB)

Binary file not shown (before: 110 KiB)

Binary file not shown (before: 124 KiB)

Binary file not shown (before: 100 KiB)

Binary file not shown (before: 118 KiB)

View file

@ -1,5 +0,0 @@
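# MySQL connection settings for the local wild_id (eMammal) database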
host="localhost"
username="root"
password=""
port=3307
database="wild_id"

View file

@ -1,127 +0,0 @@
#
# Test script for pushing annotations to the eMammal db
#
#%% Imports

import sys
import json
import argparse

import pymysql
import config as cfg
from tqdm import tqdm
from enum import Enum

#%% Database functions

class Categories(Enum):
    animal = 1
    person = 2
    vehicle = 3

mysql_connection = pymysql.connect(host=cfg.host,
                                   user=cfg.username,
                                   passwd=cfg.password,
                                   db=cfg.database,
                                   port=cfg.port)

def update_data(sql):
    with mysql_connection.cursor() as cursor:
        cursor.execute(sql)

def get_records_all(sql):
    with mysql_connection.cursor() as cursor:
        cursor.execute(sql)
        rows = cursor.fetchall()
    return rows

def format_data_print_deployments(rows):
    # Print a numbered menu of deployments; return (number, id, name) tuples
    count = 0
    result = []
    for row in rows:
        count += 1
        print("{}. {}-{}".format(str(count), row[0], row[1]))
        result.append((count, row[0], row[1]))
    return result

#%% Command-line driver

def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('input_file', type=str, help='Input .json filename')

    if len(sys.argv[1:]) == 0:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    print("Enter the number of the deployment:")
    rows = get_records_all(''' select * from deployment ''')
    deployments = format_data_print_deployments(rows)
    print("\n")
    deployment_choice = input()

    # Menu numbers start at 1, list indices at 0
    deployment_id = deployments[int(deployment_choice) - 1][1]
    print(deployment_id)

    # TODO: check project ID ?

    # ORDER BY FIELD pins the row order so that the index mapping used below
    # (0=No Animal, 1=Unknown Animal, 2=Homo sapiens, 3=Vehicle) is deterministic
    sql = ''' SELECT emammal_project_taxa_id FROM wild_id.emammal_project_taxa
              WHERE species IN ("No Animal", "Unknown Animal", "Homo sapiens", "Vehicle")
              ORDER BY FIELD(species, "No Animal", "Unknown Animal", "Homo sapiens", "Vehicle") '''
    emammal_categories = get_records_all(sql)

    with open(args.input_file) as f:
        data = json.load(f)

    images = data['images']
    emammal_category = None

    for index, im in tqdm(enumerate(images), total=len(images)):

        fn = im['file']
        if len(im['detections']) <= 0:
            image_type_id = 2  # computed for reference; not used in the UPDATE below
            # No-animal category (assign to emammal_category, don't clobber the list)
            emammal_category = emammal_categories[0]
        else:
            # Keep only the highest-confidence detection for this image
            max_conf = im['max_detection_conf']
            detection = [k for k in im['detections'] if k['conf'] == max_conf]
            category = int(detection[0]['category'])

            # Compare against the enum's value; comparing an int to the Enum
            # member itself is always False
            if category == Categories.animal.value:
                image_type_id = 1
                emammal_category = emammal_categories[1]
            else:
                image_type_id = 5
                if category == Categories.person.value:
                    emammal_category = emammal_categories[2]
                elif category == Categories.vehicle.value:
                    emammal_category = emammal_categories[3]

        # Use the mapped taxa id rather than a hard-coded value; rows from
        # get_records_all are 1-tuples, hence emammal_category[0]. The extra
        # join and deployment filter scope the update to the chosen deployment.
        # Plain string formatting is tolerable in a local test script, but it
        # is not injection-safe.
        sql = """ UPDATE wild_id.emammal_sequence_annotation,
                         wild_id.image,
                         wild_id.image_sequence,
                         wild_id.deployment
                  SET wild_id.emammal_sequence_annotation.project_taxa_id = {}
                  WHERE wild_id.image.image_sequence_id = wild_id.emammal_sequence_annotation.sequence_id
                  AND wild_id.image.image_sequence_id = wild_id.image_sequence.image_sequence_id
                  AND wild_id.image_sequence.deployment_id = wild_id.deployment.deployment_id
                  AND wild_id.deployment.deployment_id = {}
                  AND wild_id.image.raw_name = '{}' """.format(emammal_category[0], deployment_id, fn)
        print(sql)
        update_data(sql)

    mysql_connection.commit()

if __name__ == '__main__':
    main()

Some files were not shown because too many files have changed in this diff.