diff --git a/MLHUB.yaml b/MLHUB.yaml index 1fcbc20..e3f41b7 100644 --- a/MLHUB.yaml +++ b/MLHUB.yaml @@ -3,7 +3,7 @@ meta: name : azcv title : Azure Computer Vision. keywords : azure, computer vision, ocr, thumbnail, image to text, cloud - version : 2.3.4 + version : 2.4.0 languages : py display : demo license : mit @@ -21,10 +21,18 @@ dependencies: - tags.py - describe.py - thumbnail.py + - category.py + - objects.py + - brands.py + - faces.py commands: demo : Demonstrate the functionality of computer vision models. tag : Extract tags for an image. describe : Describe the main element(s) of the image. + category : Image category based on a taxonomy of 86 concepts. + objects : Detect objects within the image. + brands : Identify brands in an image. ocr : Extract text from images. landmark : Identify landmark in a photo. - thumbnail : Create a good/informative thumbnail from an image. \ No newline at end of file + thumbnail : Create a good/informative thumbnail from an image. + faces : Detect faces in an image. \ No newline at end of file diff --git a/brands.py b/brands.py new file mode 100644 index 0000000..211ef8d --- /dev/null +++ b/brands.py @@ -0,0 +1,76 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# Author: Graham.Williams@togaware.com +# +# A script to identify brands in an image. 
+# +# ml brands azcv + +from msrest.authentication import CognitiveServicesCredentials +from azure.cognitiveservices.vision.computervision import ComputerVisionClient + +import os +import argparse +from textwrap import fill + +from mlhub.pkg import azkey, is_url +from mlhub.utils import get_cmd_cwd + +# ---------------------------------------------------------------------- +# Parse command line arguments +# ---------------------------------------------------------------------- + +option_parser = argparse.ArgumentParser(add_help=False) + +option_parser.add_argument( + 'path', + help='path or url to image') + +args = option_parser.parse_args() + +# ---------------------------------------------------------------------- + +SERVICE = "Computer Vision" +KEY_FILE = os.path.join(os.getcwd(), "private.txt") + +# Request subscription key and endpoint from user. + +subscription_key, endpoint = azkey(KEY_FILE, SERVICE, verbose=False) + +# Set credentials. + +credentials = CognitiveServicesCredentials(subscription_key) + +# Create client. + +client = ComputerVisionClient(endpoint, credentials) + +# ---------------------------------------------------------------------- +# URL or path +# ---------------------------------------------------------------------- + +path = args.path + +# Check the URL supplied or path exists and is an image. + +# ---------------------------------------------------------------------- +# Analyze +# ---------------------------------------------------------------------- + +image_features = ["brands"] + +# Send provided image (url or path) to azure to analyse. 
+ +if is_url(path): + analysis = client.analyze_image(path, image_features) +else: + path = os.path.join(get_cmd_cwd(), path) + with open(path, 'rb') as fstream: + analysis = client.analyze_image_in_stream(fstream, image_features) + +for brand in analysis.brands: + print(f"{brand.confidence:.2f},{brand.name}," + + f"{brand.rectangle.x},{brand.rectangle.x + brand.rectangle.w}," + + f"{brand.rectangle.y},{brand.rectangle.y + brand.rectangle.h}") diff --git a/category.py b/category.py new file mode 100644 index 0000000..b1ba3e9 --- /dev/null +++ b/category.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# Author: Graham.Williams@togaware.com +# +# A script to categorize an image. +# +# ml category azcv + +from msrest.authentication import CognitiveServicesCredentials +from azure.cognitiveservices.vision.computervision import ComputerVisionClient + +import os +import argparse +from textwrap import fill + +from mlhub.pkg import azkey, is_url +from mlhub.utils import get_cmd_cwd + +# ---------------------------------------------------------------------- +# Parse command line arguments +# ---------------------------------------------------------------------- + +option_parser = argparse.ArgumentParser(add_help=False) + +option_parser.add_argument( + 'path', + help='path or url to image') + +args = option_parser.parse_args() + +# ---------------------------------------------------------------------- + +SERVICE = "Computer Vision" +KEY_FILE = os.path.join(os.getcwd(), "private.txt") + +# Request subscription key and endpoint from user. + +subscription_key, endpoint = azkey(KEY_FILE, SERVICE, verbose=False) + +# Set credentials. + +credentials = CognitiveServicesCredentials(subscription_key) + +# Create client. 
+ +client = ComputerVisionClient(endpoint, credentials) + +# ---------------------------------------------------------------------- +# URL or path +# ---------------------------------------------------------------------- + +path = args.path + +# Check the URL supplied or path exists and is an image. + +# ---------------------------------------------------------------------- +# Analyze +# ---------------------------------------------------------------------- + +image_features = ["categories"] + +if is_url(path): + analysis = client.analyze_image(path, image_features) +else: + path = os.path.join(get_cmd_cwd(), path) + with open(path, 'rb') as fstream: + analysis = client.analyze_image_in_stream(fstream, image_features) + +for category in analysis.categories: + print(f"{category.score:.2f},{category.name}") diff --git a/docs/README.md b/docs/README.md index 9c37e41..cd52904 100644 --- a/docs/README.md +++ b/docs/README.md @@ -49,11 +49,25 @@ $ ml configure azcv ## Command Line Tools In addition to the *demo* presented below, the *azcv* package provides -a number of useful command line tools. +a number of useful command line tools. Below we demonstrate a number +of these. Most commands take an image as a parameter which may be a +url or a path to a local file. + +**Image Category** + +The *category* command takes an image and categorises it based on a +taxonomy of 86 concepts. + +```console +ml category azcv https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/landmark.jpg +0.32,building_ +0.00,others_ +0.04,outdoor_ +``` **Landmarks and Tags** -The *landmark* command takes an image (url or path to a local file) +The *landmark* command takes an image and identifies the main landmark contained within the image. The confidence of the identification is also returned. 
@@ -86,6 +100,19 @@ $ ml tag azcv https://www.wayoutback.com.au/assets/Uploads/Uluru.jpg ``` See [Landmarks and Tags](TAGS.md) for further details and examples. +**Object Bounding Boxes** + +The *objects* command returns the bounding box of any identified +objects within the image. + +```console +ml objects azcv https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/objects.jpg +213,85,365,208 +218,179,402,384 +238,298,417,416 +116,60,419,386 +``` + **Optical Character Recognition to Read Street Signs** The *ocr* command is useful for extracting text from a variety of @@ -146,6 +173,26 @@ img-thumbnail.jpg ``` See [Thumbnail Examples](THUMBNAIL.md) for further details and examples. +**Detecting Brands** + +```console +ml brands azcv https://docs.microsoft.com/en-us/azure/cognitive-services/computer-vision/images/gray-shirt-logo.jpg +0.62,Microsoft,58,113,106,152 +0.70,Microsoft,58,260,86,149 +``` + +**Detecting Faces** + +```console +ml faces azcv https://raw.githubusercontent.com/Azure-Samples/cognitive-services-sample-data-files/master/ComputerVision/Images/faces.jpg +Male,39,118,159,212,253 +Male,54,492,111,582,201 +Female,55,18,153,102,237 +Female,33,386,166,467,247 +Female,18,235,158,311,234 +Female,8,323,163,391,231 +``` + ## Demonstration ```console @@ -415,31 +462,46 @@ Thank you for exploring the 'azcv' package. # Contributing -This project welcomes contributions and suggestions. Most contributions require you to agree to a -Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us -the rights to use your contribution. For details, visit https://cla.microsoft.com. +This project welcomes contributions and suggestions. Most +contributions require you to agree to a Contributor License Agreement +(CLA) declaring that you have the right to, and actually do, grant us +the rights to use your contribution. For details, visit +https://cla.microsoft.com. 
-When you submit a pull request, a CLA-bot will automatically determine whether you need to provide -a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions -provided by the bot. You will only need to do this once across all repos using our CLA. +When you submit a pull request, a CLA-bot will automatically determine +whether you need to provide a CLA and decorate the PR appropriately +(e.g., label, comment). Simply follow the instructions provided by the +bot. You will only need to do this once across all repos using our +CLA. -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or -contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. +This project has adopted the [Microsoft Open Source Code of +Conduct](https://opensource.microsoft.com/codeofconduct/). For more +information see the [Code of Conduct +FAQ](https://opensource.microsoft.com/codeofconduct/faq/) or contact +[opencode@microsoft.com](mailto:opencode@microsoft.com) with any +additional questions or comments. 
# Legal Notices -Microsoft and any contributors grant you a license to the Microsoft documentation and other content -in this repository under the [Creative Commons Attribution 4.0 International Public License](https://creativecommons.org/licenses/by/4.0/legalcode), -see the [LICENSE](LICENSE) file, and grant you a license to any code in the repository under the [MIT License](https://opensource.org/licenses/MIT), see the +Microsoft and any contributors grant you a license to the Microsoft +documentation and other content in this repository under the [Creative +Commons Attribution 4.0 International Public +License](https://creativecommons.org/licenses/by/4.0/legalcode), see +the [LICENSE](LICENSE) file, and grant you a license to any code in +the repository under the [MIT +License](https://opensource.org/licenses/MIT), see the [LICENSE-CODE](LICENSE-CODE) file. -Microsoft, Windows, Microsoft Azure and/or other Microsoft products and services referenced in the documentation -may be either trademarks or registered trademarks of Microsoft in the United States and/or other countries. -The licenses for this project do not grant you rights to use any Microsoft names, logos, or trademarks. -Microsoft's general trademark guidelines can be found at http://go.microsoft.com/fwlink/?LinkID=254653. +Microsoft, Windows, Microsoft Azure and/or other Microsoft products +and services referenced in the documentation may be either trademarks +or registered trademarks of Microsoft in the United States and/or +other countries. The licenses for this project do not grant you +rights to use any Microsoft names, logos, or trademarks. Microsoft's +general trademark guidelines can be found at +http://go.microsoft.com/fwlink/?LinkID=254653. Privacy information can be found at https://privacy.microsoft.com/en-us/ -Microsoft and any contributors reserve all other rights, whether under their respective copyrights, patents, -or trademarks, whether by implication, estoppel or otherwise. 
+Microsoft and any contributors reserve all other rights, whether under +their respective copyrights, patents, or trademarks, whether by +implication, estoppel or otherwise. diff --git a/faces.py b/faces.py new file mode 100644 index 0000000..ea85425 --- /dev/null +++ b/faces.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# Author: Graham.Williams@togaware.com +# +# A script to detect faces in an image. +# +# ml faces azcv + +from msrest.authentication import CognitiveServicesCredentials +from azure.cognitiveservices.vision.computervision import ComputerVisionClient + +import os +import argparse +from textwrap import fill + +from mlhub.pkg import azkey, is_url +from mlhub.utils import get_cmd_cwd + +# ---------------------------------------------------------------------- +# Parse command line arguments +# ---------------------------------------------------------------------- + +option_parser = argparse.ArgumentParser(add_help=False) + +option_parser.add_argument( + 'path', + help='path or url to image') + +args = option_parser.parse_args() + +# ---------------------------------------------------------------------- + +SERVICE = "Computer Vision" +KEY_FILE = os.path.join(os.getcwd(), "private.txt") + +# Request subscription key and endpoint from user. + +subscription_key, endpoint = azkey(KEY_FILE, SERVICE, verbose=False) + +# Set credentials. + +credentials = CognitiveServicesCredentials(subscription_key) + +# Create client. + +client = ComputerVisionClient(endpoint, credentials) + +# ---------------------------------------------------------------------- +# URL or path +# ---------------------------------------------------------------------- + +path = args.path + +# Check the URL supplied or path exists and is an image. 
+ +# ---------------------------------------------------------------------- +# Analyze +# ---------------------------------------------------------------------- + +image_features = ["faces"] + +# Send provided image (url or path) to azure to analyse. + +if is_url(path): + analysis = client.analyze_image(path, image_features) +else: + path = os.path.join(get_cmd_cwd(), path) + with open(path, 'rb') as fstream: + analysis = client.analyze_image_in_stream(fstream, image_features) + +for face in analysis.faces: + print(f"{face.gender},{face.age}," + + f"{face.face_rectangle.left},{face.face_rectangle.top}," + + f"{face.face_rectangle.left + face.face_rectangle.width}," + + f"{face.face_rectangle.top + face.face_rectangle.height}") diff --git a/objects.py b/objects.py new file mode 100644 index 0000000..9c0201b --- /dev/null +++ b/objects.py @@ -0,0 +1,74 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# Author: Graham.Williams@togaware.com +# +# A script to identify objects in an image. +# +# ml objects azcv + +from msrest.authentication import CognitiveServicesCredentials +from azure.cognitiveservices.vision.computervision import ComputerVisionClient + +import os +import argparse +from textwrap import fill + +from mlhub.pkg import azkey, is_url +from mlhub.utils import get_cmd_cwd + +# ---------------------------------------------------------------------- +# Parse command line arguments +# ---------------------------------------------------------------------- + +option_parser = argparse.ArgumentParser(add_help=False) + +option_parser.add_argument( + 'path', + help='path or url to image') + +args = option_parser.parse_args() + +# ---------------------------------------------------------------------- + +SERVICE = "Computer Vision" +KEY_FILE = os.path.join(os.getcwd(), "private.txt") + +# Request subscription key and endpoint from user. 
+ +subscription_key, endpoint = azkey(KEY_FILE, SERVICE, verbose=False) + +# Set credentials. + +credentials = CognitiveServicesCredentials(subscription_key) + +# Create client. + +client = ComputerVisionClient(endpoint, credentials) + +# Check the URL supplied or path exists and is an image. + +# Send provided image (url or path) to azure to detect objects. + +# ---------------------------------------------------------------------- +# URL or path +# ---------------------------------------------------------------------- + +path = args.path + +# ---------------------------------------------------------------------- +# Objects +# ---------------------------------------------------------------------- + +if is_url(path): + analysis = client.detect_objects(path) +else: + path = os.path.join(get_cmd_cwd(), path) + with open(path, 'rb') as fstream: + analysis = client.detect_objects_in_stream(fstream) + +for object in analysis.objects: + print(f"{object.rectangle.x},{object.rectangle.y}," + + f"{object.rectangle.x + object.rectangle.w}," + + f"{object.rectangle.y + object.rectangle.h}")