This commit is contained in:
angusrtaylor 2020-02-10 16:20:16 +00:00
Parent c6743a9c0f
Commit e78a1e6448
4 changed files with 429 additions and 94 deletions

View file

@@ -21,10 +21,10 @@ How to use the tool for action recognition:
## Scripts for use with the VIA Tool
The VIA tool outputs annotations as a csv file. Often, however, we need each annotated action to be written as its own clip in a separate file. For this, we provide some utility functions (in this folder) which help with:
- Extraction of each action as a "positive" clip, and of "negative" clips, defined as video segments where no action-of-interest occurs.
- Conversion of the video clips to a format which the VIA tool knows how to read.
The VIA tool outputs annotations as a csv file. Often, however, we need each annotated action to be written as its own clip in a separate file. These clips can then serve as training examples for action recognition models. We provide some scripts to aid in the construction of such datasets:
- [video_conversion.py](./video_conversion.py) - Conversion of the video clips to a format which the VIA tool knows how to read.
- [clip_extraction.py](./clip_extraction.py) - Extraction of each annotated action as a separate clip. Optionally, "negative" clips can be generated, in which no action-of-interest occurs. Negative clips can be extracted in two ways: either all contiguous non-overlapping negative clips can be extracted, or a specified number of negative examples can be randomly sampled. This behaviour can be controlled using the `contiguous` flag. The script outputs clips into directories specific to each class and generates a label file that maps each filename to the clip's class label (see the sketch after this list).
- [split_examples.py](./split_examples.py) - Splits generated example clips into training and evaluation sets. Optionally, a negative candidate set and negative test set can be generated for hard negative mining.
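
As a quick illustration of the label file format, here is a minimal sketch of loading the labels for training. The `load_labels` helper and the file path are hypothetical; the format assumed is the space-separated `"clip_path" class_id` lines written by `clip_extraction.py`:

```python
import csv

def load_labels(label_filepath):
    # Each line maps a clip's relative path (class subdirectory plus file
    # stem) to its numeric class ID, e.g.: Action1/myvideo_3 0
    labels = {}
    with open(label_filepath) as f:
        reader = csv.reader(f, delimiter=" ", skipinitialspace=True)
        for row in reader:
            if row:  # skip blank lines
                labels[row[0]] = int(row[1])
    return labels

# e.g. labels = load_labels("labels.txt")
```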
## Annotation Tools Comparison

View file

@@ -4,145 +4,224 @@
# prerequisite:
# (1) download and extract ffmpeg: https://github.com/adaptlearning/adapt_authoring/wiki/Installing-FFmpeg
# (2) make sure ffmpeg is on your system's PATH environment variable
# the script depends on the following fixed properties of the csv:
# skiprows=1
"""
Extracts clips from videos stored in video_dir. Clips are defined from the annotated actions recorded in
annotation_filepath, which is the raw output csv from the VIA video annotator tool. This csv should have the format:
# Exported using VGG Image Annotator (http://www.robots.ox.ac.uk/~vgg/software/via)
# CSV_HEADER = metadata_id, file_list, temporal_segment_start, temporal_segment_end, metadata
The script will generate clips for actions (classes) that appear in label_filepath, a mapping of class labels
to class ID numbers. The label_filepath file should have the format:
Action1 0
Action2 1
Action3 2
Optionally, "negative" examples can be extracted in which no action-of-interest occurs. To generate negative
examples, the name to give to the negative class must be provided with no_action_class. Negative clips can be
extracted in two ways: either all contiguous non-overlapping negative clips can be extracted or a specified
number of negative examples can be randomly sampled. This behaviour can be controlled using the `contiguous`
flag. The sample_annotated_only flag can be used to specify whether negative samples are extracted from any
video in video_dir, or only those with annotations. The script outputs clips into subdirectories of clip_dir
specific to each class and generates a label file that maps each filename to the clip's class label.
"""
import argparse
import ast
import os
import sys
import pandas as pd
sys.path.append("../../../utils_cv/action_recognition/")
from video_annotation_utils import (
parse_video_file_name,
read_classes_file,
create_clip_file_name,
get_clip_action_label,
extract_clip,
extract_negative_samples_per_file,
extract_contiguous_negative_clips,
extract_sampled_negative_clips
)
def main(
annotation_filepath,
has_header,
video_dir,
clip_dir,
classes_filepath,
label_filepath,
clip_format,
clip_margin,
clip_length,
no_action_class,
contiguous,
negative_clip_length,
negative_clip_margin,
sample_annotated_only,
num_negative_samples,
):
# set pandas display
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
if has_header:
skiprows = 1
else:
skiprows = 0
# read in the start and end times of the clips, dropping records with no associated label
video_info_df = pd.read_csv(annotation_filepath, skiprows=skiprows)
video_info_df = pd.read_csv(annotation_filepath, skiprows=1)
video_info_df = video_info_df.loc[video_info_df["metadata"] != "{}"]
video_info_df["file_list"] = video_info_df.apply(
lambda x: parse_video_file_name(x),
axis=1,
)
# create clip file name and label
video_info_df["clip_file_name"] = video_info_df.apply(
lambda x: create_clip_file_name(x, clip_file_format=clip_format),
axis=1,
)
video_info_df["clip_action_label"] = video_info_df.apply(
lambda x: get_clip_action_label(x), axis=1
)
# remove clips whose action label is '_DEFAULT'
video_info_df = video_info_df.loc[
video_info_df["clip_action_label"] != "_DEFAULT"
]
# read list of classes
classes = read_classes_file(classes_filepath)
if no_action_class is not None:
if no_action_class not in classes:
raise Exception("no_action_class does not appear in list of classes.")
# filter annotations to only include actions that appear in the classes file
video_info_df = video_info_df[video_info_df["clip_action_label"].isin(classes.keys())]
# extract all positive examples
video_info_df.apply(lambda x: extract_clip(x, video_dir, clip_dir), axis=1)
# write the label
video_info_df[["clip_file_name", "clip_action_label"]].to_csv(
label_filepath, index=False
# write the labels for positive examples
video_info_df["clip_file_path"] = video_info_df.apply(lambda row: os.path.join(row.clip_action_label, row.clip_file_name), axis=1)
video_info_df["clip_file_path"] = video_info_df["clip_file_path"].apply(lambda x: os.path.splitext(x)[0])
video_info_df["clip_class_id"] = video_info_df["clip_action_label"].apply(lambda x: classes[x])
video_info_df[["clip_file_path", "clip_class_id"]].to_csv(
label_filepath, header=None, index=False, sep=' '
)
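# The label file now contains one space-separated line per positive clip,
# mapping the clip's relative path (class subdirectory plus file stem) to
# its class ID, e.g. (hypothetical values): Action1/myvideo_3 0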
# Extract negative samples
# add column with file name
video_info_df["video_file"] = video_info_df.apply(
lambda x: ast.literal_eval(x.file_list)[0], axis=1
)
negative_clip_dir = os.path.join(clip_dir, "negative_samples")
video_file_list = list(video_info_df["video_file"].unique())
negative_sample_info_df = pd.DataFrame()
for video_file in video_file_list:
res_df = extract_negative_samples_per_file(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_format,
ignore_clip_length=clip_margin,
clip_length=clip_length,
skip_clip_length=clip_margin,
)
# Extract negative samples if required
if no_action_class:
negative_clip_dir = os.path.join(clip_dir, no_action_class)
if not os.path.exists(negative_clip_dir):
os.makedirs(negative_clip_dir)
if contiguous:
video_files = list(video_info_df["file_list"].unique())
negative_sample_info_df = pd.DataFrame()
for video_file in video_files:
res_df = extract_contiguous_negative_clips(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_format,
no_action_class,
ignore_clip_length=negative_clip_margin,
clip_length=negative_clip_length,
skip_clip_length=negative_clip_margin,
)
negative_sample_info_df = negative_sample_info_df.append(res_df)
with open(label_filepath, 'a') as f:
for index, row in negative_sample_info_df.iterrows():
f.write("\""+row.negative_clip_file_name+"\""+" "+str(classes[no_action_class])+"\n")
else:
# get list of original video files
video_files = os.listdir(video_dir)
negative_sample_info_df = negative_sample_info_df.append(res_df)
negative_sample_info_df.to_csv(
os.path.join(negative_clip_dir, "negative_clip_info.csv"), index=False
)
if sample_annotated_only:
video_files = list(set(video_info_df["file_list"]) & set(video_files))
extract_sampled_negative_clips(
video_info_df,
num_negative_samples,
video_files,
video_dir,
clip_dir,
classes,
no_action_class,
negative_clip_length,
clip_format,
label_filepath,
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-A", "--annotation_filepath", help="CSV filepath from the annotator"
"--annotation_filepath", help="CSV filepath from the annotator", required=True,
)
parser.add_argument(
"-H",
"--has_header",
help="Set if the annotation file has header",
action="store_true",
)
parser.add_argument("-I", "--input_dir", help="Input video dir")
parser.add_argument(
"-O",
"--output_dir",
help="Output dir where the extracted clips will be stored",
default="./outputs",
"--video_dir", help="Input video dirictory", required=True
)
parser.add_argument(
"--clip_dir",
help="Output directory where the extracted clips will be stored",
required=True,
)
parser.add_argument(
"--classes_filepath", help="Path to file defining classes and class IDs", required=True
)
parser.add_argument(
"-L",
"--label_filepath",
help="Path where the label csv will be stored",
default="./outputs/labels.csv",
help="Path where the label file will be stored",
required=True,
)
parser.add_argument("-F", "--clip_format", default="mp4")
parser.add_argument(
"-M",
"--clip_margin",
"--clip_format", default="mp4"
)
parser.add_argument(
"--no_action_class",
help="Label for the no action class. Provide this argument to create negative examples."
)
parser.add_argument(
"--contiguous",
help="Set to true to extract all non-overlapping negative samples. Otherwise extract num_negative_samples randomly sampled negative clips.",
action="store_true",
default=False,
)
parser.add_argument(
"--negative_clip_length",
type=float,
help="The length of negative samples to extract",
default=2.0,
)
parser.add_argument(
"--negative_clip_margin",
type=float,
help="The length around the positive samples to be ignored for negative sampling",
default=3.0,
)
parser.add_argument(
"-T",
"--clip_length",
"--sample_annotated_only",
help="Source negative clips only from videos that have at least one positive action (only for non-contiguous samples)",
action="store_true",
)
parser.add_argument(
"--num_negative_samples",
type=float,
help="The length of negative samples to extract",
default=2.0,
help="The number of negative clips to sample. This only applies to non-contiguous sampling.",
default=0.0
)
args = parser.parse_args()
main(
annotation_filepath=args.annotation_filepath,
has_header=args.has_header,
video_dir=args.input_dir,
clip_dir=args.output_dir,
video_dir=args.video_dir,
clip_dir=args.clip_dir,
classes_filepath=args.classes_filepath,
label_filepath=args.label_filepath,
clip_format=args.clip_format,
clip_margin=args.clip_margin,
clip_length=args.clip_length,
no_action_class=args.no_action_class,
contiguous=args.contiguous,
negative_clip_length=args.negative_clip_length,
negative_clip_margin=args.negative_clip_margin,
sample_annotated_only=args.sample_annotated_only,
num_negative_samples=args.num_negative_samples
)
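# Example invocation (hypothetical paths and class name; flags as defined in
# the argparse section above):
# python clip_extraction.py --annotation_filepath annotations.csv \
#     --video_dir ./videos --clip_dir ./clips \
#     --classes_filepath classes.txt --label_filepath labels.txt \
#     --no_action_class NoAction --contiguous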

View file

@@ -0,0 +1,116 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Splits a video dataset created with clip_extraction.py into training and evaluation sets. Provide the
label file produced by clip_extraction.py via label_filepath and the proportion of examples assigned to
the training split via train_proportion. Optionally, if hard negative mining is required, a specified
number of negative examples can be reserved for a negative candidate set and a negative test set via
the num_negatives_set and num_negatives_test parameters.
"""
import argparse
import random
import math
import csv
def output_split(labels_list, output_file):
with open(output_file, 'a') as f:
for label in labels_list:
f.write("\""+label[0]+"\""+" "+label[1]+"\n")
def main(
label_filepath,
train_proportion,
num_negatives_set,
num_negatives_test,
negative_label_id,
):
labels = {}
num_negative = 0
num_non_negative = 0
with open(label_filepath) as f:
freader = csv.reader(f, delimiter=" ", skipinitialspace=True)
for line in freader:
video, label = line[0], line[1]
labels[video] = label
if negative_label_id:
if label == negative_label_id:
num_negative += 1
else:
num_non_negative += 1
if num_negatives_set:
if num_negatives_set + num_negatives_test > num_negative:
raise Exception("Number of examples for negative candidate set and test set exceed number of negative examples")
negative_candidate_labels = {}
negative_test_labels = {}
train_val_labels = {}
negatives_sampled = 0
negatives_test_sampled = 0
for k, v in labels.items():
if (v == str(negative_label_id)) and (negatives_sampled < num_negatives_set):
negative_candidate_labels[k] = v
negatives_sampled += 1
elif (v == str(negative_label_id)) and (negatives_test_sampled < num_negatives_test):
negative_test_labels[k] = v
negatives_test_sampled += 1
else:
train_val_labels[k] = v
negative_candidate_samples = random.sample(list(negative_candidate_labels.items()), k=len(negative_candidate_labels))
output_split(negative_candidate_samples, "neg_set.txt")
negatives_test_samples = random.sample(list(negative_test_labels.items()), k=len(negative_test_labels))
output_split(negatives_test_samples, "neg_test.txt")
else:
train_val_labels = labels
samples = random.sample(list(train_val_labels.items()), k=len(train_val_labels))
split_point = math.floor(train_proportion*len(samples))
train = samples[:split_point]
val = samples[split_point:]
output_split(train, "train.txt")
output_split(val, "val.txt")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--label_filepath",
help="Path to the label file",
required=True,
)
parser.add_argument(
"--train_proportion",
help="Proportion of examples to go in training set",
type=float,
required=True,
)
parser.add_argument(
"--num_negatives_set",
help="Number of negative samples to include in a negative condidate set for hard negative mining",
type=int,
default=0,
)
parser.add_argument(
"--num_negatives_test",
help="Number of negative samples to reserve for negative test set for hard negative mining",
type=int,
default=0,
)
parser.add_argument(
"--negative_label_id",
help="Label of the negative class if applicable"
)
args = parser.parse_args()
main(
label_filepath=args.label_filepath,
train_proportion=args.train_proportion,
num_negatives_set=args.num_negatives_set,
num_negatives_test=args.num_negatives_test,
negative_label_id=args.negative_label_id
)
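# Example invocation (hypothetical values): an 80/20 train/validation split,
# reserving 50 negatives for hard negative mining and 20 for a negative test
# set, where class ID 3 is the negative class:
# python split_examples.py --label_filepath labels.txt --train_proportion 0.8 \
#     --num_negatives_set 50 --num_negatives_test 20 --negative_label_id 3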

View file

@@ -3,9 +3,12 @@
import ast
import os
import subprocess
from collections import defaultdict
import random
import numpy as np
import pandas as pd
# transform the encoded video:
def video_format_conversion(video_path, output_path, h264_format=False):
@@ -40,6 +43,38 @@ def video_format_conversion(video_path, output_path, h264_format=False):
)
def parse_video_file_name(row):
"""
Extract file basename from file path
:param row: Pandas.Series.
One row of the video annotation output from the VIA tool.
:return: str.
The file basename
"""
video_file = ast.literal_eval(row.file_list)[0]
return os.path.basename(video_file).replace("%20", " ")
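# Example (hypothetical row): if row.file_list is '["videos/my%20video.mp4"]',
# parse_video_file_name returns "my video.mp4".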
def read_classes_file(classes_filepath):
"""
Read file that maps class names to class IDs. The file should be in the format:
ActionName1 0
ActionName2 1
:param classes_filepath: str
The filepath of the classes file
:return: dict
Mapping of class names to class IDs
"""
classes = {}
with open(classes_filepath) as class_file:
for line in class_file:
class_name, class_id = line.split(' ')
classes[class_name] = class_id.rstrip()
return classes
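# Example (hypothetical classes file containing the lines "Walking 0" and
# "Running 1"): read_classes_file("classes.txt") returns
# {"Walking": "0", "Running": "1"}; note that class IDs are kept as strings.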
def create_clip_file_name(row, clip_file_format="mp4"):
"""
Create the output clip file name.
@@ -52,7 +87,8 @@ def create_clip_file_name(row, clip_file_format="mp4"):
:return: str.
The output clip file name.
"""
video_file = ast.literal_eval(row.file_list)[0]
video_file = os.path.splitext(row["file_list"])[0]
clip_id = row["# CSV_HEADER = metadata_id"]
clip_file = "{}_{}.{}".format(video_file, clip_id, clip_file_format)
return clip_file
@@ -151,11 +187,16 @@ def extract_clip(row, video_dir, clip_dir, ffmpeg_path=None):
os.makedirs(clip_sub_dir)
duration = end_time - start_time
video_file = ast.literal_eval(row.file_list)[0]
video_file = row.file_list
video_path = os.path.join(video_dir, video_file)
clip_file = row.clip_file_name
clip_path = os.path.join(clip_sub_dir, clip_file)
# skip if video already extracted
if os.path.exists(clip_path):
print("Extracted clip already exists. Skipping extraction.")
return
if not os.path.exists(video_path):
raise ValueError(
"The video path '{}' is not valid.".format(video_path)
@@ -195,6 +236,26 @@ def get_video_length(video_file_path):
return float(result.stdout)
def check_interval_overlaps(clip_start, clip_end, interval_list):
"""
Check whether a clip overlaps any intervals from a list of intervals.
:param clip_start: float
Time in seconds of the start of the clip.
:param clip_end: float
Time in seconds of the end of the clip.
:param interval_list: list of tuples (float, float)
List of time intervals.
:return: bool
True if the clip overlaps any of the intervals in interval_list.
"""
for interval in interval_list:
if (clip_start < interval[1]) and (clip_end > interval[0]):
return True
return False
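# Example (hypothetical values): a clip spanning 4.0s-6.0s overlaps the
# interval (5.0, 9.0) but not (0.0, 3.5), so:
# check_interval_overlaps(4.0, 6.0, [(0.0, 3.5), (5.0, 9.0)])  # -> True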
def _merge_temporal_interval(temporal_interval_list):
"""
Merge the temporal intervals in the input temporal interval list. This is for situations
@@ -308,16 +369,17 @@ def _split_interval_list(
return interval_res
def extract_negative_samples_per_file(
def extract_contiguous_negative_clips(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_file_format,
clip_format,
no_action_class,
ignore_clip_length,
clip_length,
ffmpeg_path=None,
skip_clip_length=0,
ffmpeg_path=None,
):
"""
Extract contiguous negative clips from a single video file.
@@ -330,7 +392,7 @@ def extract_negative_samples_per_file(
The data frame which contains the video annotation output.
:param negative_clip_dir: str.
The directory of the output negative clips.
:param clip_file_format: str.
:param clip_format: str.
The format of the output negative clips.
:param ignore_clip_length: float.
The clip length to ignore in the left/start of the interval. This is used to avoid creating
@@ -354,18 +416,18 @@ def extract_negative_samples_per_file(
# get the actions intervals
if "temporal_coordinates" in video_info_df.columns:
temporal_interval_series = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_coordinates"
video_info_df["file_list"] == video_file, "temporal_coordinates"
]
temporal_interval_list = temporal_interval_series.apply(
lambda x: ast.literal_eval(x)
).tolist()
elif "temporal_segment_start" in video_info_df.columns:
video_start_list = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_segment_start"
].to_list()
video_info_df["file_list"] == video_file, "temporal_segment_start"
].tolist()
video_end_list = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_segment_end"
].to_list()
video_info_df["file_list"] == video_file, "temporal_segment_end"
].tolist()
temporal_interval_list = list(zip(video_start_list, video_end_list))
else:
raise Exception("There is no temporal information in the csv.")
@@ -411,16 +473,18 @@ def extract_negative_samples_per_file(
for i, clip_interval in enumerate(clip_interval_list):
start_time = clip_interval[0]
duration = clip_interval[1] - clip_interval[0]
negative_clip_file = "{}_{}.{}".format(video_file, i, clip_file_format)
negative_clip_file_list.append(negative_clip_file)
negative_clip_path = os.path.join(
negative_clip_dir, negative_clip_file
)
video_fname = os.path.splitext(os.path.basename(video_file_path))[0]
clip_fname = video_fname+no_action_class+str(i)
clip_subdir_fname = os.path.join(no_action_class, clip_fname)
negative_clip_file_list.append(clip_subdir_fname)
_extract_clip_ffmpeg(
start_time,
duration,
video_file_path,
negative_clip_path,
os.path.join(negative_clip_dir, clip_fname+"."+clip_format),
ffmpeg_path,
)
@@ -431,3 +495,79 @@ def extract_negative_samples_per_file(
"video_file": video_file,
}
)
def extract_sampled_negative_clips(
video_info_df,
num_negative_samples,
video_files,
video_dir,
clip_dir,
classes,
no_action_class,
negative_clip_length,
clip_format,
label_filepath,
):
"""
Extract randomly sampled negative clips from a set of videos.
:param video_info_df: Pandas.DataFrame
DataFrame containing annotated video information.
:param num_negative_samples: int
Number of negative samples to extract.
:param video_files: list of str
List of original video files.
:param video_dir: str
Directory of the original videos.
:param clip_dir: str
Directory of the extracted clips.
:param classes: dict
Mapping of class names to class IDs.
:param no_action_class: str
Name of the no-action class.
:param negative_clip_length: float
Length of clips in seconds.
:param clip_format: str
Format for video files.
:param label_filepath: str
Path to the label file.
:return: None
"""
# find video lengths
video_len = {}
for video in video_files:
video_len[video] = get_video_length(os.path.join(video_dir, video))
positive_intervals = defaultdict(list)
# get temporal interval of positive samples
for index, row in video_info_df.iterrows():
clip_file = row.file_list
int_start = row.temporal_segment_start
int_end = row.temporal_segment_end
segment_int = (int_start, int_end)
positive_intervals[clip_file].append(segment_int)
clips_sampled = 0
while clips_sampled < num_negative_samples:
# pick a random file from the list of videos
negative_sample_file = random.choice(video_files)
# get video duration
duration = video_len[negative_sample_file]
# pick random start time for clip
clip_start = random.uniform(0.0, duration)
clip_end = clip_start + negative_clip_length
if clip_end > duration:
continue
# skip this candidate if it overlaps any annotated positive interval in the chosen file
if negative_sample_file in positive_intervals.keys():
clip_positive_intervals = positive_intervals[negative_sample_file]
if check_interval_overlaps(clip_start, clip_end, clip_positive_intervals):
continue
video_path = os.path.join(video_dir, negative_sample_file)
video_fname = os.path.splitext(negative_sample_file)[0]
clip_fname = video_fname+no_action_class+str(clips_sampled)
clip_subdir_fname = os.path.join(no_action_class, clip_fname)
_extract_clip_ffmpeg(
clip_start, negative_clip_length, video_path, os.path.join(clip_dir, clip_subdir_fname+"."+clip_format),
)
with open(label_filepath, 'a') as f:
f.write("\""+clip_subdir_fname+"\""+" "+str(classes[no_action_class])+"\n")
clips_sampled += 1
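# Note: the while loop above performs rejection sampling. A candidate
# (file, start time) is drawn uniformly at random and discarded if the clip
# would run past the end of the video or overlap an annotated positive
# interval; only accepted candidates count towards num_negative_samples.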