This commit is contained in:
angusrtaylor 2020-02-10 16:20:16 +00:00
Parent c6743a9c0f
Commit e78a1e6448
4 changed files with 429 additions and 94 deletions

View file

@@ -21,10 +21,10 @@ How to use the tool for action recognition:
## Scripts for use with the VIA Tool
The VIA tool outputs annotations as a csv file. Often, however, we need each annotated action to be written as its own clip in a separate file. For this, we provide some utility functions (in this folder) which help with:
- Extraction of each action as a "positive" clip, and of "negative" clips, defined as video segments where no action-of-interest occurs.
- Conversion of the video clips to a format which the VIA tool knows how to read.
The VIA tool outputs annotations as a csv file. Often, however, we need each annotated action to be written as its own clip in a separate file. These clips can then serve as training examples for action recognition models. We provide some scripts to aid in the construction of such datasets:
- [video_conversion.py](./video_conversion.py) - Conversion of the video clips to a format which the VIA tool knows how to read.
- [clip_extraction.py](./clip_extraction.py) - Extraction of each annotated action as a separate clip. Optionally, "negative" clips can be generated, in which no action-of-interest occurs. Negative clips can be extracted in two ways: either all contiguous non-overlapping negative clips can be extracted, or a specified number of negative examples can be randomly sampled. This behaviour can be controlled using the `contiguous` flag. The script outputs clips into directories specific to each class and generates a label file that maps each filename to the clip's class label (see the sketch after this list).
- [split_examples.py](./split_examples.py) - Splits generated example clips into training and evaluation sets. Optionally, a negative candidate set and negative test set can be generated for hard negative mining.
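
As a quick illustration of the label file format, here is a minimal sketch of loading the labels for training. The `load_labels` helper and the file path are hypothetical; the format assumed is the space-separated `"clip_path" class_id` lines written by `clip_extraction.py`:

```python
import csv

def load_labels(label_filepath):
    # Each line maps a clip's relative path (class subdirectory plus file
    # stem) to its numeric class ID, e.g.: Action1/myvideo_3 0
    labels = {}
    with open(label_filepath) as f:
        reader = csv.reader(f, delimiter=" ", skipinitialspace=True)
        for row in reader:
            if row:  # skip blank lines
                labels[row[0]] = int(row[1])
    return labels

# e.g. labels = load_labels("labels.txt")
```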
## Annotation Tools Comparison

View file

@@ -4,145 +4,224 @@
# prerequisite:
# (1) download and extract ffmpeg: https://github.com/adaptlearning/adapt_authoring/wiki/Installing-FFmpeg
# (2) make sure ffmpeg is on your system's PATH environment variable
# the script depends on the following fixed properties of the csv:
# skiprows=1
"""
Extracts clips from videos stored in video_dir. Clips are defined from the annotated actions recorded in
annotation_filepath, which is the raw output csv from the VIA video annotator tool. This csv should have the format:
# Exported using VGG Image Annotator (http://www.robots.ox.ac.uk/~vgg/software/via)
# CSV_HEADER = metadata_id, file_list, temporal_segment_start, temporal_segment_end, metadata
The script will generate clips for actions (classes) that appear in label_filepath, a mapping of class labels
to class ID numbers. The label_filepath file should have the format:
Action1 0
Action2 1
Action3 2
Optionally, "negative" examples can be extracted in which no action-of-interest occurs. To generate negative
examples, the name to give to the negative class must be provided with no_action_class. Negative clips can be
extracted in two ways: either all contiguous non-overlapping negative clips can be extracted or a specified
number of negative examples can be randomly sampled. This behaviour can be controlled using the `contiguous`
flag. The sample_annotated_only flag can be used to specify whether negative samples are extracted from any
video in video_dir, or only those with annotations. The script outputs clips into subdirectories of clip_dir
specific to each class and generates a label file that maps each filename to the clip's class label.
"""
import argparse
import ast
import os
import sys
import pandas as pd
sys.path.append("../../../utils_cv/action_recognition/")
from video_annotation_utils import (
parse_video_file_name,
read_classes_file,
create_clip_file_name,
get_clip_action_label,
extract_clip,
extract_negative_samples_per_file,
extract_contiguous_negative_clips,
extract_sampled_negative_clips
)
def main(
annotation_filepath,
has_header,
video_dir,
clip_dir,
classes_filepath,
label_filepath,
clip_format,
clip_margin,
clip_length,
no_action_class,
contiguous,
negative_clip_length,
negative_clip_margin,
sample_annotated_only,
num_negative_samples,
):
# set pandas display
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
if has_header:
skiprows = 1
else:
skiprows = 0
# read in the start and end times of the clips, dropping records with no associated label
video_info_df = pd.read_csv(annotation_filepath, skiprows=skiprows)
video_info_df = pd.read_csv(annotation_filepath, skiprows=1)
video_info_df = video_info_df.loc[video_info_df["metadata"] != "{}"]
video_info_df["file_list"] = video_info_df.apply(
lambda x: parse_video_file_name(x),
axis=1,
)
# create clip file name and label
video_info_df["clip_file_name"] = video_info_df.apply(
lambda x: create_clip_file_name(x, clip_file_format=clip_format),
axis=1,
)
video_info_df["clip_action_label"] = video_info_df.apply(
lambda x: get_clip_action_label(x), axis=1
)
# remove clips whose action label is '_DEFAULT'
video_info_df = video_info_df.loc[
video_info_df["clip_action_label"] != "_DEFAULT"
]
# read list of classes
classes = read_classes_file(classes_filepath)
if no_action_class is not None:
if no_action_class not in classes:
raise Exception("no_action_class does not appear in list of classes.")
# filter annotations to only include actions that appear in the classes file
video_info_df = video_info_df[video_info_df["clip_action_label"].isin(classes.keys())]
# extract all positive examples
video_info_df.apply(lambda x: extract_clip(x, video_dir, clip_dir), axis=1)
# write the label
video_info_df[["clip_file_name", "clip_action_label"]].to_csv(
label_filepath, index=False
# write the labels for positive examples
video_info_df["clip_file_path"] = video_info_df.apply(lambda row: os.path.join(row.clip_action_label, row.clip_file_name), axis=1)
video_info_df["clip_file_path"] = video_info_df["clip_file_path"].apply(lambda x: os.path.splitext(x)[0])
video_info_df["clip_class_id"] = video_info_df["clip_action_label"].apply(lambda x: classes[x])
video_info_df[["clip_file_path", "clip_class_id"]].to_csv(
label_filepath, header=None, index=False, sep=' '
)
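# The label file now contains one space-separated line per positive clip,
# mapping the clip's relative path (class subdirectory plus file stem) to
# its class ID, e.g. (hypothetical values): Action1/myvideo_3 0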
# Extract negative samples
# add column with file name
video_info_df["video_file"] = video_info_df.apply(
lambda x: ast.literal_eval(x.file_list)[0], axis=1
)
negative_clip_dir = os.path.join(clip_dir, "negative_samples")
video_file_list = list(video_info_df["video_file"].unique())
negative_sample_info_df = pd.DataFrame()
for video_file in video_file_list:
res_df = extract_negative_samples_per_file(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_format,
ignore_clip_length=clip_margin,
clip_length=clip_length,
skip_clip_length=clip_margin,
)
# Extract negative samples if required
if no_action_class:
negative_clip_dir = os.path.join(clip_dir, no_action_class)
if not os.path.exists(negative_clip_dir):
os.makedirs(negative_clip_dir)
if contiguous:
video_files = list(video_info_df["file_list"].unique())
negative_sample_info_df = pd.DataFrame()
for video_file in video_files:
res_df = extract_contiguous_negative_clips(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_format,
no_action_class,
ignore_clip_length=negative_clip_margin,
clip_length=negative_clip_length,
skip_clip_length=negative_clip_margin,
)
negative_sample_info_df = negative_sample_info_df.append(res_df)
with open(label_filepath, 'a') as f:
for index, row in negative_sample_info_df.iterrows():
f.write("\""+row.negative_clip_file_name+"\""+" "+str(classes[no_action_class])+"\n")
else:
# get list of original video files
video_files = os.listdir(video_dir)
negative_sample_info_df = negative_sample_info_df.append(res_df)
negative_sample_info_df.to_csv(
os.path.join(negative_clip_dir, "negative_clip_info.csv"), index=False
)
if sample_annotated_only:
video_files = list(set(video_info_df["file_list"]) & set(video_files))
extract_sampled_negative_clips(
video_info_df,
num_negative_samples,
video_files,
video_dir,
clip_dir,
classes,
no_action_class,
negative_clip_length,
clip_format,
label_filepath,
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"-A", "--annotation_filepath", help="CSV filepath from the annotator"
"--annotation_filepath", help="CSV filepath from the annotator", required=True,
)
parser.add_argument(
"-H",
"--has_header",
help="Set if the annotation file has header",
action="store_true",
)
parser.add_argument("-I", "--input_dir", help="Input video dir")
parser.add_argument(
"-O",
"--output_dir",
help="Output dir where the extracted clips will be stored",
default="./outputs",
"--video_dir", help="Input video dirictory", required=True
)
parser.add_argument(
"--clip_dir",
help="Output directory where the extracted clips will be stored",
required=True,
)
parser.add_argument(
"--classes_filepath", help="Path to file defining classes and class IDs", required=True
)
parser.add_argument(
"-L",
"--label_filepath",
help="Path where the label csv will be stored",
default="./outputs/labels.csv",
help="Path where the label file will be stored",
required=True,
)
parser.add_argument("-F", "--clip_format", default="mp4")
parser.add_argument(
"-M",
"--clip_margin",
"--clip_format", default="mp4"
)
parser.add_argument(
"--no_action_class",
help="Label for the no action class. Provide this argument to create negative examples."
)
parser.add_argument(
"--contiguous",
help="Set to true to extract all non-overlapping negative samples. Otherwise extract num_negative_samples randomly sampled negative clips.",
action="store_true",
default=False,
)
parser.add_argument(
"--negative_clip_length",
type=float,
help="The length of negative samples to extract",
default=2.0,
)
parser.add_argument(
"--negative_clip_margin",
type=float,
help="The length around the positive samples to be ignored for negative sampling",
default=3.0,
)
parser.add_argument(
"-T",
"--clip_length",
"--sample_annotated_only",
help="Source negative clips only from videos that have at least one positive action (only for non-contiguous samples)",
action="store_true",
)
parser.add_argument(
"--num_negative_samples",
type=float,
help="The length of negative samples to extract",
default=2.0,
help="The number of negative clips to sample. This only applies to non-contiguous sampling.",
default=0.0
)
args = parser.parse_args()
main(
annotation_filepath=args.annotation_filepath,
has_header=args.has_header,
video_dir=args.input_dir,
clip_dir=args.output_dir,
video_dir=args.video_dir,
clip_dir=args.clip_dir,
classes_filepath=args.classes_filepath,
label_filepath=args.label_filepath,
clip_format=args.clip_format,
clip_margin=args.clip_margin,
clip_length=args.clip_length,
no_action_class=args.no_action_class,
contiguous=args.contiguous,
negative_clip_length=args.negative_clip_length,
negative_clip_margin=args.negative_clip_margin,
sample_annotated_only=args.sample_annotated_only,
num_negative_samples=args.num_negative_samples
)
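# Example invocation (hypothetical paths and class name; flags as defined in
# the argparse section above):
# python clip_extraction.py --annotation_filepath annotations.csv \
#     --video_dir ./videos --clip_dir ./clips \
#     --classes_filepath classes.txt --label_filepath labels.txt \
#     --no_action_class NoAction --contiguous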

View file

@@ -0,0 +1,116 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
"""
Splits a video dataset created with clip_extraction.py into training and evaluation sets. Provide the
label file produced by clip_extraction.py via label_filepath and the proportion of examples assigned to
the training split via train_proportion. Optionally, if hard negative mining is required, a specified
number of negative examples can be reserved for a negative candidate set and a negative test set via
the num_negatives_set and num_negatives_test parameters.
"""
import argparse
import random
import math
import csv
def output_split(labels_list, output_file):
with open(output_file, 'a') as f:
for label in labels_list:
f.write("\""+label[0]+"\""+" "+label[1]+"\n")
def main(
label_filepath,
train_proportion,
num_negatives_set,
num_negatives_test,
negative_label_id,
):
labels = {}
num_negative = 0
num_non_negative = 0
with open(label_filepath) as f:
freader = csv.reader(f, delimiter=" ", skipinitialspace=True)
for line in freader:
video, label = line[0], line[1]
labels[video] = label
if negative_label_id:
if label == negative_label_id:
num_negative += 1
else:
num_non_negative += 1
if num_negatives_set:
if num_negatives_set + num_negatives_test > num_negative:
raise Exception("Number of examples for negative candidate set and test set exceed number of negative examples")
negative_candidate_labels = {}
negative_test_labels = {}
train_val_labels = {}
negatives_sampled = 0
negatives_test_sampled = 0
for k, v in labels.items():
if (v == str(negative_label_id)) and (negatives_sampled < num_negatives_set):
negative_candidate_labels[k] = v
negatives_sampled += 1
elif (v == str(negative_label_id)) and (negatives_test_sampled < num_negatives_test):
negative_test_labels[k] = v
negatives_test_sampled += 1
else:
train_val_labels[k] = v
negative_candidate_samples = random.sample(list(negative_candidate_labels.items()), k=len(negative_candidate_labels))
output_split(negative_candidate_samples, "neg_set.txt")
negatives_test_samples = random.sample(list(negative_test_labels.items()), k=len(negative_test_labels))
output_split(negatives_test_samples, "neg_test.txt")
else:
train_val_labels = labels
samples = random.sample(list(train_val_labels.items()), k=len(train_val_labels))
split_point = math.floor(train_proportion*len(samples))
train = samples[:split_point]
val = samples[split_point:]
output_split(train, "train.txt")
output_split(val, "val.txt")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--label_filepath",
help="Path to the label file",
required=True,
)
parser.add_argument(
"--train_proportion",
help="Proportion of examples to go in training set",
type=float,
required=True,
)
parser.add_argument(
"--num_negatives_set",
help="Number of negative samples to include in a negative condidate set for hard negative mining",
type=int,
default=0,
)
parser.add_argument(
"--num_negatives_test",
help="Number of negative samples to reserve for negative test set for hard negative mining",
type=int,
default=0,
)
parser.add_argument(
"--negative_label_id",
help="Label of the negative class if applicable"
)
args = parser.parse_args()
main(
label_filepath=args.label_filepath,
train_proportion=args.train_proportion,
num_negatives_set=args.num_negatives_set,
num_negatives_test=args.num_negatives_test,
negative_label_id=args.negative_label_id
)
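# Example invocation (hypothetical values): an 80/20 train/validation split,
# reserving 50 negatives for hard negative mining and 20 for a negative test
# set, where class ID 3 is the negative class:
# python split_examples.py --label_filepath labels.txt --train_proportion 0.8 \
#     --num_negatives_set 50 --num_negatives_test 20 --negative_label_id 3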

View file

@@ -3,9 +3,12 @@
import ast
import os
import subprocess
from collections import defaultdict
import random
import numpy as np
import pandas as pd
# transform the encoded video:
def video_format_conversion(video_path, output_path, h264_format=False):
@@ -40,6 +43,38 @@ def video_format_conversion(video_path, output_path, h264_format=False):
)
def parse_video_file_name(row):
"""
Extract file basename from file path
:param row: Pandas.Series.
One row of the video annotation output from the VIA tool.
:return: str.
The file basename
"""
video_file = ast.literal_eval(row.file_list)[0]
return os.path.basename(video_file).replace("%20", " ")
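# Example (hypothetical row): if row.file_list is '["videos/my%20video.mp4"]',
# parse_video_file_name returns "my video.mp4".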
def read_classes_file(classes_filepath):
"""
Read file that maps class names to class IDs. The file should be in the format:
ActionName1 0
ActionName2 1
:param classes_filepath: str
The filepath of the classes file
:return: dict
Mapping of class names to class IDs
"""
classes = {}
with open(classes_filepath) as class_file:
for line in class_file:
class_name, class_id = line.split(' ')
classes[class_name] = class_id.rstrip()
return classes
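# Example (hypothetical classes file containing the lines "Walking 0" and
# "Running 1"): read_classes_file("classes.txt") returns
# {"Walking": "0", "Running": "1"}; note that class IDs are kept as strings.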
def create_clip_file_name(row, clip_file_format="mp4"):
"""
Create the output clip file name.
@@ -52,7 +87,8 @@ def create_clip_file_name(row, clip_file_format="mp4"):
:return: str.
The output clip file name.
"""
video_file = ast.literal_eval(row.file_list)[0]
video_file = os.path.splitext(row["file_list"])[0]
clip_id = row["# CSV_HEADER = metadata_id"]
clip_file = "{}_{}.{}".format(video_file, clip_id, clip_file_format)
return clip_file
@@ -151,11 +187,16 @@ def extract_clip(row, video_dir, clip_dir, ffmpeg_path=None):
os.makedirs(clip_sub_dir)
duration = end_time - start_time
video_file = ast.literal_eval(row.file_list)[0]
video_file = row.file_list
video_path = os.path.join(video_dir, video_file)
clip_file = row.clip_file_name
clip_path = os.path.join(clip_sub_dir, clip_file)
# skip if video already extracted
if os.path.exists(clip_path):
print("Extracted clip already exists. Skipping extraction.")
return
if not os.path.exists(video_path):
raise ValueError(
"The video path '{}' is not valid.".format(video_path)
@@ -195,6 +236,26 @@ def get_video_length(video_file_path):
return float(result.stdout)
def check_interval_overlaps(clip_start, clip_end, interval_list):
"""
Check whether a clip overlaps any intervals from a list of intervals.
:param clip_start: float
Time in seconds of the start of the clip.
:param clip_end: float
Time in seconds of the end of the clip.
:param interval_list: list of tuples (float, float)
List of time intervals.
:return: bool
True if the clip overlaps any of the intervals in interval_list.
"""
for interval in interval_list:
if (clip_start < interval[1]) and (clip_end > interval[0]):
return True
return False
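# Example (hypothetical values): a clip spanning 4.0s-6.0s overlaps the
# interval (5.0, 9.0) but not (0.0, 3.5), so:
# check_interval_overlaps(4.0, 6.0, [(0.0, 3.5), (5.0, 9.0)])  # -> True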
def _merge_temporal_interval(temporal_interval_list):
"""
Merge the temporal intervals in the input temporal interval list. This is for situations
@@ -308,16 +369,17 @@ def _split_interval_list(
return interval_res
def extract_negative_samples_per_file(
def extract_contiguous_negative_clips(
video_file,
video_dir,
video_info_df,
negative_clip_dir,
clip_file_format,
clip_format,
no_action_class,
ignore_clip_length,
clip_length,
ffmpeg_path=None,
skip_clip_length=0,
ffmpeg_path=None,
):
"""
Extract contiguous negative clips from a single video file.
@@ -330,7 +392,7 @@ def extract_negative_samples_per_file(
The data frame which contains the video annotation output.
:param negative_clip_dir: str.
The directory of the output negative clips.
:param clip_file_format: str.
:param clip_format: str.
The format of the output negative clips.
:param ignore_clip_length: float.
The clip length to ignore in the left/start of the interval. This is used to avoid creating
@@ -354,18 +416,18 @@ def extract_negative_samples_per_file(
# get the actions intervals
if "temporal_coordinates" in video_info_df.columns:
temporal_interval_series = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_coordinates"
video_info_df["file_list"] == video_file, "temporal_coordinates"
]
temporal_interval_list = temporal_interval_series.apply(
lambda x: ast.literal_eval(x)
).tolist()
elif "temporal_segment_start" in video_info_df.columns:
video_start_list = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_segment_start"
].to_list()
video_info_df["file_list"] == video_file, "temporal_segment_start"
].tolist()
video_end_list = video_info_df.loc[
video_info_df["video_file"] == video_file, "temporal_segment_end"
].to_list()
video_info_df["file_list"] == video_file, "temporal_segment_end"
].tolist()
temporal_interval_list = list(zip(video_start_list, video_end_list))
else:
raise Exception("There is no temporal information in the csv.")
@@ -411,16 +473,18 @@ def extract_negative_samples_per_file(
for i, clip_interval in enumerate(clip_interval_list):
start_time = clip_interval[0]
duration = clip_interval[1] - clip_interval[0]
negative_clip_file = "{}_{}.{}".format(video_file, i, clip_file_format)
negative_clip_file_list.append(negative_clip_file)
negative_clip_path = os.path.join(
negative_clip_dir, negative_clip_file
)
video_fname = os.path.splitext(os.path.basename(video_file_path))[0]
clip_fname = video_fname+no_action_class+str(i)
clip_subdir_fname = os.path.join(no_action_class, clip_fname)
negative_clip_file_list.append(clip_subdir_fname)
_extract_clip_ffmpeg(
start_time,
duration,
video_file_path,
negative_clip_path,
os.path.join(negative_clip_dir, clip_fname+"."+clip_format),
ffmpeg_path,
)
@@ -431,3 +495,79 @@ def extract_negative_samples_per_file(
"video_file": video_file,
}
)
def extract_sampled_negative_clips(
video_info_df,
num_negative_samples,
video_files,
video_dir,
clip_dir,
classes,
no_action_class,
negative_clip_length,
clip_format,
label_filepath,
):
"""
Extract randomly sampled negative clips from a set of videos.
:param video_info_df: Pandas.DataFrame
DataFrame containing annotated video information.
:param num_negative_samples: int
Number of negative samples to extract.
:param video_files: list of str
List of original video files.
:param video_dir: str
Directory of the original videos.
:param clip_dir: str
Directory of the extracted clips.
:param classes: dict
Mapping of class names to class IDs.
:param no_action_class: str
Name of the no-action class.
:param negative_clip_length: float
Length of clips in seconds.
:param clip_format: str
Format for video files.
:param label_filepath: str
Path to the label file.
:return: None
"""
# find video lengths
video_len = {}
for video in video_files:
video_len[video] = get_video_length(os.path.join(video_dir, video))
positive_intervals = defaultdict(list)
# get temporal interval of positive samples
for index, row in video_info_df.iterrows():
clip_file = row.file_list
int_start = row.temporal_segment_start
int_end = row.temporal_segment_end
segment_int = (int_start, int_end)
positive_intervals[clip_file].append(segment_int)
clips_sampled = 0
while clips_sampled < num_negative_samples:
# pick a random file from the list of videos
negative_sample_file = random.choice(video_files)
# get video duration
duration = video_len[negative_sample_file]
# pick random start time for clip
clip_start = random.uniform(0.0, duration)
clip_end = clip_start + negative_clip_length
if clip_end > duration:
continue
# skip this candidate if it overlaps any annotated positive interval in the chosen file
if negative_sample_file in positive_intervals.keys():
clip_positive_intervals = positive_intervals[negative_sample_file]
if check_interval_overlaps(clip_start, clip_end, clip_positive_intervals):
continue
video_path = os.path.join(video_dir, negative_sample_file)
video_fname = os.path.splitext(negative_sample_file)[0]
clip_fname = video_fname+no_action_class+str(clips_sampled)
clip_subdir_fname = os.path.join(no_action_class, clip_fname)
_extract_clip_ffmpeg(
clip_start, negative_clip_length, video_path, os.path.join(clip_dir, clip_subdir_fname+"."+clip_format),
)
with open(label_filepath, 'a') as f:
f.write("\""+clip_subdir_fname+"\""+" "+str(classes[no_action_class])+"\n")
clips_sampled += 1
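# Note: the while loop above performs rejection sampling. A candidate
# (file, start time) is drawn uniformly at random and discarded if the clip
# would run past the end of the video or overlap an annotated positive
# interval; only accepted candidates count towards num_negative_samples.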