# generate_subset.py

# Copyright 2020 Tuan Chien, James Diprose
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Apply filters to the annotation data and sample a proportion of them.

# Authors: Tuan Chien, Jamie Diprose

import pickle
import random

import numpy as np

from ava_asd.config import read_config


def save_to_file(data, output_file):
    """
    Pickle ``data`` and write the resulting bytes to ``output_file``.

    The object is serialised in memory before the destination is opened, so
    a pickling failure does not create or truncate the output file.
    """
    payload = pickle.dumps(data)
    with open(output_file, 'wb') as handle:
        handle.write(payload)


def is_speaking(sequence):
    """
    Tell whether a sequence counts as speaking.

    Only the label of the last annotation in the sequence is inspected; both
    the audible and the non-audible speaking labels count as speaking.
    """
    speaking_labels = ('SPEAKING_AUDIBLE', 'SPEAKING_NOT_AUDIBLE')
    return sequence[-1].label in speaking_labels


def has_small_face(sequence, thresh=250):
    """
    Tell whether any annotation in the sequence has a face that is too small.

    A face is too small when the shorter side of its ``face_size``
    (a width/height pair) is strictly below ``thresh``.
    """
    face_sizes = (ann.face_size for ann in sequence)
    return any(min(width, height) < thresh for width, height in face_sizes)


def has_label(sequence, labels):
    """
    Check whether the sequence's label is one of the given labels.

    Args:
        sequence: Non-empty sequence of annotations; only the ``label``
            attribute of the last element is read.
        labels: Iterable of labels to match against, or None. None is
            treated as "no labels" so that callers with an optional filter
            list (e.g. ``filter_out=None``) do not crash.

    Returns:
        True if the last annotation's label equals any entry of ``labels``,
        otherwise False.
    """
    seq_label = sequence[-1].label
    if labels is None:
        return False
    return any(seq_label == label for label in labels)


def filter_annotations(annotations, remove_small_faces=False, filter_out=None,
                       small_face_thresh=None):
    """
    Apply filters to remove certain kinds of annotation sequences.

    Args:
        annotations: Iterable of annotation sequences.
        remove_small_faces: When true, drop every sequence for which
            ``has_small_face`` reports a face that is too small.
        filter_out: Iterable of labels; sequences for which ``has_label``
            matches one of them are dropped. None disables label filtering.
        small_face_thresh: Threshold passed to ``has_small_face``. When None,
            the value is taken from a module-level ``config`` mapping if one
            exists (the behaviour when this file runs as a script);
            otherwise ``has_small_face`` uses its own default threshold.

    Returns:
        A new list with the sequences that passed all filters, in their
        original order.
    """
    # The threshold is only resolved when small-face filtering is requested,
    # so the function no longer needs a global ``config`` just to be called.
    face_kwargs = {}
    if remove_small_faces:
        if small_face_thresh is None:
            # Legacy fallback: existing callers do not pass a threshold and
            # rely on the ``config`` global set in the ``__main__`` section.
            script_config = globals().get('config')
            if script_config is not None:
                small_face_thresh = script_config['small_face_threshold']
        if small_face_thresh is not None:
            face_kwargs['thresh'] = small_face_thresh

    filtered = []
    for sequence in annotations:
        if remove_small_faces and has_small_face(sequence, **face_kwargs):
            continue
        # filter_out=None (the default) means "no label filter".
        if filter_out is not None and has_label(sequence, filter_out):
            continue
        filtered.append(sequence)

    return filtered


def speaking_partition(annotations):
    """
    Split the positions of the annotations into speaking and non-speaking.

    Returns two lists of indices into ``annotations``: first the positions of
    the sequences for which ``is_speaking`` is true, then all the others.
    Both lists are in ascending order.
    """
    speaking_idx, silent_idx = [], []

    for position, seq in enumerate(annotations):
        bucket = speaking_idx if is_speaking(seq) else silent_idx
        bucket.append(position)

    return speaking_idx, silent_idx


def resample(filtered, speak, nspeak, keep_ratio):
    """
    Resample data to get a balanced dataset.

    Args:
        filtered: Indexable collection of annotation sequences.
        speak: Indices into ``filtered`` of the speaking sequences.
        nspeak: Indices into ``filtered`` of the non-speaking sequences.
        keep_ratio: Fraction of the speaking sequences to keep; the same
            number of non-speaking sequences is drawn.

    Returns:
        A tuple ``(rspeak, rnspeak)`` of two equally long lists holding the
        randomly chosen speaking and non-speaking sequences.

    Raises:
        ValueError: If ``keep_ratio`` is negative (raised by
            ``random.sample`` for a negative sample size).
    """
    # The sample size is driven by the speaking class (the smaller one in the
    # AVA ASD dataset). It is clamped to both class sizes so that a smaller
    # non-speaking class or a keep_ratio above 1 yields the largest balanced
    # sample available instead of a ValueError from random.sample.
    n = min(int(len(speak) * keep_ratio), len(speak), len(nspeak))

    rspeak = [filtered[i] for i in random.sample(speak, n)]
    rnspeak = [filtered[i] for i in random.sample(nspeak, n)]

    return rspeak, rnspeak


def filter_and_sample(filename, config):
    """
    Apply filter and sampler to a pickle file containing the annotations generated by assemble_data.py

    Args:
        filename: Path of the pickle file holding the annotation sequences.
        config: Mapping providing ``remove_small_faces``, ``filter_out`` and
            ``train_keep_ratio``. The ``train_keep_ratio`` key is used for
            every input file.

    Returns:
        A tuple ``(rspeak, rnspeak)`` with the sampled speaking and
        non-speaking sequences, as returned by ``resample``.
    """
    # NOTE: unpickling can execute arbitrary code; only load annotation files
    # that come from a trusted source.
    # The context manager closes the file handle once loading is done.
    with open(filename, 'rb') as f:
        annotations = pickle.load(f)

    remove_small_faces = config['remove_small_faces']
    filter_out = config['filter_out']

    filtered = filter_annotations(
        annotations, remove_small_faces=remove_small_faces, filter_out=filter_out)
    n_filtered = len(filtered)

    delta = len(annotations) - n_filtered
    print('Number of filtered out training points: {}'.format(delta))

    speak, nspeak = speaking_partition(filtered)

    keep_ratio = config['train_keep_ratio']
    rspeak, rnspeak = resample(filtered, speak, nspeak, keep_ratio)

    len_s = len(rspeak)
    len_n = len(rnspeak)

    # Report real percentages of the filtered set (the message prints a '%'
    # sign) and avoid dividing by zero when every sequence was filtered out.
    pct_s = 100.0 * len_s / n_filtered if n_filtered else 0.0
    pct_n = 100.0 * len_n / n_filtered if n_filtered else 0.0

    print('Generated subset of speaking: {} ({:.2f}% of original), nonspeaking: {} ({:.2f}% of original)'.format(
        len_s, pct_s, len_n, pct_n))

    return rspeak, rnspeak


def generate_and_save_subset(in_file, out_file, config):
    """
    Build a shuffled annotation subset from ``in_file`` and pickle it to ``out_file``.

    The speaking and non-speaking samples returned by ``filter_and_sample``
    are joined into one list, shuffled in place with the ``random`` module
    and written with ``save_to_file``.
    """
    speaking, non_speaking = filter_and_sample(in_file, config)
    combined = [*speaking, *non_speaking]
    random.shuffle(combined)
    save_to_file(combined, out_file)


if __name__ == '__main__':
    # Kept as a module-level name: filter_annotations reads the small-face
    # threshold from this global when run as a script.
    config = read_config('config.yaml')

    # Set seed for debug. Remove later.
    # Sampling and shuffling in this file use the stdlib `random` module, so
    # it must be seeded as well; seeding NumPy alone does not make the
    # generated subsets reproducible.
    random.seed(0)
    np.random.seed(0)

    # Generate the training subset, then the test subset, with the same rules.
    generate_and_save_subset(
        config['train_annotations_full'], config['train_annotations_subset'], config)

    generate_and_save_subset(
        config['test_annotations_full'], config['test_annotations_subset'], config)