Skip to content
This repository has been archived by the owner on Dec 1, 2021. It is now read-only.

[WIP] Add the config file of parameter tuner for object detection #275

Open
wants to merge 792 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
792 commits
Select commit Hold shift + click to select a range
a7d5a5b
fix copyright labels
Jan 29, 2019
6a80e05
Merge branch 'master' into enable_aarch64_llvm_build
lm-konda Jan 29, 2019
ac5d33f
Merge branch 'master' into improve_cache_locality
lm-konda Jan 29, 2019
1e6034d
Merge branch 'master' into element-tree
ruimashita Jan 29, 2019
8654571
add assertion to pad for channel-wise paddings
Jan 29, 2019
3a465a4
refactor generic kn2row
lm-konda Jan 30, 2019
bc03e41
improve avepooling performance
lm-konda Jan 30, 2019
79b00c9
improve maxpooling
lm-konda Jan 30, 2019
5a6a888
fix typo and errors
Jan 30, 2019
1d3f920
patch optimizer selector
iizukak Jan 31, 2019
c8b8f53
Merge pull request #2 from iizukak/patch_optimizer_selector
lm-konda Jan 31, 2019
5059034
fix document broken
iizukak Jan 31, 2019
af728f3
Fix selection AdamOptimizer -> Adam
iizukak Jan 31, 2019
c911d37
little fix documentation
iizukak Jan 31, 2019
92150e2
Fix some docs
iizukak Jan 31, 2019
ae836df
Merge pull request #107 from ruimashita/element-tree
ruimashita Jan 31, 2019
921ee1c
Merge branch 'master' into kernel_transform
ruimashita Jan 31, 2019
4e53a56
update pyyaml version for security
ruimashita Jan 31, 2019
3f4fb40
Merge branch 'master' into simplify_optimizer
ruimashita Jan 31, 2019
fceec27
Merge branch 'master' into feature/resnet
ruimashita Jan 31, 2019
fe7b69f
Merge pull request #53 from antonionevado/simplify_optimizer
ruimashita Jan 31, 2019
56d68d4
Merge pull request #3 from iizukak/patch2_optimizer_selector
lm-konda Feb 1, 2019
7a4bbe1
Merge branch 'master' into optimizer_selector
lm-konda Feb 1, 2019
96ba0be
Merge branch 'master' into kernel_transform
lm-konda Feb 1, 2019
6455010
Merge branch 'master' into feature/resnet
nlpng Feb 1, 2019
9782592
remove unnecessary code from merging
Feb 1, 2019
8fb71df
enable graph opt
lm-konda Feb 1, 2019
4248338
Merge branch 'master' into improve_avepooling
ruimashita Feb 1, 2019
5be44d6
Merge branch 'master' into improve_maxpooling
ruimashita Feb 1, 2019
7a9e9a6
fix PEP8 and split operator
Feb 1, 2019
e1bf3f2
Merge branch 'master' into update_pyyaml_version
ruimashita Feb 2, 2019
1e32071
Merge pull request #137 from lm-konda/improve_maxpooling
ruimashita Feb 2, 2019
a34f756
Merge branch 'master' into improve_avepooling
ruimashita Feb 2, 2019
552dd79
Merge branch 'master' into update_pyyaml_version
ruimashita Feb 2, 2019
1d1a732
Merge pull request #136 from lm-konda/improve_avepooling
ruimashita Feb 2, 2019
3e395ce
Merge branch 'master' into refactoring_generic_kn2row
ruimashita Feb 2, 2019
c604fc1
Merge pull request #135 from lm-konda/refactoring_generic_kn2row
ruimashita Feb 3, 2019
32a4e7d
Merge remote-tracking branch 'aa/master' into kernel_transform
lm-konda Feb 4, 2019
fedce8d
Merge branch 'master' into optimizer_selector
lm-konda Feb 4, 2019
9cc8c10
fix kernel transpose
lm-konda Feb 4, 2019
a62ab50
fix line
lm-konda Feb 4, 2019
82663ee
Merge branch 'master' into update_pyyaml_version
ruimashita Feb 4, 2019
347ede9
refactoring
lm-konda Feb 4, 2019
28ace4d
Merge branch 'master' into add_qconv_kn2row_tiling
Feb 5, 2019
448bff0
- Modified run.py and usb_camera_demo.py to accept protocol buffer fi…
Joeper214 Feb 5, 2019
b877a5e
Merge branch 'master' into output_gpu_inference_model
Joeper214 Feb 5, 2019
6260575
revert use of Opencv python3
Joeper214 Feb 5, 2019
52f1377
Merge branch 'output_gpu_inference_model' of github.com:Joeper214/blu…
Joeper214 Feb 5, 2019
5dfa0d8
Fix non-existing path
kination Feb 5, 2019
eb5d4ad
- Add License to protobuf loader file
Joeper214 Feb 5, 2019
f6a924e
fix optimizer selection
lm-konda Feb 5, 2019
957a059
Merge branch 'optimizer_selector' of github.com:lm-konda/blueoil into…
lm-konda Feb 5, 2019
1941707
fix blueoil control
lm-konda Feb 6, 2019
788519e
refactoring
lm-konda Feb 6, 2019
9d97d47
move weight decay rate to config file
Feb 6, 2019
fd1d342
Merge branch 'master' into runtime_imageresize
yoya Feb 6, 2019
310e5ff
Merge branch 'master' into feature/resnet
ruimashita Feb 6, 2019
3acb246
Merge pull request #145 from djKooks/fix-default-path
ruimashita Feb 6, 2019
a35dc05
Merge branch 'master' into feature/resnet
ruimashita Feb 6, 2019
33c3635
- Update protobuf loader
Joeper214 Feb 6, 2019
ca7d228
Merge pull request #65 from nlpng/feature/resnet
ruimashita Feb 6, 2019
cb218fe
Update Readme of output_template
Joeper214 Feb 6, 2019
951e504
Merge branch 'master' into output_gpu_inference_model
Joeper214 Feb 6, 2019
55b168e
fix naming convention
lm-konda Feb 7, 2019
aeabe9d
refactoring
lm-konda Feb 7, 2019
68ad5c4
- Add comments for Wrapper classes
Joeper214 Feb 7, 2019
0e03ba1
fix learning rate setting
lm-konda Feb 7, 2019
5763a23
add comments for global function
lm-konda Feb 7, 2019
3f2ca96
Merge branch 'master' into optimizer_selector
ruimashita Feb 7, 2019
c02f697
reduce constant fold and refactoring
lm-konda Feb 7, 2019
2652e44
Move pool interface to utilities folder
Joeper214 Feb 7, 2019
855a88f
Merge branch 'master' into update_pyyaml_version
ruimashita Feb 7, 2019
4d07e4f
Merge pull request #28 from lm-konda/optimizer_selector
ruimashita Feb 7, 2019
8a9cb3f
Merge branch 'master' into update_pyyaml_version
ruimashita Feb 7, 2019
94488d9
Merge pull request #142 from ruimashita/update_pyyaml_version
ruimashita Feb 7, 2019
5374f8b
Merge branch 'master' into enable_aarch64_llvm_build
iizukak Feb 8, 2019
bf3e869
Update documentation
Joeper214 Feb 8, 2019
9703b39
Turned off a8w1_conv and change the name of comparison for qconv_with…
Dec 17, 2018
3b63c2a
Change some build configuration.
Nov 5, 2018
5ee50d2
Enable conv_kn2row_tiling IP to work with multiple out_c and in_c
Nov 9, 2018
848399c
Implement quantized convolution with kn2row and tiling
Nov 9, 2018
daa5e48
Remove debug codes like print statements
Nov 9, 2018
65a7f13
Setup intel HLS codes and Fix typo
Nov 9, 2018
e89ed74
Refactor codes with clang-format (that doesn't follow LM c++ coding s…
Nov 13, 2018
c4abdcd
Update clang-format setting to automatically keep consistency with st…
Nov 14, 2018
4f2885b
Add intel HLS implementation of qconv_kn2row_tiling without any optim…
Nov 16, 2018
feb6119
Change the conv_kn2row_tiling computation order.
Nov 16, 2018
5316ff2
Add optimization to qconv_kn2row_tiling in intelHLS
Nov 19, 2018
b721831
An optimization that ic=2, oc=8 and loop coalesced and in_buf is implem…
Nov 27, 2018
ce6771e
Fix threshold bus and some parameters
Nov 29, 2018
a653e7a
implement no-mc_ic4-oc8_in-buf-line-bram_90MHz_hls-100MHz
Dec 3, 2018
25abb4d
Move BRAM memories into function's glow scope
Dec 4, 2018
1f05ba1
Completed HLS compile, synthesis, fit and pass the timing analysis
Dec 27, 2018
4484f75
Change the frequency when compiling
Jan 16, 2019
69d14c3
Fix threshold skipping bug on IP
Jan 25, 2019
be4a8d0
Fix the frequency for HLS and fix threshold skipping result when the …
Jan 25, 2019
1f3fa17
Add synthesized IP that quantized convolution with kn2row and tiling
Jan 25, 2019
1d1c7d9
fix comment
lm-konda Feb 8, 2019
d79172d
Merge branch 'master' into kernel_transform
ruimashita Feb 8, 2019
2abe5ac
Merge branch 'master' into improve_cache_locality
lm-konda Feb 8, 2019
1e52d40
modify makefile
lm-konda Feb 8, 2019
f955e9a
use ppa aarch64 GCC
lm-konda Feb 8, 2019
09b9715
modify docker
lm-konda Feb 8, 2019
f60cfed
fix line
lm-konda Feb 8, 2019
9f3b5d8
Merge pull request #73 from lm-konda/kernel_transform
ruimashita Feb 8, 2019
f2ef110
update version 0.3.0
ruimashita Feb 8, 2019
cdf77ce
Removed hard coding directory path.
odoku Feb 8, 2019
4722b47
Merge pull request #148 from ruimashita/update_version_0.3
ruimashita Feb 8, 2019
3f2f5a4
add timer to high cost part
lm-konda Feb 12, 2019
4b9e955
Merge branch 'master' into blueoil-cli
odoku Feb 12, 2019
ec924ec
Merge pull request #105 from odoku/blueoil-cli
ruimashita Feb 12, 2019
97eb621
OpenCV module not include as default.
yoya Feb 12, 2019
301d9b6
Merge branch 'master' into runtime_imageresize
yoya Feb 12, 2019
725b70c
- replace USE_OPENCV_MODULE => USE_OPENCV
yoya Feb 13, 2019
4dfc093
- use "NOT" for "if" condition.
yoya Feb 13, 2019
00f70bb
USE_OPENCV, default "OFF" to "ON"
yoya Feb 13, 2019
64cb5be
move tensor_util (CHW <=> HWC conversion) to test folder.
yoya Feb 13, 2019
1ebe7ca
- Remove pool interface (not needed)
Joeper214 Feb 14, 2019
13fdf6f
- Update usb camera demo
Joeper214 Feb 14, 2019
0914e4d
Merge branch 'master' into output_gpu_inference_model
Joeper214 Feb 14, 2019
333a1a3
implement DatasetIterator class
tkng Jan 22, 2019
a249186
Merge pull request #122 from tkng/dataset_iterator
ruimashita Feb 14, 2019
6b4c6f6
remove redundant -g option in Makefile
iizukak Feb 15, 2019
b036984
Merge branch 'add_qconv_kn2row_tiling' of ssh://github.com/tkclimb/bl…
Feb 15, 2019
aa81e2f
Update hw_path
Feb 15, 2019
6312c77
Fixed the conflicts with the master and merge
Feb 15, 2019
944cc1e
Update pre kernel reorder and driver for de10-nano
Feb 15, 2019
e3d8252
Merge pull request #130 from tkclimb/add_qconv_kn2row_tiling
n-nez Feb 15, 2019
6c41ffb
Refactor
Feb 15, 2019
1c0e47f
rename include file tensor_util.h => test_util.h
yoya Feb 15, 2019
4a32fad
Change Classname from ProtobufLoader to TFGraphLoadPb
Joeper214 Feb 18, 2019
a20afda
Merge branch 'output_gpu_inference_model' of github.com:Joeper214/blu…
Joeper214 Feb 18, 2019
4b7e491
Improve exception message
Joeper214 Feb 18, 2019
99c7b8e
- Update Readme in more detail which includes Demo for FPGA
Joeper214 Feb 18, 2019
642a869
Refactoring de10_nano.h
Feb 18, 2019
ef3d2f5
Update readme.md with summary
Joeper214 Feb 18, 2019
77d54ed
Merge pull request #159 from tkclimb/refactor_backends
n-nez Feb 19, 2019
7d56781
Merge branch 'master' into output_gpu_inference_model
Joeper214 Feb 19, 2019
d0bfc02
Merge branch 'master' into improve_cache_locality
iizukak Feb 19, 2019
bbd2d7e
- Change Class name to TensorflowGraphRunner
Joeper214 Feb 20, 2019
18eb6bc
Merge branch 'output_gpu_inference_model' of github.com:Joeper214/blu…
Joeper214 Feb 20, 2019
a935311
Merge branch 'master' into runtime_imageresize
yoya Feb 20, 2019
b05267a
parameter check. typo. (channels => filter)
yoya Feb 20, 2019
e6f132a
fix indent
lm-konda Feb 20, 2019
0c4df91
Merge remote-tracking branch 'aa/master' into enable_aarch64_build
lm-konda Feb 20, 2019
989f8c1
fix indent
lm-konda Feb 20, 2019
d9a3df3
Change the ordering of import
Joeper214 Feb 20, 2019
1df28c2
modify Dockerfile
lm-konda Feb 20, 2019
4b5cf0a
modify Makefile
lm-konda Feb 20, 2019
4fa9fc9
refactor dockerfile
lm-konda Feb 20, 2019
b2fa2a1
Merge branch 'master' into add_timer_for_arm
lm-konda Feb 22, 2019
f553c4b
Merge remote-tracking branch 'upstream/master' into remove-redundant-…
iizukak Feb 25, 2019
622b06d
Merge pull request #146 from lm-konda/enable_aarch64_build
ruimashita Feb 26, 2019
694c7e4
fix conflict
iizukak Feb 26, 2019
d9ba847
Merge branch 'master' into add_timer_for_arm
iizukak Feb 26, 2019
0b3485f
Merge branch 'master' into output_gpu_inference_model
iizukak Feb 26, 2019
95910d4
hot fix for overflowError
nlpng Feb 26, 2019
b882c7e
remove redundant codes
nlpng Feb 26, 2019
51411a8
change name for consistency
nlpng Feb 26, 2019
0ca0bd4
Merge pull request #156 from iizukak/remove-redundant-g-option
iizukak Feb 27, 2019
b146503
Merge branch 'master' into add_timer_for_arm
iizukak Feb 27, 2019
1eae356
Fix loop boundaries in vectorized batch normalization
n-nez Feb 27, 2019
5ce140d
Merge pull request #151 from lm-konda/add_timer_for_arm
iizukak Feb 27, 2019
8b3b923
- Copy protocol buffer file into models folder
Joeper214 Feb 27, 2019
82c2451
Merge branch 'output_gpu_inference_model' of github.com:Joeper214/blu…
Joeper214 Feb 27, 2019
af20ed9
Merge branch 'master' into fix-loop-boundaries-in-vectorized-batch-norm
ruimashita Feb 27, 2019
cec92e6
Merge pull request #172 from n-nez/fix-loop-boundaries-in-vectorized-…
iizukak Feb 27, 2019
7f704a2
Merge branch 'master' into fix/huge_threshold_values
iizukak Feb 27, 2019
fb71e01
Merge branch 'master' into runtime_imageresize
yoya Feb 28, 2019
df868dd
Merge branch 'master' into output_gpu_inference_model
Joeper214 Feb 28, 2019
04d405e
Merge pull request #9 from yoya/runtime_imageresize
iizukak Feb 28, 2019
3eb202c
Merge branch 'master' into output_gpu_inference_model
ruimashita Feb 28, 2019
46a66e7
Merge branch 'master' into fix/huge_threshold_values
iizukak Mar 1, 2019
302de83
Merge pull request #170 from nlpng/fix/huge_threshold_values
iizukak Mar 1, 2019
e0ebe2b
Merge branch 'master' into improve_cache_locality
iizukak Mar 2, 2019
e8aa31b
Merge pull request #131 from lm-konda/improve_cache_locality
iizukak Mar 4, 2019
8f6fb43
[Semantic Segmentation] Add CamVid dataset fixture and Documentation
iizukak Mar 4, 2019
6c2c771
fix description for CamvidCustom
iizukak Mar 4, 2019
7110c6a
Fix filename
iizukak Mar 4, 2019
c5dfeb5
Merge branch 'master' into output_gpu_inference_model
ruimashita Mar 4, 2019
96e456e
Merge pull request #176 from iizukak/camvid-fixture-dataset
ruimashita Mar 4, 2019
c9cc5ed
Merge branch 'master' into output_gpu_inference_model
Joeper214 Mar 4, 2019
12bc54a
Merge pull request #144 from Joeper214/output_gpu_inference_model
iizukak Mar 4, 2019
b00bfea
remove redundant variable (#134)
lm-konda Mar 6, 2019
6aed818
Implement SimpleDatasetReader (#166)
ruimashita Mar 7, 2019
59ca9c6
Add General Purpose Color Palette (#188)
iizukak Mar 7, 2019
be02644
Change camvid_custom dataset format (#189)
iizukak Mar 7, 2019
a65c1cb
Fix pack_input_to_qwords (#157)
lm-konda Mar 8, 2019
f0f913a
[Semantic Segmentation] Add CamvidCustom dataset class. (#177)
iizukak Mar 8, 2019
1e64f2e
[Semantic Segmentation] Add Interface for Blueoil (#180)
iizukak Mar 14, 2019
a086cbf
[Semantic Segmentation] Add Tutorial (#174)
iizukak Mar 14, 2019
6f979b7
update version 0.4.0 (#196)
ruimashita Mar 15, 2019
c1f5598
Changed to copy settings file when executing training. (#198)
odoku Mar 19, 2019
9069945
Remove unused dlk/.github/CODEOWNERS file (#200)
ruimashita Mar 20, 2019
d4bdb5d
Add backwards compatibility for output template model python scripts …
Joeper214 Mar 28, 2019
dee44a1
#203 Change color to tuple from list to fix run.py error (#204)
ruimashita Mar 28, 2019
4920ddc
remove check for activation quantizer counts of inputs
nlpng Feb 28, 2019
06a06d4
add consistency check for activation quantizers
nlpng Mar 5, 2019
c3c2e2d
update warning message for better understanding
nlpng Apr 2, 2019
e444a66
Create Makefile and unify test commands in it (#194)
kchygoe Apr 4, 2019
828ef1e
[LMBiSeNet] Implement bilinear upsampling post process (#211)
ruimashita Apr 5, 2019
4f8b5d1
[LMBiSeNet] implement densenet block and conv_bn_act block (#212)
ruimashita Apr 5, 2019
11b58f0
Change executor/profile_model.py (#207)
ruimashita Apr 5, 2019
cfc33bd
Add support of leakyrelu to DLK (#214)
nlpng Apr 8, 2019
3b5c5b8
lmnet: avoid importing entire pyplot (#218)
tsawada Apr 9, 2019
96f9929
Fix document structure to clean the directory (#216)
kchygoe Apr 9, 2019
ce73c9d
Implement packed data type (#217)
primenumber Apr 10, 2019
08077c7
create lmnet config with tempfile (#219)
tsawada Apr 10, 2019
5f679ee
Use gfile to read/write configs (#223)
tsawada Apr 11, 2019
1f47c76
Implement SegnetBase class (#224)
ruimashita Apr 11, 2019
e0daa89
Export inference test data and add FUNC_TIME_MEASUREMENT macro for te…
ruimashita Apr 11, 2019
2fca4d8
[LMBiSeNet] Implement LMBiSeNet (#225)
ruimashita Apr 12, 2019
7ee9a7b
Fix condition (#222)
primenumber Apr 12, 2019
7b4df4e
Change default model in usb camera demo (#230)
Joeper214 Apr 12, 2019
09efa7a
update TensorFlow version (#202)
lm-jira Apr 12, 2019
c407323
Semantic segmentation python demo (#155)
nlpng Apr 12, 2019
3b1ffe6
update version 0.5.0 (#232)
ruimashita Apr 12, 2019
1559805
Use QuantizedPacked for kernel (#226)
primenumber Apr 15, 2019
8b5af3f
To avoid duplication, lmnet/executor/export.py run() return exported …
ruimashita Apr 16, 2019
b8aa850
Optimize Bilinear postprocess (#228)
ruimashita Apr 17, 2019
4cd1235
fix typo: s/pyhon/python/ (#237)
Apr 17, 2019
1d74496
Update TensorFlow, CUDA and OepnCV version in README (#239)
iizukak Apr 18, 2019
a1540cd
Fix MaxPool implementation (#238)
tvlenin Apr 22, 2019
5b42fa1
fix retraining failure and add test for retraining (#246)
hadusam Apr 25, 2019
9c4c75e
Motion jpeg server for demo (#241)
ruimashita Apr 25, 2019
9fe210c
[Experimental] Add parameter tuning feature to lmnet (#209)
nlpng Apr 26, 2019
4bb642a
[Experimental] example configuration for parameter tuning (#215)
nlpng Apr 26, 2019
39e5205
Accelerate anchors generator in early steps (#250)
yd8534976 Apr 26, 2019
f0aaa5c
Update to version 0.6.0 (#251)
ruimashita Apr 26, 2019
3215aa4
Update Pillow (#255)
iizukak May 7, 2019
60d18e0
Fix config overwrite by check & delete if destination exists (#258)
Joeper214 May 10, 2019
7343a99
Add try catch to ensure queue import compatibility (#264)
Joeper214 May 13, 2019
35c385f
Add data shaping for images_placeholder (#265)
nabedev May 13, 2019
d23f730
Fix the error of pep8-dlk (#268)
hadusam May 13, 2019
46896e8
Enable to test DLK on local DE10-Nano (#261)
hadusam May 14, 2019
f0cf0e2
Update docs for CUDA10 (#271)
ruimashita May 15, 2019
810680d
Modified so that differences in Docker image are not output as much a…
odoku May 15, 2019
7280413
add parameter tuning config file for object detection
nlpng May 9, 2019
0a383d9
add stop metric for object detection
nlpng May 13, 2019
8a1a80b
add momentum option to optimizer
nlpng May 13, 2019
b2a6e8b
add warm up stage for object detection
nlpng May 15, 2019
a07eebc
fix key error
nlpng May 15, 2019
5e8d800
change to suitable batch size and epoch
nlpng May 15, 2019
2c48b87
update copyright
nlpng May 16, 2019
f138751
update to more practical mAP for trial stopping
nlpng May 16, 2019
d69f955
Enable data prefetch for param tuner (#262)
nlpng May 16, 2019
7a4dee9
remove debug print
nlpng May 17, 2019
33fb14a
Merge branch 'master' into feature/add_obj_det_tune_config
nlpng May 17, 2019
d17ddf8
limit the training iteration
nlpng May 20, 2019
d40561f
limit the training iteration
nlpng Jul 30, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Copyright 2018 The Blueoil Authors. All Rights Reserved.
/* Copyright 2019 The Blueoil Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -13,14 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef DLK_FUNC_MATMUL_H_INCLUDED
#define DLK_FUNC_MATMUL_H_INCLUDED

#include "global.h"
#include "tensor_view.h"
#include "func/leaky_relu.h"
#include "time_measurement.h"

void func_LeakyRelu(T_FLOAT input[], T_FLOAT output[], T_FLOAT alpha, T_UINT out_height,
T_UINT out_width, T_UINT out_depth) {
Measurement::Start("LeakyReLu");

T_UINT elements = out_height * out_width * out_depth;

void func_Matmul(const TensorView<T_FLOAT, MemoryLayout::NC>& input,
const TensorView<T_FLOAT, MemoryLayout::NC>& factor,
const TensorView<T_FLOAT, MemoryLayout::NC>& output);
for (T_UINT i = 0; i < elements; i++)
output[i] = (input[i] * alpha > input[i] ? input[i] * alpha : input[i]);

#endif // DLK_FUNC_MATMUL_H_INCLUDED
Measurement::Stop();
}
2 changes: 2 additions & 0 deletions dlk/tests/tstutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ def setup_de10nano(hw_path: str, output_path: str, testcase=None):
try:
run_and_check(
[ "ssh",
"-o",
"StrictHostKeyChecking no",
f"root@{host}",
f"mkdir -p ~/automated_testing; mkdir -p ~/boot; if grep -qs '/root/boot' /proc/mounts ;" \
+ "then echo 0 ; else mount /dev/mmcblk0p1 /root/boot ; fi"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
# -*- coding: utf-8 -*-
# Copyright 2019 The Blueoil Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
from easydict import EasyDict
import tensorflow as tf

from lmnet.common import Tasks
from lmnet.networks.object_detection.lm_fyolo import LMFYoloQuantize
from lmnet.datasets.pascalvoc_2007_2012 import Pascalvoc20072012
from lmnet.data_processor import Sequence
from lmnet.pre_processor import (
ResizeWithGtBoxes,
DivideBy255,
)
from lmnet.post_processor import (
FormatYoloV2,
ExcludeLowScoreBox,
NMS,
)
from lmnet.data_augmentor import (
Brightness,
Color,
Contrast,
FlipLeftRight,
Hue,
SSDRandomCrop,
)

from lmnet.quantizations import (
binary_channel_wise_mean_scaling_quantizer,
linear_mid_tread_half_quantizer,
)

from hyperopt import hp

IS_DEBUG = False

NETWORK_CLASS = LMFYoloQuantize
DATASET_CLASS = Pascalvoc20072012

IMAGE_SIZE = [320, 320]
BATCH_SIZE = 16
DATA_FORMAT = "NHWC"
TASK = Tasks.OBJECT_DETECTION
CLASSES = DATASET_CLASS.classes

MAX_STEPS = 1000000
SAVE_STEPS = 10000
TEST_STEPS = 1000
SUMMARISE_STEPS = 1000
IS_DISTRIBUTION = False

# for debug
# IS_DEBUG = True
# SUMMARISE_STEPS = 1
# SUMMARISE_STEPS = 100
# TEST_STEPS = 10000
# SUMMARISE_STEPS = 100

# pretrain
IS_PRETRAIN = False
PRETRAIN_VARS = []
PRETRAIN_DIR = ""
PRETRAIN_FILE = ""

PRE_PROCESSOR = Sequence([
ResizeWithGtBoxes(size=IMAGE_SIZE),
DivideBy255()
])
anchors = [
(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)
]
score_threshold = 0.05
nms_iou_threshold = 0.5
nms_max_output_size = 100
POST_PROCESSOR = Sequence([
FormatYoloV2(
image_size=IMAGE_SIZE,
classes=CLASSES,
anchors=anchors,
data_format=DATA_FORMAT,
),
ExcludeLowScoreBox(threshold=score_threshold),
NMS(iou_threshold=nms_iou_threshold, max_output_size=nms_max_output_size, classes=CLASSES,),
])

# Train data num per epoch is 16551
step_per_epoch = int(16551 / BATCH_SIZE)

TUNE_SPEC = {
'run': 'tunable_det',
'resources_per_trial': {"cpu": 2, "gpu": 1},
'stop': {
'mean_accuracy': 0.77,
'training_iteration': 777,
},
'config': {
'lm_config': {},
},
'local_dir': None,
'num_samples': 100,
}

TUNE_SPACE = {
'optimizer_class': hp.choice(
'optimizer_class', [
{
'optimizer': tf.train.MomentumOptimizer,
'momentum': 0.9,
},
]
),
'learning_rate': hp.uniform('learning_rate', 0, 0.05),
'learning_rate_func': hp.choice(
'learning_rate_func', [
{
'scheduler': tf.train.piecewise_constant,
'scheduler_factor': 0.1,
'soft_start': 5e-4,
'scheduler_steps': [step_per_epoch, step_per_epoch * 80, step_per_epoch * 120],
},
]
),
}

NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = None
NETWORK.OPTIMIZER_KWARGS = {}
NETWORK.LEARNING_RATE_FUNC = None
# In the origianl yolov2 Paper, with a starting learning rate of 10−3, dividing it by 10 at 60 and 90 epochs.
NETWORK.LEARNING_RATE_KWARGS = {}
NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
NETWORK.DATA_FORMAT = DATA_FORMAT
NETWORK.ANCHORS = anchors
NETWORK.OBJECT_SCALE = 5.0
NETWORK.NO_OBJECT_SCALE = 1.0
NETWORK.CLASS_SCALE = 1.0
NETWORK.COORDINATE_SCALE = 1.0
NETWORK.LOSS_IOU_THRESHOLD = 0.6
NETWORK.WEIGHT_DECAY_RATE = 0.0005
NETWORK.SCORE_THRESHOLD = score_threshold
NETWORK.NMS_IOU_THRESHOLD = nms_iou_threshold
NETWORK.NMS_MAX_OUTPUT_SIZE = nms_max_output_size
NETWORK.SEEN_THRESHOLD = 8000
# quantization
NETWORK.ACTIVATION_QUANTIZER = linear_mid_tread_half_quantizer
NETWORK.ACTIVATION_QUANTIZER_KWARGS = {
'bit': 2,
'max_value': 2.0
}
NETWORK.WEIGHT_QUANTIZER = binary_channel_wise_mean_scaling_quantizer
NETWORK.WEIGHT_QUANTIZER_KWARGS = {}
NETWORK.QUANTIZE_FIRST_CONVOLUTION = True
NETWORK.QUANTIZE_LAST_CONVOLUTION = False

# dataset
DATASET = EasyDict()
DATASET.BATCH_SIZE = BATCH_SIZE
DATASET.DATA_FORMAT = DATA_FORMAT
DATASET.PRE_PROCESSOR = PRE_PROCESSOR
DATASET.AUGMENTOR = Sequence([
FlipLeftRight(),
Brightness((0.75, 1.25)),
Color((0.75, 1.25)),
Contrast((0.75, 1.25)),
Hue((-10, 10)),
SSDRandomCrop(min_crop_ratio=0.7),
])
DATASET.ENABLE_PREFETCH = True
8 changes: 8 additions & 0 deletions lmnet/executor/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ def setup_dataset(config, subset, rank):
return DatasetIterator(dataset, seed=rank, enable_prefetch=enable_prefetch)


def setup_dataset(config, subset, rank):
DatasetClass = config.DATASET_CLASS
dataset_kwargs = dict((key.lower(), val) for key, val in config.DATASET.items())
dataset = DatasetClass(subset=subset, **dataset_kwargs)
enable_prefetch = dataset_kwargs.pop("enable_prefetch", False)
return DatasetIterator(dataset, seed=rank, enable_prefetch=enable_prefetch)


def start_training(config):
if config.IS_DISTRIBUTION:
import horovod.tensorflow as hvd
Expand Down
24 changes: 15 additions & 9 deletions lmnet/executor/tune_ray.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,15 +112,17 @@ def update_parameters_for_each_trial(network_kwargs, chosen_kwargs):
base_lr = chosen_kwargs['learning_rate']
if network_kwargs['learning_rate_func'] is tf.train.piecewise_constant:
lr_factor = chosen_kwargs['learning_rate_func']['scheduler_factor']
network_kwargs['learning_rate_kwargs']['values'] = [base_lr,
base_lr * lr_factor,
base_lr * lr_factor * lr_factor,
base_lr * lr_factor * lr_factor * lr_factor]

if 'soft_start' in chosen_kwargs['learning_rate_func']:
num_decay_stage = len(chosen_kwargs['learning_rate_func']['scheduler_steps'])
lr_values = [base_lr * lr_factor ** n for n in range(num_decay_stage)]
lr_values.insert(0, chosen_kwargs['learning_rate_func']['soft_start'])
else:
num_decay_stage = len(chosen_kwargs['learning_rate_func']['scheduler_steps']) + 1
lr_values = [base_lr * lr_factor ** n for n in range(num_decay_stage)]

network_kwargs['learning_rate_kwargs']['values'] = lr_values
network_kwargs['learning_rate_kwargs']['boundaries'] = chosen_kwargs['learning_rate_func']['scheduler_steps']
elif network_kwargs['learning_rate_func'] is tf.train.polynomial_decay:
network_kwargs['learning_rate_kwargs']['learning_rate'] = base_lr
network_kwargs['learning_rate_kwargs']['power'] = chosen_kwargs['learning_rate_func']['scheduler_power']
network_kwargs['learning_rate_kwargs']['decay_steps'] = chosen_kwargs['learning_rate_func']['scheduler_decay']
else:
network_kwargs['learning_rate_kwargs']['learning_rate'] = base_lr

Expand Down Expand Up @@ -232,6 +234,8 @@ def _train(self):

if self.lm_config.NETWORK_CLASS.__module__.startswith("lmnet.networks.segmentation"):
metric_accuracy = self.sess.run(self.metrics_ops_dict["mean_iou"])
elif self.lm_config.NETWORK_CLASS.__module__.startswith("lmnet.networks.object_detection"):
metric_accuracy = self.sess.run(self.metrics_ops_dict["MeanAveragePrecision_0.5"])
else:
metric_accuracy = self.sess.run(self.metrics_ops_dict["accuracy"])

Expand Down Expand Up @@ -271,7 +275,9 @@ def easydict_to_dict(config):
# Expecting use of gpus to do parameter search
ray.init(num_cpus=multiprocessing.cpu_count() // 2, num_gpus=max(get_num_gpu(), 1))
algo = HyperOptSearch(tune_space, max_concurrent=4, reward_attr="mean_accuracy")
scheduler = AsyncHyperBandScheduler(time_attr="training_iteration", reward_attr="mean_accuracy", max_t=200)
scheduler = AsyncHyperBandScheduler(time_attr="training_iteration",
reward_attr="mean_accuracy",
max_t=tune_spec['stop']['training_iteration'])
trials = run_experiments(experiments={'exp_tune': tune_spec},
search_alg=algo,
scheduler=scheduler)
Expand Down