Merge pull request #82 from ruimashita/learning-rate-schedule
Learning rate schedule
ruimashita authored Dec 27, 2018
2 parents 6795610 + 5c0ea16 commit 8529940
Showing 15 changed files with 181 additions and 156 deletions.
31 changes: 19 additions & 12 deletions blueoil/blueoil_init.py
@@ -15,6 +15,7 @@
# =============================================================================
import inspect
import re
from collections import OrderedDict

import whaaaaat
from jinja2 import Environment, FileSystemLoader
@@ -101,6 +102,14 @@
# ]


learning_rate_schedule_map = OrderedDict([
("constant", "'constant' -> constant learning rate."),
("2-step-decay", "'2-step-decay' -> learning rate decrease by 1/10 on {epochs}/2 and {epochs}-1."),
("3-step-decay", "'3-step-decay' -> learning rate decrease by 1/10 on {epochs}/3 and {epochs}*2/3 and {epochs}-1"),
("3-step-decay-with-warmup", "'3-step-decay-with-warmup' -> warmup learning rate 1/1000 in first epoch, then train the same way as '3-step-decay'"),
])


def network_name_choices(task_type):
if task_type == 'classification':
return [definition['name'] for definition in classification_network_definitions]
@@ -291,21 +300,19 @@ def ask_questions():
}
initial_learning_rate_value = prompt(initial_learning_rate_value_question)

training_learning_rate_question = {
# learning rate schedule
learning_rate_schedule_question = {
'type': 'rawlist',
'name': 'value',
'message': 'choose learning rate setting(tune1 / tune2 / tune3 / fixed):',
'choices': ['tune1 -> "2 times decay"', 'tune2 -> "3 times decay"', 'tune3 -> "warm-up and 3 times decay"', 'fixed'],
'default': 'tune1 -> "2 times decay"',
}
choices_key_map = {
'tune1 -> "2 times decay"': 'tune1',
'tune2 -> "3 times decay"': 'tune2',
'tune3 -> "warm-up and 3 times decay"': 'tune3',
'fixed': 'fixed',
'message': 'choose learning rate schedule \
({epochs} is the number of training epochs you entered before):',
'choices': list(learning_rate_schedule_map.values()),
'default': learning_rate_schedule_map["constant"],
}
tmp_learning_rate_setting = prompt(training_learning_rate_question)
training_learning_rate_setting = choices_key_map[tmp_learning_rate_setting]
_tmp_learning_rate_schedule = prompt(learning_rate_schedule_question)
for key, value in learning_rate_schedule_map.items():
if value == _tmp_learning_rate_schedule:
learning_rate_schedule = key

if prompt(enable_data_augmentation):
all_augmentor = {}
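For reference, the interactive prompt in `ask_questions()` shows the human-readable descriptions from `learning_rate_schedule_map` and then maps the chosen description back to its schedule key. A minimal, self-contained sketch of that round trip (the `prompt` call is replaced by a hard-coded answer, which is an assumption purely for illustration):

```python
from collections import OrderedDict

# Same mapping as in blueoil_init.py: schedule key -> description shown in the prompt.
learning_rate_schedule_map = OrderedDict([
    ("constant", "'constant' -> constant learning rate."),
    ("2-step-decay", "'2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1."),
    ("3-step-decay", "'3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1."),
    ("3-step-decay-with-warmup", "'3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'."),
])

# Stand-in for the whaaaaat prompt: the user picks one of the descriptions.
_tmp_learning_rate_schedule = learning_rate_schedule_map["2-step-decay"]

# Reverse lookup, as in ask_questions(): recover the schedule key from the chosen description.
learning_rate_schedule = None
for key, value in learning_rate_schedule_map.items():
    if value == _tmp_learning_rate_schedule:
        learning_rate_schedule = key

print(learning_rate_schedule)  # prints: 2-step-decay
```

Keeping the map an `OrderedDict` preserves the display order of the choices in the prompt.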
78 changes: 75 additions & 3 deletions blueoil/generate_lmnet_config.py
@@ -16,10 +16,14 @@
import argparse
import os
import re
import importlib

import yaml
from jinja2 import Environment, FileSystemLoader

from lmnet.utils.module_loader import load_class


# TODO(wakisaka): objecte detection, segmentation
_TASK_TYPE_TEMPLATE_FILE = {
"classification": "classification.tpl.py",
@@ -132,12 +136,77 @@ def _blueoil_to_lmnet(blueoil_config):
else:
dataset_class_property = {"extend_dir": dataset_class_extend_dir}

# load dataset python module from string.
_loaded_dataset_module = importlib.import_module("lmnet.datasets.{}".format(dataset_module))
# load dataset class from the module.
_loaded_dataset_class = load_class(_loaded_dataset_module, dataset_class)
_dataset_class = type('DATASET_CLASS', (_loaded_dataset_class,), dataset_class_property)
_dataset_obj = _dataset_class(subset="train", batch_size=1)
classes = _dataset_obj.classes

# trainer
batch_size = blueoil_config["trainer"]["batch_size"]
initial_learning_rate = blueoil_config["trainer"]["initial_learning_rate"]
learning_rate_setting = blueoil_config["trainer"]["learning_rate_setting"]
learning_rate_schedule = blueoil_config["trainer"]["learning_rate_schedule"]
max_epochs = blueoil_config["trainer"]["epochs"]

step_per_epoch = float(_dataset_obj.num_per_epoch)/batch_size

learning_rate_func = None
learning_rate_kwargs = None
if learning_rate_schedule == "constant":
optimizer_kwargs = {"momentum": 0.9, "learning_rate": initial_learning_rate}
else:
optimizer_kwargs = {"momentum": 0.9}
learning_rate_func = "tf.train.piecewise_constant"

if learning_rate_schedule == "2-step-decay":
learning_rate_kwargs = {
"values": [
initial_learning_rate,
initial_learning_rate / 10,
initial_learning_rate / 100
],
"boundaries": [
int((step_per_epoch * (max_epochs - 1)) / 2),
int(step_per_epoch * (max_epochs - 1))
],
}

elif learning_rate_schedule == "3-step-decay":
learning_rate_kwargs = {
"values": [
initial_learning_rate,
initial_learning_rate / 10,
initial_learning_rate / 100,
initial_learning_rate / 1000
],
"boundaries": [
int((step_per_epoch * (max_epochs - 1)) * 1 / 3),
int((step_per_epoch * (max_epochs - 1)) * 2 / 3),
int(step_per_epoch * (max_epochs - 1))
],
}

elif learning_rate_schedule == "3-step-decay-with-warmup":
if max_epochs < 4:
raise ValueError("epoch number must be >= 4, when 3-step-decay-with-warmup is selected.")
learning_rate_kwargs = {
"values": [
initial_learning_rate / 1000,
initial_learning_rate,
initial_learning_rate / 10,
initial_learning_rate / 100,
initial_learning_rate / 1000
],
"boundaries": [
int(step_per_epoch * 1),
int((step_per_epoch * (max_epochs - 1)) * 1 / 3),
int((step_per_epoch * (max_epochs - 1)) * 2 / 3),
int(step_per_epoch * (max_epochs - 1))
],
}

# common
image_size = blueoil_config["common"]["image_size"]

@@ -167,10 +236,13 @@ def _blueoil_to_lmnet(blueoil_config):

"batch_size": batch_size,
"max_epochs": max_epochs,
"initial_learning_rate": initial_learning_rate,
"learning_rate_setting": learning_rate_setting,

"optimizer_kwargs": optimizer_kwargs,
"learning_rate_func": learning_rate_func,
"learning_rate_kwargs": learning_rate_kwargs,

"image_size": image_size,
"classes": classes,

"quantize_first_convolution": quantize_first_convolution,

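To make the boundary arithmetic above concrete, here is a stand-alone sketch that reproduces the '2-step-decay' branch of `_blueoil_to_lmnet` for hypothetical inputs (1,000 training samples, batch size 32, 10 epochs, initial learning rate 0.001; none of these numbers come from the commit):

```python
# Hypothetical inputs; in generate_lmnet_config.py they come from the blueoil
# config file and from the instantiated dataset object.
initial_learning_rate = 0.001
batch_size = 32
max_epochs = 10
num_per_epoch = 1000  # stands in for _dataset_obj.num_per_epoch

step_per_epoch = float(num_per_epoch) / batch_size  # 31.25 steps per epoch


def two_step_decay_kwargs():
    """Mirror of the '2-step-decay' branch above."""
    return {
        "values": [
            initial_learning_rate,
            initial_learning_rate / 10,
            initial_learning_rate / 100,
        ],
        "boundaries": [
            int((step_per_epoch * (max_epochs - 1)) / 2),  # 140: halfway through the first max_epochs-1 epochs
            int(step_per_epoch * (max_epochs - 1)),        # 281: start of the final epoch
        ],
    }


print(two_step_decay_kwargs())
# {'values': [0.001, 0.0001, 1e-05], 'boundaries': [140, 281]}
```

Because the last boundary sits at the start of the final epoch, the final epoch always runs at the smallest learning rate.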
12 changes: 6 additions & 6 deletions blueoil/templates/blueoil-config.tpl.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: {{ batch_size }}
epochs: {{ training_epochs }}
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: {{ training_learning_rate_setting }}
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: {{ learning_rate_schedule }}
initial_learning_rate: {{ initial_learning_rate_value }}

network:
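The `{{ ... }}` placeholders above are filled in by the config generator through Jinja2. A rough sketch of that substitution (the trimmed template string and the concrete values below are hypothetical, not taken from the commit; the real generator loads the full template via `Environment` and `FileSystemLoader`):

```python
from jinja2 import Template

# Trimmed-down stand-in for the trainer block of blueoil-config.tpl.yml.
trainer_tpl = Template(
    "trainer:\n"
    "  batch_size: {{ batch_size }}\n"
    "  epochs: {{ training_epochs }}\n"
    "  learning_rate_schedule: {{ learning_rate_schedule }}\n"
    "  initial_learning_rate: {{ initial_learning_rate_value }}\n"
)

print(trainer_tpl.render(
    batch_size=32,
    training_epochs=100,
    learning_rate_schedule="2-step-decay",
    initial_learning_rate_value=0.001,
))
# trainer:
#   batch_size: 32
#   epochs: 100
#   learning_rate_schedule: 2-step-decay
#   initial_learning_rate: 0.001
```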
34 changes: 4 additions & 30 deletions blueoil/templates/lmnet/classification.tpl.py
@@ -44,10 +44,7 @@
BATCH_SIZE = {{batch_size}}
DATA_FORMAT = "NHWC"
TASK = Tasks.CLASSIFICATION
# In order to get instance property `classes`, instantiate DATASET_CLASS.
dataset_obj = DATASET_CLASS(subset="train", batch_size=1)
CLASSES = dataset_obj.classes
step_per_epoch = float(dataset_obj.num_per_epoch)/BATCH_SIZE
CLASSES = {{classes}}

MAX_EPOCHS = {{max_epochs}}
SAVE_STEPS = {{save_steps}}
@@ -71,32 +68,9 @@

NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer

if '{{learning_rate_setting}}' != 'fixed':
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant

if '{{learning_rate_setting}}' == 'tune1':
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100],
"boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) / 2), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'tune2':
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
"boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'tune3':
if MAX_EPOCHS < 4:
raise ValueError("epoch number must be >= 4, when tune3 is selected.")
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}} / 1000, {{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
"boundaries": [int(step_per_epoch * 1), int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'fixed':
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9, "learning_rate": {{initial_learning_rate}}}
else:
raise ValueError
NETWORK.OPTIMIZER_KWARGS = {{optimizer_kwargs}}
NETWORK.LEARNING_RATE_FUNC = {{learning_rate_func}}
NETWORK.LEARNING_RATE_KWARGS = {{learning_rate_kwargs}}

NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
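The rendered `NETWORK.LEARNING_RATE_FUNC` / `NETWORK.LEARNING_RATE_KWARGS` pair is consumed later by the training loop, which is not part of this diff. The pure-Python helper below only mimics the lookup behaviour of `tf.train.piecewise_constant`, so the generated `values` and `boundaries` are easier to read; the function name and the concrete numbers are made up for illustration:

```python
def piecewise_constant(step, boundaries, values):
    """Return the learning rate for `step`, mimicking tf.train.piecewise_constant:
    values[0] while step <= boundaries[0], values[1] up to boundaries[1], and so on."""
    for boundary, value in zip(boundaries, values):
        if step <= boundary:
            return value
    return values[-1]


# Hypothetical kwargs, shaped like the generator's '2-step-decay' output.
kwargs = {"values": [0.001, 0.0001, 0.00001], "boundaries": [140, 281]}

for step in (0, 140, 141, 281, 282, 500):
    print(step, piecewise_constant(step, kwargs["boundaries"], kwargs["values"]))
# prints 0.001 for steps 0 and 140, 0.0001 for 141 and 281, and 1e-05 afterwards
```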
42 changes: 5 additions & 37 deletions blueoil/templates/lmnet/object_detection.tpl.py
@@ -49,10 +49,7 @@
BATCH_SIZE = {{batch_size}}
DATA_FORMAT = "NHWC"
TASK = Tasks.OBJECT_DETECTION
# In order to get instance property `classes`, instantiate DATASET_CLASS.
dataset_obj = DATASET_CLASS(subset="train", batch_size=1)
CLASSES = dataset_obj.classes
step_per_epoch = float(dataset_obj.num_per_epoch)/BATCH_SIZE
CLASSES = {{classes}}

MAX_EPOCHS = {{max_epochs}}
SAVE_STEPS = {{save_steps}}
@@ -90,39 +87,10 @@
])

NETWORK = EasyDict()

if '{{optimizer}}' == 'GradientDescentOptimizer':
NETWORK.OPTIMIZER_CLASS = tf.train.GradientDescentOptimizer
elif '{{optimizer}}' == 'MomentumOptimizer':
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9}
elif '{{optimizer}}' == 'AdamOptimizer':
NETWORK.OPTIMIZER_CLASS = tf.train.AdamOptimizer

if '{{learning_rate_setting}}' != 'fixed':
NETWORK.LEARNING_RATE_FUNC = tf.train.piecewise_constant

if '{{learning_rate_setting}}' == 'tune1':
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100],
"boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) / 2), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'tune2':
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
"boundaries": [int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'tune3':
if MAX_EPOCHS < 4:
raise ValueError("epoch number must be >= 4, when tune3 is selected.")
NETWORK.LEARNING_RATE_KWARGS = {
"values": [{{initial_learning_rate}} / 1000, {{initial_learning_rate}}, {{initial_learning_rate}} / 10, {{initial_learning_rate}} / 100, {{initial_learning_rate}} / 1000],
"boundaries": [int(step_per_epoch * 1), int((step_per_epoch * (MAX_EPOCHS - 1)) * 1 / 3), int((step_per_epoch * (MAX_EPOCHS - 1)) * 2 / 3), int(step_per_epoch * (MAX_EPOCHS - 1))],
}
elif '{{learning_rate_setting}}' == 'fixed':
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9, "learning_rate": {{initial_learning_rate}}}
else:
raise ValueError
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
NETWORK.OPTIMIZER_KWARGS = {{optimizer_kwargs}}
NETWORK.LEARNING_RATE_FUNC = {{learning_rate_func}}
NETWORK.LEARNING_RATE_KWARGS = {{learning_rate_kwargs}}

NETWORK.IMAGE_SIZE = IMAGE_SIZE
NETWORK.BATCH_SIZE = BATCH_SIZE
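Since the generator now passes `optimizer_kwargs`, `learning_rate_func` and `learning_rate_kwargs` straight into the templates, a config generated with the 'constant' schedule would come out roughly as below. The learning rate value is hypothetical, `None` is simply the rendered form of the generator's unset fields, and the `easydict` import is assumed from the surrounding template code:

```python
import tensorflow as tf
from easydict import EasyDict  # assumed; the templates build NETWORK as an EasyDict

NETWORK = EasyDict()
NETWORK.OPTIMIZER_CLASS = tf.train.MomentumOptimizer
# Rendered from {{optimizer_kwargs}} when learning_rate_schedule is 'constant':
NETWORK.OPTIMIZER_KWARGS = {"momentum": 0.9, "learning_rate": 0.001}
# Rendered from {{learning_rate_func}} and {{learning_rate_kwargs}}:
NETWORK.LEARNING_RATE_FUNC = None
NETWORK.LEARNING_RATE_KWARGS = None
```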
2 changes: 1 addition & 1 deletion docs/usage/init.md
@@ -22,7 +22,7 @@ This is an example of configuration.
image size (integer x integer): 32x32
how many epochs do you run training (integer): 100
initial learning rate: 0.001
message': 'choose learning rate setting(tune1 / tune2 / tune3 / fixed): tune1 -> "2 times decay"
choose learning rate schedule ({epochs} is the number of training epochs you entered before): '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
enable data augmentation? No
apply quantization at the first layer: yes
```
12 changes: 6 additions & 6 deletions tests/config/caltech101_classification.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: 1
epochs: 1
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: tune1
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: constant
initial_learning_rate: 0.001

network:
12 changes: 6 additions & 6 deletions tests/config/caltech101_classification_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: 1
epochs: 1
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: tune1
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: constant
initial_learning_rate: 0.001

network:
12 changes: 6 additions & 6 deletions tests/config/delta_mark_classification.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: 1
epochs: 1
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: tune1
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: constant
initial_learning_rate: 0.001

network:
12 changes: 6 additions & 6 deletions tests/config/delta_mark_classification_has_validation.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: 1
epochs: 1
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: tune1
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: constant
initial_learning_rate: 0.001

network:
12 changes: 6 additions & 6 deletions tests/config/delta_mark_object_detection.yml
@@ -11,12 +11,12 @@ dataset:
trainer:
batch_size: 1
epochs: 1
# supported 'learning_rate_setting' is 'tune1', 'tune2', 'tune3', 'fixed'.
# 'tune1' is 2 times decay, learning rate reduce to 1/10 on epoch/2 and epoch-1.
# 'tune2' is 3 times decay, learning rate reduce to 1/10 on epoch/3 and epoch*2/3 and epoch-1.
# 'tune3' is warmup and 3 times decay, warmup learning rate 1/1000 in 1 epoch, then train same as 'tune2'.
# 'fixed' is constant learning rate.
learning_rate_setting: tune1
# supported 'learning_rate_schedule' values are 'constant', '2-step-decay', '3-step-decay' and '3-step-decay-with-warmup' ({epochs} is the number of training epochs you entered before).
# 'constant' -> constant learning rate.
# '2-step-decay' -> learning rate decreases by 1/10 at {epochs}/2 and {epochs}-1.
# '3-step-decay' -> learning rate decreases by 1/10 at {epochs}/3, {epochs}*2/3 and {epochs}-1.
# '3-step-decay-with-warmup' -> learning rate is warmed up at 1/1000 of its initial value for the first epoch, then training proceeds as in '3-step-decay'.
learning_rate_schedule: constant
initial_learning_rate: 0.001

network: