diff --git a/deepy/dataset/__init__.py b/deepy/dataset/__init__.py
index 9302fd6..61e7fd7 100644
--- a/deepy/dataset/__init__.py
+++ b/deepy/dataset/__init__.py
@@ -10,4 +10,6 @@
 from seq_mini_batch import SequentialMiniBatches
 from binarized_mnist import BinarizedMnistDataset
 from bunch_seq import BunchSequences
-from ondisk_dataset import OnDiskDataset
\ No newline at end of file
+from ondisk_dataset import OnDiskDataset
+from data_processor import DataProcessor
+from padding import pad_dataset
\ No newline at end of file
diff --git a/deepy/dataset/data_processor.py b/deepy/dataset/data_processor.py
new file mode 100644
index 0000000..708b625
--- /dev/null
+++ b/deepy/dataset/data_processor.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+class DataProcessor(object):
+    """
+    An abstract class for data processor.
+    """
+    def process(self, split, epoch, dataset):
+        return dataset
\ No newline at end of file
diff --git a/deepy/dataset/ondisk_dataset.py b/deepy/dataset/ondisk_dataset.py
index c92844e..56fc49b 100644
--- a/deepy/dataset/ondisk_dataset.py
+++ b/deepy/dataset/ondisk_dataset.py
@@ -6,6 +6,7 @@
 import types
 
 from . import Dataset
+from data_processor import DataProcessor
 from deepy.utils import FakeGenerator, StreamPickler, global_rand
 
 import logging as loggers
@@ -19,7 +20,10 @@ class OnDiskDataset(Dataset):
     """
 
     def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
-                 cached=False, post_processing=None, shuffle_memory=False, curriculum=None):
+                 cached=False, post_processing=None, shuffle_memory=False, data_processor=None):
+        """
+        :type data_processor: DataProcessor
+        """
         self._train_path = train_path
         self._valid_path = valid_path
         self._test_path = test_path
@@ -28,48 +32,41 @@ def __init__(self, train_path, valid_path=None, test_path=None, train_size=None,
         self._cached_train_data = None
         self._post_processing = post_processing if post_processing else lambda x: x
         self._shuffle_memory = shuffle_memory
-        self._curriculum = curriculum
-        self._curriculum_count = 0
-        if curriculum and not callable(curriculum):
-            raise Exception("curriculum function must be callable")
-        if curriculum and not cached:
-            raise Exception("curriculum learning needs training data to be cached")
+        self._epoch = 0
+        self._data_processor = data_processor
+        if data_processor and not isinstance(data_processor, DataProcessor):
+            raise Exception("data_processor must be an instance of DataProcessor.")
         if self._cache_on_memory:
             logging.info("Cache on memory")
             self._cached_train_data = list(map(self._post_processing, StreamPickler.load(open(self._train_path))))
             self._train_size = len(self._cached_train_data)
-            # if self._shuffle_memory:
-            #     logging.info("Shuffle on-memory data")
-            #     global_rand.shuffle(self._cached_train_data)
+            if self._shuffle_memory:
+                logging.info("Shuffle on-memory data")
+                global_rand.shuffle(self._cached_train_data)
 
-    def curriculum_train_data(self):
-        self._curriculum_count += 1
-        logging.info("curriculum learning: round {}".format(self._curriculum_count))
-        return self._curriculum(self._cached_train_data, self._curriculum_count)
+    def _process_data(self, split, epoch, dataset):
+        if self._data_processor:
+            return self._data_processor.process(split, epoch, dataset)
+        else:
+            return dataset
 
     def generate_train_data(self):
-        for data in StreamPickler.load(open(self._train_path)):
+        self._epoch += 1
+        data_source = self._cached_train_data if self._cache_on_memory else StreamPickler.load(open(self._train_path))
+        for data in self._process_data('train', self._epoch, data_source):
             yield self._post_processing(data)
 
     def generate_valid_data(self):
-        for data in StreamPickler.load(open(self._valid_path)):
+        data_source = StreamPickler.load(open(self._valid_path))
+        for data in self._process_data('valid', self._epoch, data_source):
             yield self._post_processing(data)
 
     def generate_test_data(self):
-        for data in StreamPickler.load(open(self._test_path)):
+        data_source = StreamPickler.load(open(self._test_path))
+        for data in self._process_data('test', self._epoch, data_source):
             yield self._post_processing(data)
 
     def train_set(self):
-        if self._cache_on_memory:
-            if self._shuffle_memory:
-                logging.info("shuffle on-memory data")
-                global_rand.shuffle(self._cached_train_data)
-            if self._curriculum:
-                if not isinstance(self._curriculum(self._cached_train_data, 1), types.GeneratorType):
-                    raise Exception("Curriculum function must be a generator.")
-                return FakeGenerator(self, "curriculum_train_data")
-            else:
-                return self._cached_train_data
         if not self._train_path:
             return None
         return FakeGenerator(self, "generate_train_data")
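The new DataProcessor hook replaces the old curriculum callback: OnDiskDataset now routes every split's batch stream through process(split, epoch, dataset) before iterating over it. A minimal sketch of a curriculum-style processor (the class name, batch budget, and file paths are hypothetical, not part of the patch):

from deepy.dataset import OnDiskDataset, DataProcessor

class GrowingPoolProcessor(DataProcessor):
    # Reveal more training batches as epochs progress; leave valid/test untouched.
    def process(self, split, epoch, dataset):
        if split != "train":
            return dataset
        batches = list(dataset)  # dataset may be a cached list or a StreamPickler generator
        return batches[:min(len(batches), 100 * epoch)]

data = OnDiskDataset("train.pkl", valid_path="valid.pkl",
                     data_processor=GrowingPoolProcessor())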
diff --git a/deepy/dataset/padding.py b/deepy/dataset/padding.py
index 4cb3316..d5ba304 100644
--- a/deepy/dataset/padding.py
+++ b/deepy/dataset/padding.py
@@ -4,7 +4,7 @@
 import numpy as np
 from deepy.utils import FLOATX
 
-def pad_dataset(subset, side, length):
+def pad_dataset(subset, side="right", length=-1):
     """
     Pad data set to specified length.
     Parameters:
diff --git a/deepy/layers/block.py b/deepy/layers/block.py
index 76fbe74..4d9e0e3 100644
--- a/deepy/layers/block.py
+++ b/deepy/layers/block.py
@@ -51,6 +51,14 @@ def compute_tensor(self, x):
     def compute_test_tesnor(self, x):
         return x
 
+    def load_params(self, path, exclude_free_params=False):
+        """
+        Load parameters to the block.
+        """
+        from deepy.networks.comp_graph import ComputationalGraph
+        model = ComputationalGraph(blocks=[self])
+        model.load_params(path, exclude_free_params=exclude_free_params)
+
     @property
     def all_parameters(self):
         return self.parameters
\ No newline at end of file
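Block.load_params is a thin convenience wrapper: it builds a throwaway ComputationalGraph around the block and reuses the network-level loader, so a pretrained block can be restored before it is composed into a larger model. A sketch, assuming block is an already-constructed Block and "encoder.gz" is a hypothetical parameter file previously saved by deepy:

block.load_params("encoder.gz")

# Equivalent to what the method does internally:
from deepy.networks.comp_graph import ComputationalGraph
ComputationalGraph(blocks=[block]).load_params("encoder.gz")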
""" - return Concatenate(axis=axis).compute(*vars) + concat_var = Concatenate(axis=axis).compute(*vars) + if axis == -1 or axis == vars[0].tensor.ndim - 1: + concat_var.output_dim = sum([x.output_dim for x in vars], 0) + return concat_var @neural_computation def ifelse(condition, then_branch, else_branch): diff --git a/deepy/layers/layer.py b/deepy/layers/layer.py index 1e176d0..033a5e2 100644 --- a/deepy/layers/layer.py +++ b/deepy/layers/layer.py @@ -93,10 +93,10 @@ def compute(self, *inputs, **kwargs): output = self.compute_tensor(*[t.tensor for t in inputs], **train_kwargs) test_output = self.compute_test_tesnor(*[t.test_tensor for t in inputs], **test_kwargs) - if type(output) != list: + if type(output) != list and type(output) != tuple: return NeuralVariable(output, test_output, self.output_dim) else: - return [NeuralVariable(*item) for item in zip(self.output_dims, output, test_output)] + return [NeuralVariable(*item) for item in zip(output, test_output, self.output_dims)] def prepare(self): """ diff --git a/deepy/layers/recurrent.py b/deepy/layers/recurrent.py index a895995..afb39a2 100644 --- a/deepy/layers/recurrent.py +++ b/deepy/layers/recurrent.py @@ -75,6 +75,22 @@ def merge_inputs(self, input_var, additional_inputs=None): def prepare(self): pass + @neural_computation + def compute_step(self, state, lstm_cell=None, input=None, additional_inputs=None): + """ + Compute one step in the RNN. + :return: one variable for RNN and GRU, multiple variables for LSTM + """ + input_map = self.merge_inputs(input, additional_inputs=additional_inputs) + input_map.update({"state": state, "lstm_cell": lstm_cell}) + output_map = self.compute_new_state(input_map) + outputs = [output_map.pop("state")] + outputs += output_map.values() + if len(outputs) == 1: + return outputs[0] + else: + return outputs + @neural_computation def get_initial_states(self, input_var): """ diff --git a/deepy/networks/comp_graph.py b/deepy/networks/comp_graph.py index 7dc7389..b38aea3 100644 --- a/deepy/networks/comp_graph.py +++ b/deepy/networks/comp_graph.py @@ -11,7 +11,7 @@ class ComputationalGraph(NeuralNetwork): """ def __init__(self, input_dim=0, model=None, input_tensor=None, monitors=None, - cost=None, output=None, outputs=None, blocks=None, input_vars=None, target_vars=None): + cost=None, output=None, outputs=None, blocks=None, input_vars=None, target_vars=None, output_map=None): """ Create a basic network. 
diff --git a/deepy/networks/comp_graph.py b/deepy/networks/comp_graph.py
index 7dc7389..b38aea3 100644
--- a/deepy/networks/comp_graph.py
+++ b/deepy/networks/comp_graph.py
@@ -11,7 +11,7 @@ class ComputationalGraph(NeuralNetwork):
     """
 
     def __init__(self, input_dim=0, model=None, input_tensor=None, monitors=None,
-                 cost=None, output=None, outputs=None, blocks=None, input_vars=None, target_vars=None):
+                 cost=None, output=None, outputs=None, blocks=None, input_vars=None, target_vars=None, output_map=None):
         """
         Create a basic network.
 
@@ -41,6 +41,9 @@ def __init__(self, input_dim=0, model=None, input_tensor=None, monitors=None,
         if not output and not cost:
             self._test_output = None
             self._test_outputs = [o.test_tensor for o in outputs]
+
+        self.output_map = output_map if output_map else {}
+
         if monitors:
             if type(monitors) == dict:
                 monitors = monitors.items()
diff --git a/deepy/preprocessing/__init__.py b/deepy/preprocessing/__init__.py
new file mode 100644
index 0000000..7cb5f9a
--- /dev/null
+++ b/deepy/preprocessing/__init__.py
@@ -0,0 +1,4 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from padding import pad_sequence
\ No newline at end of file
diff --git a/deepy/preprocessing/padding.py b/deepy/preprocessing/padding.py
new file mode 100644
index 0000000..4dce63d
--- /dev/null
+++ b/deepy/preprocessing/padding.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import numpy as np
+from itertools import izip, izip_longest
+
+def pad_sequence(batch, pad_value=0, output_mask=True, length=None):
+    if length:
+        max_len = length
+    else:
+        max_len = max(map(len, batch))
+    mask = None
+    if output_mask:
+        mask = []
+        for i in range(len(batch)):
+            mask.append([1] * len(batch[i]) + [0] * (max_len - len(batch[i])))
+        mask = np.array(mask, dtype="float32")
+    if length:
+        new_batch = []
+        for i in range(len(batch)):
+            new_row = list(batch[i]) + [pad_value] * (max_len - len(batch[i]))
+            new_batch.append(new_row)
+        new_batch = np.array(new_batch)
+    else:
+        new_batch = np.array(list(izip(*izip_longest(*batch, fillvalue=pad_value))))
+    return new_batch, mask
\ No newline at end of file
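pad_sequence right-pads a batch of variable-length sequences to a common length and, by default, also returns a float32 mask marking the real positions (the izip imports mean it targets Python 2, like the rest of the library). A quick sketch of its behavior:

from deepy.preprocessing import pad_sequence

batch, mask = pad_sequence([[1, 2, 3], [4, 5]])
# batch -> array([[1, 2, 3],
#                 [4, 5, 0]])
# mask  -> array([[ 1.,  1.,  1.],
#                 [ 1.,  1.,  0.]], dtype=float32)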
diff --git a/deepy/trainers/base.py b/deepy/trainers/base.py
index 42c0e96..a6da4a3 100644
--- a/deepy/trainers/base.py
+++ b/deepy/trainers/base.py
@@ -230,7 +230,7 @@ def _run_train(self, epoch, train_set, train_size=None):
         self.last_run_costs = costs
         return costs
 
-    def _run_valid(self, epoch, valid_set, dry_run=False):
+    def _run_valid(self, epoch, valid_set, dry_run=False, save_path=None):
         """
         Run one valid iteration, return true if to continue training.
         """
@@ -246,9 +246,10 @@ def _run_valid(self, epoch, valid_set, dry_run=False):
             self.best_cost = J
             self.best_epoch = epoch
 
-            if self.config.auto_save and self._skip_batches == 0:
+            save_path = save_path if save_path else self.config.auto_save
+            if save_path and self._skip_batches == 0:
                 self.network.train_logger.record_progress(self._progress)
-                self.network.save_params(self.config.auto_save, new_thread=True)
+                self.network.save_params(save_path, new_thread=True)
 
         info = ' '.join('%s=%.2f' % el for el in costs)
         epoch_str = "epoch=%d" % (epoch + 1)
diff --git a/deepy/utils/activations.py b/deepy/utils/activations.py
index 91b4fc3..d3b0d31 100644
--- a/deepy/utils/activations.py
+++ b/deepy/utils/activations.py
@@ -49,7 +49,8 @@ def compose(a, b):
     'theano_softmax': T.nnet.softmax,
 
     # shorthands
-    'relu': lambda z: z * (z > 0),
+    'relu': lambda z: T.nnet.relu(z),
+    'leaky_relu': lambda z: T.nnet.relu(z, 0.01),
     'trel': lambda z: z * (z > 0) * (z < 1),
     'trec': lambda z: z * (z > 1),
     'tlin': lambda z: z * (abs(z) > 1),
diff --git a/deepy/utils/decorations.py b/deepy/utils/decorations.py
index 622ddfa..88017e4 100644
--- a/deepy/utils/decorations.py
+++ b/deepy/utils/decorations.py
@@ -1,13 +1,14 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+from theano.tensor.var import TensorVariable
+
 def convert_to_theano_var(obj):
     """
     Convert neural vars to theano vars.
     :param obj: NeuralVariable or list or dict or tuple
     :return: theano var, test var, tensor found, neural var found
     """
-    from theano.tensor.var import TensorVariable
     from deepy.layers.neural_var import NeuralVariable
     if type(obj) == tuple:
         return tuple(convert_to_theano_var(list(obj)))
@@ -39,6 +40,18 @@ def convert_to_theano_var(obj):
         return obj.tensor, obj.test_tensor, False, True
     elif type(obj) == TensorVariable:
         return obj, obj, True, False
+    elif type(obj) == slice:
+        normal_args = []
+        test_args = []
+        theano_var_found = False
+        neural_var_found = False
+        for arg in [obj.start, obj.stop, obj.step]:
+            normal_var, test_var, tensor_found, neural_found = convert_to_theano_var(arg)
+            normal_args.append(normal_var)
+            test_args.append(test_var)
+            if tensor_found: theano_var_found = True
+            if neural_found: neural_var_found = True
+        return slice(*normal_args), slice(*test_args), theano_var_found, neural_var_found
     else:
         return obj, obj, False, False
 
@@ -74,7 +87,6 @@ def neural_computation(original_func, prefer_tensor=False):
     """
 
     def wrapper(*args, **kwargs):
-
         normal_args, test_args, tensor_found_in_args, neural_found_in_args = convert_to_theano_var(args)
 
         normal_kwargs, test_kwargs, tensor_found_in_kwargs, neural_found_in_kwargs = convert_to_theano_var(kwargs)
@@ -90,10 +102,15 @@ def wrapper(*args, **kwargs):
             # No neural variables are inputted, so output tensors
             return normal_result
         else:
-            # Output neural variables
+            # Output neural variables, auto set output_dim
            test_result = original_func(*test_args, **test_kwargs)
-            return convert_to_neural_var(normal_result, test_result)
-
+            result_var = convert_to_neural_var(normal_result, test_result)
+            if (isinstance(normal_result, TensorVariable) and
+                hasattr(normal_result.tag, "test_value") and
+                hasattr(normal_result.tag.test_value, "shape") and
+                normal_result.tag.test_value.shape):
+                result_var.output_dim = normal_result.tag.test_value.shape[-1]
+            return result_var
     return wrapper
 
 def neural_computation_prefer_tensor(original_func):
diff --git a/deepy/utils/neural_tensor.py b/deepy/utils/neural_tensor.py
index 60fb1d5..96626c9 100644
--- a/deepy/utils/neural_tensor.py
+++ b/deepy/utils/neural_tensor.py
@@ -1,8 +1,10 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+import theano.tensor as T
 from theano import tensor as theano_tensor
 from decorations import neural_computation
+from deepy.layers.neural_var import NeuralVariable
 
 
 class NeuralTensorNet(object):
@@ -17,8 +19,12 @@ def wrapper(*args, **kwargs):
 
 class NeuralTensor(object):
     """
     A class for exporting Theano tensor operations to neural variables.
+
     """
+    def constant(self, value, dtype="float32", dim=None):
+        return NeuralVariable(T.constant(value, dtype=dtype), dim=dim)
+
     def __getattr__(self, func_name):
         global deepy_nnet
         @neural_computation
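NeuralTensor.constant wraps a Theano constant in a NeuralVariable with an explicit dimension, so literal values can be mixed into a graph of neural variables. A sketch that instantiates the class directly (the library may also expose a shared instance, but that is not shown in this patch):

from deepy.utils.neural_tensor import NeuralTensor

T = NeuralTensor()
pad_id = T.constant(0, dtype="int32", dim=1)  # a NeuralVariable wrapping a scalar constant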