Fix rom dataobj #2051
base: devel
Changes from all commits
@@ -107,26 +107,28 @@ def __init__(self):
       @ Out, None
     """
     super().__init__()
-    self.amITrained = False # boolean flag, is the ROM trained?
-    self.printTag = 'ROM MODEL' # label
-    self.cvInstanceName = None # the name of Cross Validation instance
-    self.cvInstance = None # Instance of provided cross validation
-    self._estimatorNameList = [] # the name list of estimator instance
-    self._estimatorList = [] # List of instances of provided estimators (ROM)
-    self._interfaceROM = None # Instance of provided ROM
-
-    self.pickled = False # True if ROM comes from a pickled rom
-    self.pivotParameterId = 'time' # The name of pivot parameter
-    self.canHandleDynamicData = False # check if the model can autonomously handle the time-dependency
-                                      # if not and time-dep data are passed in, a list of ROMs are constructed
-    self.isADynamicModel = False # True if the ROM is time-dependent
-    self.supervisedContainer = [] # List ROM instances
-    self.historySteps = [] # The history steps of pivot parameter
-    self.segment = False # True if segmenting/clustering/interpolating is requested
-    self.numThreads = 1 # number of threads used by the ROM
-    self.seed = None # seed information
-    self._segmentROM = None # segment rom instance
-    self._paramInput = None # the parsed xml input
+    self.amITrained = False # boolean flag, is the ROM trained?
+    self.printTag = 'ROM MODEL' # label
+    self.cvInstanceName = None # the name of Cross Validation instance
+    self.cvInstance = None # Instance of provided cross validation
+    self._estimatorNameList = [] # the name list of estimator instance
+    self._estimatorList = [] # List of instances of provided estimators (ROM)
+    self._interfaceROM = None # Instance of provided ROM
+    self.trainingSet = None # Data or instance from which this was trained
+
+    self.pickled = False # True if ROM comes from a pickled rom
+    self.pivotParameterId = 'time' # The name of pivot parameter
+    self.canHandleDynamicData = False # check if the model can autonomously handle the time-dependency
+                                      # if not and time-dep data are passed in, a list of ROMs are constructed
+
+    self.isADynamicModel = False # True if the ROM is time-dependent
+    self.supervisedContainer = [] # List ROM instances
+    self.historySteps = [] # The history steps of pivot parameter
+    self.segment = False # True if segmenting/clustering/interpolating is requested
+    self.numThreads = 1 # number of threads used by the ROM
+    self.seed = None # seed information
+    self._segmentROM = None # segment rom instance
+    self._paramInput = None # the parsed xml input

     # for Clustered ROM
     self.addAssemblerObject('Classifier', InputData.Quantity.zero_to_one)
@@ -347,66 +349,101 @@ def train(self,trainingSet):
       @ In, trainingSet, dict or PointSet or HistorySet, data used to train the ROM; if an HistorySet is provided the a list of ROM is created in order to create a temporal-ROM
       @ Out, None
     """
-    if type(trainingSet).__name__ == 'ROM':
-      self.trainingSet = copy.copy(trainingSet.trainingSet)
-      self.amITrained = copy.deepcopy(trainingSet.amITrained)
-      self.supervisedContainer = copy.deepcopy(trainingSet.supervisedContainer)
-      self.seed = trainingSet.seed
+    if isinstance(trainingSet, ROM):
+      self.trainFromInstance(trainingSet)
     else:
       # TODO: The following check may need to be moved to Dummy Class -- wangc 7/30/2018
-      if type(trainingSet).__name__ != 'dict' and trainingSet.type == 'HistorySet':
+      if type(trainingSet) != dict and trainingSet.type == 'HistorySet':
Review comment: First, could you add a description to list all possible data structures for `trainingSet`?

Review comment: This looks like a specific check for HistorySet alignment, right? I don't know if we need to find out all the different approaches to ROMs within this PR, do we? This sounds like a bigger issue.
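One possible shape for that description, as a sketch only; the types listed are those already named in the existing docstring plus the new `isinstance(trainingSet, ROM)` branch in this diff, so anything beyond these would need to be confirmed against the code:

```python
def train(self, trainingSet):
  """
    Trains the ROM on the provided data.
    @ In, trainingSet, dict or PointSet or HistorySet or ROM, data used to train the ROM:
         - dict: already-extracted {variable name: values} mapping (legacy SVL training)
         - PointSet: static (non time-dependent) training data
         - HistorySet: time-dependent training data; histories must be synchronized
         - ROM: an already-trained ROM instance, copied via trainFromInstance
    @ Out, None
  """
```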
         if not trainingSet.checkIndexAlignment(indexesToCheck=self.pivotParameterId):
-          self.raiseAnError(IOError, "The data provided by the data object", trainingSet.name, "is not synchonized!",
-                            "The time-dependent ROM requires all the histories are synchonized!")
+          self.raiseAnError(
+              IOError,
+              f"The data provided by the data object {trainingSet.name}, is not synchonized!",
+              "The time-dependent ROM requires all the histories are synchonized!"
+          )
-      self.trainingSet = copy.copy(self._inputToInternal(trainingSet))
-      self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
       if not self.supervisedContainer[0].requireJobHandler and 'jobHandler' in self.assemblerDict:
         self.assemblerDict.pop("jobHandler")
+
+      # LEGACY SupervisedLearning (SVL) objects train on dictionaries/matrices
+      # New SVL can bypass the data manip and use the dataset directly
+      useDict = self.supervisedContainer[0].needsDictTraining
+      if useDict:
+        self.trainingSet = copy.copy(self._inputToInternal(trainingSet))
+      else:
+        self.trainingSet = trainingSet
+
+      self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
Review comment: Could you check to see if this line works with your proposed data structure? In Model.py, this method only accepts dict or list as input.
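One possible way to address this, sketched against the new `useDict` branch in this diff rather than taken from the PR itself; whether new-style SVLs need their own alias handling is left open:

```python
useDict = self.supervisedContainer[0].needsDictTraining
if useDict:
  self.trainingSet = copy.copy(self._inputToInternal(trainingSet))
  # apply the alias substitution only to the dict form, which is what
  # Model._replaceVariablesNamesWithAliasSystem accepts (dict or list)
  self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
else:
  # new-style SVLs consume the dataset directly; alias handling TBD
  self.trainingSet = trainingSet
```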
       self.supervisedContainer[0].setAssembledObjects(self.assemblerDict)
       # if training using ROMCollection, special treatment
-      if self.segment:
+      if not useDict or self.segment:
         self.supervisedContainer[0].train(self.trainingSet)
       else:
-        # not a collection # TODO move time-dependent snapshots to collection!
-        ## time-dependent or static ROM?
-        if any(type(x).__name__ == 'list' for x in self.trainingSet.values()):
-          # we need to build a "time-dependent" ROM
-          self.isADynamicModel = True
-          if self.pivotParameterId not in list(self.trainingSet.keys()):
-            self.raiseAnError(IOError, 'The pivot parameter "{}" is not present in the training set.'.format(self.pivotParameterId),
-                              'A time-dependent-like ROM cannot be created!')
-          if type(self.trainingSet[self.pivotParameterId]).__name__ != 'list':
-            self.raiseAnError(IOError, 'The pivot parameter "{}" is not a list.'.format(self.pivotParameterId),
-                              " Are you sure it is part of the output space of the training set?")
-          self.historySteps = self.trainingSet.get(self.pivotParameterId)[-1]
-          if not len(self.historySteps):
-            self.raiseAnError(IOError, "the training set is empty!")
-          # intrinsically time-dependent or does the Gate need to handle it?
-          if self.canHandleDynamicData:
-            # the ROM is able to manage the time dependency on its own
-            self.supervisedContainer[-1].train(self.trainingSet)
-          else:
-            # TODO we can probably migrate this time-dependent handling to a type of ROMCollection!
-            # we need to construct a chain of ROMs
-            # the check on the number of time steps (consistency) is performed inside the historySnapShoots method
-            # get the time slices
-            newTrainingSet = mathUtils.historySnapShoots(self.trainingSet, len(self.historySteps))
-            assert type(newTrainingSet).__name__ == 'list'
-            # copy the original ROM
-            originalROM = self.supervisedContainer[0]
-            # start creating and training the time-dep ROMs
-            self.supervisedContainer = [copy.deepcopy(originalROM) for _ in range(len(self.historySteps))]
-            # train
-            for ts in range(len(self.historySteps)):
-              self.supervisedContainer[ts].train(newTrainingSet[ts])
-        # if a static ROM ...
-        else:
-          #self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
-          self.supervisedContainer[0].train(self.trainingSet)
-      # END if ROMCollection
+        self.trainLegacy()
     self.amITrained = True

+  def trainFromInstance(self, rom):
+    """
+      Trains ROM model from an already-trained ROM instance
+      @ In, rom, ROM, rom instance
+      @ Out, None
+    """
+    self.trainingSet = copy.copy(rom.trainingSet)
+    self.amITrained = copy.deepcopy(rom.amITrained)
+    self.supervisedContainer = copy.deepcopy(rom.supervisedContainer)
+    self.seed = rom.seed
+
+  def trainLegacy(self):
+    """
+      Train SVLs in the legacy style, with data manipulations and dynamic behavior checking
+      @ In, None
+      @ Out, None
+    """
+    ## time-dependent or static ROM?
+    if any(isinstance(x, list) for x in self.trainingSet.values()):
+      # we need to build a "time-dependent" ROM
+      self.isADynamicModel = True
+
+      if self.pivotParameterId not in list(self.trainingSet.keys()):
+        self.raiseAnError(
+            IOError,
+            f'The pivot parameter "{self.pivotParameterId}" is not present in the training set. ',
+            'A time-dependent-like ROM cannot be created!'
+        )
+
+      if type(self.trainingSet[self.pivotParameterId]).__name__ != 'list':
+        self.raiseAnError(
+            IOError,
+            f'The pivot parameter "{self.pivotParameterId}" is not a list. ',
+            'Are you sure it is part of the output space of the training set?'
+        )
+
+      self.historySteps = self.trainingSet.get(self.pivotParameterId)[-1]
+
+      if not len(self.historySteps):
+        self.raiseAnError(IOError, "the training set is empty!")
+
+      # intrinsically time-dependent or does the Gate need to handle it?
+      if self.canHandleDynamicData:
+        # the ROM is able to manage the time dependency on its own
+        self.supervisedContainer[-1].train(self.trainingSet, indexMap=self.trainingSet.get('_indexMap', None))
+      else:
+        # TODO we can probably migrate this time-dependent handling to a type of ROMCollection!
+        # we need to construct a chain of ROMs
+        # the check on the number of time steps (consistency) is performed inside the historySnapShoots method
+        # get the time slices
+        newTrainingSet = mathUtils.historySnapShoots(self.trainingSet, len(self.historySteps))
+        assert type(newTrainingSet).__name__ == 'list'
+        # copy the original ROM
+        originalROM = self.supervisedContainer[0]
+        # start creating and training the time-dep ROMs
+        self.supervisedContainer = [copy.deepcopy(originalROM) for _ in range(len(self.historySteps))]
+        for ts in range(len(self.historySteps)):
+          self.supervisedContainer[ts].train(newTrainingSet[ts])
+    else:
+      self.supervisedContainer[0].train(self.trainingSet)
+
   def confidence(self,request,target = None):
     """
       This is to get a value that is inversely proportional to the confidence that we have
Review comment: I realize `originalVariables` is a deepcopy of `sampledVars`, but it would be nice if this set of `isinstance` checks were performed on the same variable instead of on two different ones.
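A minimal illustration of that suggestion; the scalar/array handling here is hypothetical since the surrounding code is not shown in this diff, and only the names `originalVariables` and `sampledVars` come from the comment above:

```python
import copy
import numpy as np

def normalizeSampledVars(sampledVars):
  """Hypothetical sketch: run every isinstance check on one variable."""
  originalVariables = copy.deepcopy(sampledVars)
  for name, value in originalVariables.items():
    # keep the type checks on originalVariables only, instead of mixing
    # checks between originalVariables and sampledVars
    if isinstance(value, (int, float)):
      originalVariables[name] = float(value)
    elif isinstance(value, (list, np.ndarray)):
      originalVariables[name] = np.asarray(value)
  return originalVariables
```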