Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: enable I/O of SBO terms from SBML file #235

Merged
merged 7 commits into from
Sep 19, 2019
45 changes: 44 additions & 1 deletion io/exportModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
% exportModel
% Exports a constraint-based model to an SBML file (L3V1 FBCv2)
%
% Input:
% model a model structure
% fileName filename to export the model to (without file extension)
% exportGeneComplexes true if gene complexes (all gene sets linked with
Expand Down Expand Up @@ -221,6 +222,16 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
modelSBML.compartment(i).metaid=model.comps{i};
end
%Prepare Miriam strings
if ~isempty(model.compMiriams{i})
[~,sbo_ind] = ismember('sbo',model.compMiriams{i}.name);
if sbo_ind > 0
modelSBML.compartment(i).sboTerm=str2double(regexprep(model.compMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
% remove the SBO term from compMiriams so the information is
% not duplicated in the "annotation" field later on
model.compMiriams{i}.name(sbo_ind) = [];
model.compMiriams{i}.value(sbo_ind) = [];
end
end
if ~isempty(model.compMiriams{i}) && isfield(modelSBML.compartment(i),'annotation')
modelSBML.compartment(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.comps{i} '">'];
modelSBML.compartment(i).annotation=[modelSBML.compartment(i).annotation '<bqbiol:is><rdf:Bag>'];
Expand Down Expand Up @@ -286,6 +297,16 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
modelSBML.species(i).isSetfbc_charge=0;
end
end
if ~isempty(model.metMiriams{i})
[~,sbo_ind] = ismember('sbo',model.metMiriams{i}.name);
if sbo_ind > 0
modelSBML.species(i).sboTerm=str2double(regexprep(model.metMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
% remove the SBO term from metMiriams so the information is
% not duplicated in the "annotation" field later on
model.metMiriams{i}.name(sbo_ind) = [];
model.metMiriams{i}.value(sbo_ind) = [];
end
end
if isfield(modelSBML.species,'annotation')
if ~isempty(model.metMiriams{i}) || ~isempty(model.metFormulas{i})
hasInchi=false;
Expand Down Expand Up @@ -319,7 +340,7 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
%Add the default values, as these will be the same in all entries
if i==1
if isfield(modelSBML.fbc_geneProduct, 'sboTerm')
modelSBML.fbc_geneProduct(i).sboTerm=252;
modelSBML.fbc_geneProduct(i).sboTerm=243;
end
end
%Copy the default values to the next index as long as it is not the
Expand All @@ -331,6 +352,16 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
if isfield(modelSBML.fbc_geneProduct,'metaid')
modelSBML.fbc_geneProduct(i).metaid=model.genes{i};
end
if ~isempty(model.geneMiriams{i})
[~,sbo_ind] = ismember('sbo',model.geneMiriams{i}.name);
if sbo_ind > 0
modelSBML.fbc_geneProduct(i).sboTerm=str2double(regexprep(model.geneMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
% remove the SBO term from compMiriams so the information is
% not duplicated in the "annotation" field later on
model.geneMiriams{i}.name(sbo_ind) = [];
model.geneMiriams{i}.value(sbo_ind) = [];
end
end
if ~isempty(model.geneMiriams{i}) && isfield(modelSBML.fbc_geneProduct(i),'annotation')
modelSBML.fbc_geneProduct(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_' model.genes{i} '">'];
modelSBML.fbc_geneProduct(i).annotation=[modelSBML.fbc_geneProduct(i).annotation '<bqbiol:is><rdf:Bag>'];
Expand Down Expand Up @@ -453,6 +484,18 @@ function exportModel(model,fileName,exportGeneComplexes,supressWarnings)
modelSBML.reaction(i).notes=[modelSBML.reaction(i).notes '</body></notes>'];
end

% Export SBO terms from rxnMiriams
if ~isempty(model.rxnMiriams{i})
[~,sbo_ind] = ismember('sbo',model.rxnMiriams{i}.name);
if sbo_ind > 0
modelSBML.reaction(i).sboTerm=str2double(regexprep(model.rxnMiriams{i}.value{sbo_ind},'SBO:','','ignorecase'));
% remove the SBO term from rxnMiriams so the information is not
% duplicated in the "annotation" field later on
model.rxnMiriams{i}.name(sbo_ind) = [];
model.rxnMiriams{i}.value(sbo_ind) = [];
end
end

%Export annotation information from rxnMiriams
if (~isempty(model.rxnMiriams{i}) && isfield(modelSBML.reaction(i),'annotation')) || ~isempty(model.eccodes{i})
modelSBML.reaction(i).annotation=['<annotation><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:vCard="http://www.w3.org/2001/vcard-rdf/3.0#" xmlns:bqbiol="http://biomodels.net/biology-qualifiers/" xmlns:bqmodel="http://biomodels.net/model-qualifiers/"><rdf:Description rdf:about="#meta_R_' model.rxns{i} '">'];
Expand Down
90 changes: 88 additions & 2 deletions io/importModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
% importModel
% Import a constraint-based model from a SBML file
%
% Input:
% fileName a SBML file to import
% removeExcMets true if exchange metabolites should be removed. This is
% needed to be able to run simulations, but it could also
Expand All @@ -12,6 +13,7 @@
% supressWarnings true if warnings regarding the model structure should
% be supressed (opt, default false)
%
% Output:
% model
% id model ID
% description description of model contents
Expand Down Expand Up @@ -162,6 +164,11 @@
compartmentOutside=cell(numel(modelSBML.compartment),1);
compartmentMiriams=cell(numel(modelSBML.compartment),1);

if isfield(modelSBML.compartment,'sboTerm') && numel(unique([modelSBML.compartment.sboTerm])) == 1
%If all the SBO terms are identical, don't add them to compMiriams
modelSBML.compartment = rmfield(modelSBML.compartment,'sboTerm');
end

for i=1:numel(modelSBML.compartment)
compartmentNames{i}=modelSBML.compartment(i).name;
compartmentIDs{i}=regexprep(modelSBML.compartment(i).id,'^C_','');
Expand All @@ -180,6 +187,10 @@
else
compartmentMiriams{i}=[];
end

if isfield(modelSBML.compartment(i),'sboTerm')
compartmentMiriams{i} = addSBOtoMiriam(compartmentMiriams{i},modelSBML.compartment(i).sboTerm);
end
end

%If there are no compartment names then use compartment id as name
Expand Down Expand Up @@ -209,7 +220,8 @@
%specified in the yeast consensus model both metabolites and genes are a
%type of 'species'. The metabolites have names starting with 'M_' and genes
%with 'E_'

geneSBOs = [];
metSBOs = [];
for i=1:numel(modelSBML.species)
if ~isSBML2COBRA
if length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:2),'E_')
Expand Down Expand Up @@ -240,8 +252,13 @@
else
geneShortNames{numel(geneShortNames)+1,1}='';
end
%If it's a complex keep the ID and name

%Get SBO term
if isfield(modelSBML.species(i),'sboTerm')
geneSBOs(end+1,1) = modelSBML.species(i).sboTerm;
end
elseif length(modelSBML.species(i).id)>=2 && strcmpi(modelSBML.species(i).id(1:3),'Cx_')
%If it's a complex keep the ID and name
complexIDs=[complexIDs;modelSBML.species(i).id];
complexNames=[complexNames;modelSBML.species(i).name];
else
Expand Down Expand Up @@ -322,6 +339,10 @@
elseif ~isfield(modelSBML.species(i),'annotation')
metaboliteFormula{numel(metaboliteFormula)+1,1}='';
end
%Get SBO term
if isfield(modelSBML.species(i),'sboTerm')
metSBOs(end+1,1) = modelSBML.species(i).sboTerm;
end
end

elseif isSBML2COBRA
Expand Down Expand Up @@ -357,6 +378,19 @@
if isfield(modelSBML.species(i),'notes')
metaboliteFormula{numel(metaboliteFormula)+1,1}=parseNote(modelSBML.species(i).notes,'FORMULA');
end

%Get Miriam info
if ~isempty(modelSBML.species(i).annotation)
metMiriam=parseMiriam(modelSBML.species(i).annotation);
else
metMiriam=[];
end
metaboliteMiriams{numel(metaboliteMiriams)+1,1}=metMiriam;

%Get SBO term
if isfield(modelSBML.species(i),'sboTerm')
metSBOs(end+1,1) = modelSBML.species(i).sboTerm;
end
end
%The following lines are executed regardless isSBML2COBRA setting
if isempty(modelSBML.species(i).id) || ~strcmpi(modelSBML.species(i).id(1:2),'E_')
Expand Down Expand Up @@ -412,6 +446,18 @@
end
end

%Add SBO terms to gene and metabolite miriam fields
if numel(unique(geneSBOs)) > 1 % don't add if they're all identical
for i = 1:numel(geneNames)
geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},geneSBOs(i));
end
end
if numel(unique(metSBOs)) > 1
for i = 1:numel(metaboliteNames)
metaboliteMiriams{i} = addSBOtoMiriam(metaboliteMiriams{i},metSBOs(i));
end
end

%Retrieve info on reactions
reactionNames=cell(numel(modelSBML.reaction),1);
reactionIDs=cell(numel(modelSBML.reaction),1);
Expand Down Expand Up @@ -444,6 +490,11 @@
parameter.value={modelSBML.parameter(:).value}';
end

if isfield(modelSBML.reaction,'sboTerm') && numel(unique([modelSBML.reaction.sboTerm])) == 1
%If all the SBO terms are identical, don't add them to rxnMiriams
modelSBML.reaction = rmfield(modelSBML.reaction,'sboTerm');
end

for i=1:numel(modelSBML.reaction)

%Check that the reaction doesn't produce a complex and nothing else. If
Expand Down Expand Up @@ -603,6 +654,11 @@
end
end

%Get SBO terms
if isfield(modelSBML.reaction(i),'sboTerm')
rxnMiriams{counter} = addSBOtoMiriam(rxnMiriams{counter}, modelSBML.reaction(i).sboTerm);
end

%Get ec-codes
eccode='';
if ~isempty(modelSBML.reaction(i).annotation)
Expand Down Expand Up @@ -846,6 +902,22 @@
%that matching geneShortNames in function below will work
if isfield(modelSBML,'fbc_geneProduct')
genes={modelSBML.fbc_geneProduct.fbc_id};

%Get gene Miriams if they were not retrieved above (this occurs
%when genes are stored as fbc_geneProduct instead of species)
if isempty(geneMiriams)
geneMiriams = cell(numel(genes),1);
if isfield(modelSBML.fbc_geneProduct,'sboTerm') && numel(unique([modelSBML.fbc_geneProduct.sboTerm])) == 1
%If all the SBO terms are identical, don't add them to geneMiriams
modelSBML.fbc_geneProduct = rmfield(modelSBML.fbc_geneProduct,'sboTerm');
end
for i = 1:numel(genes)
geneMiriams{i}=parseMiriam(modelSBML.fbc_geneProduct(i).annotation);
if isfield(modelSBML.fbc_geneProduct(i),'sboTerm')
geneMiriams{i} = addSBOtoMiriam(geneMiriams{i},modelSBML.fbc_geneProduct(i).sboTerm);
end
end
end
else
genes=getGeneList(grRules);
end
Expand Down Expand Up @@ -1122,3 +1194,17 @@
end
end
end

function miriam = addSBOtoMiriam(miriam,sboTerm)
%Appends SBO term to miriam structure

sboTerm = {['SBO:' sprintf('%07u',sboTerm)]}; % convert to proper format
if isempty(miriam)
miriam.name = {'sbo'};
miriam.value = sboTerm;
else
miriam.name(end+1) = {'sbo'};
miriam.value(end+1) = sboTerm;
end

end
17 changes: 15 additions & 2 deletions struct_conversion/ravenCobraWrapper.m
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
% ravenCobraWrapper
% Converts between RAVEN and COBRA structures
%
% Input:
% model a RAVEN/COBRA-compatible model structure
%
% Ouput:
% newModel a COBRA/RAVEN-compatible model structure
%
% This function is a bidirectional tool to convert between RAVEN and COBRA
Expand Down Expand Up @@ -243,7 +245,11 @@
i=ismember(extractedMiriamNames,'uniprot');
if any(i)
newModel.proteinisuniprotID=miriams(:,i);
end
end
i=ismember(extractedMiriamNames,'sbo');
if any(i)
newModel.geneSBOTerms=miriams(:,i);
end
end
if isfield(model,'geneShortNames')
newModel.geneNames=model.geneShortNames;
Expand Down Expand Up @@ -456,7 +462,7 @@
if isfield(model,'genes')
newModel.genes=model.genes;
end
if isfield(model,'geneiskegg__46__genesID') || isfield(model,'geneissgdID') || isfield(model,'proteinisuniprotID')
if any(isfield(model,{'geneiskegg__46__genesID','geneissgdID','proteinisuniprotID','geneSBOTerm'}))
for i=1:numel(model.genes)
counter=1;
newModel.geneMiriams{i,1}=[];
Expand All @@ -481,6 +487,13 @@
counter=counter+1;
end
end
if isfield(model,'geneSBOTerm')
if ~isempty(model.geneSBOTerm{i})
newModel.geneMiriams{i,1}.name{counter,1} = 'sbo';
newModel.geneMiriams{i,1}.value{counter,1} = model.geneSBOTerm{i};
counter=counter+1;
end
end
end
end
if isfield(model,'geneNames')
Expand Down