Merge pull request #1761 from opencobra/develop

Develop
opencobra · Apr 22, 2021 · 6c1ba69 · 6c1ba69
2 parents 9478adc + b74c212
commit 6c1ba69
Show file tree

Hide file tree

Showing 801 changed files with 39,053 additions and 5,901 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -46,3 +46,6 @@
 	path = external/base/samplers/looplessFluxSampler
 	url = https://github.com/rmtfleming/looplessFluxSampler
 	ignore = dirty
+[submodule "external/base/utilities/condalab"]
+	path = external/base/utilities/condalab
+	url = https://github.com/sg-s/condalab
diff --git a/external/base/utilities/condalab b/external/base/utilities/condalab
diff --git a/external/dataIntegration/mCADRE/pruningModel.m b/external/dataIntegration/mCADRE/pruningModel.m
@@ -66,7 +66,17 @@
                 model_rem = removeRxns(tissueModel, r);
             end
             % Check for inactive reactions after removal of r
-            [fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
+            try
+                [fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
+                rStatus_and_not_error = true;
+            catch
+                rStatus_and_not_error = false;
+            end
+        else
+            rStatus_and_not_error = false;
+        end
+
+        if rStatus_and_not_error
             inactive_G= [ r; model_rem.rxns(fluxConsistentRxnBool==0)];
 
             inactiveCore = intersect(inactive_G, coreRxn);

diff --git a/...analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m b/...analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m
@@ -113,7 +113,7 @@ function analyzeMgPipeResults(infoFilePath,resPath,varargin)
             cd(violinPath)
 
             % create violin plots for net uptake and secretion files
-            if any(contains(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
+            if any(strcmp(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
                 makeViolinPlots(sampleData, infoFile, 'stratification',sampleGroupHeaders{j}, 'plottedFeature', filename, 'unit', 'mmol/person/day')
             end
             cd(currentDir)

diff --git a/...is/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m b/...is/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
diff --git a/...s/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m b/...s/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m
@@ -17,7 +17,7 @@
 % AUTHOR
 %       - Almut Heinken, 08/2020
 
-reactionDatabase = readtable('reactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
+reactionDatabase = readtable('ReactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
 reactionDatabase=table2cell(reactionDatabase);
 
 reactionAbundance = readtable(reactionAbundancePath, 'ReadVariableNames', false);
@@ -54,6 +54,4 @@
     end
 end
 
-writetable(cell2table(subsystemAbundance),'SubsystemAbundance.txt','FileType','text','WriteVariableNames',false,'Delimiter','\t');
-
 end
diff --git a/...ultiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m b/...ultiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m
@@ -0,0 +1,109 @@
+function ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
+% Part of the Microbiome Modeling Toolbox. This function calculates the
+% total abundance of reactions of interest in a given microbiome sample
+% based on the strain-level composition.
+% Reaction presence or absence in each strain is derived from the reaction content
+% of the respective AGORA model.
+%
+% USAGE
+%
+%    ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
+%
+% INPUTS:
+%    abundancePath:          Path to the .csv file with the abundance data.
+%                            Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
+%    modelPath:              Folder containing the strain-specific AGORA models
+% OPTIONAL INPUTS:
+%    rxnsList:               List of reactions for which the abundance
+%                            should be calculated (if left empty: all
+%                            reactions in all models)
+%    numWorkers:             Number of workers used for parallel pool. If
+%                            left empty or omitted, no pool is started by
+%                            this function. Parallelization is recommended
+%                            if all reactions are computed.
+%
+% OUTPUT:
+%    ReactionAbundance       Cell array with the total abundance for each
+%                            microbiome (rows) and reaction (columns); the
+%                            first row holds the reaction IDs and the first
+%                            column the sample names
+%
+% .. Author: - Almut Heinken, 04/2021
+
+% read the csv file with the abundance data
+abundance = readtable(abundancePath, 'ReadVariableNames', false);
+abundance = table2cell(abundance);
+if isnumeric(abundance{2, 1})
+    abundance(:, 1) = [];
+end
+
+% an omitted reaction list is handled like an empty one
+if ~exist('rxnsList', 'var')
+    rxnsList = {};
+end
+
+% load the models (the first row of the abundance data is the header row)
+nRows = size(abundance, 1);
+modelsList = cell(nRows, 1);
+for i = 2:nRows
+    modelsList{i, 1} = readCbModel([modelPath filesep abundance{i, 1} '.mat']);
+end
+
+if isempty(rxnsList)  % define reaction list if not entered
+    fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n')
+    % collect the reactions of all models listed in the abundance input file
+    rxnsList = {};
+    for i = 2:nRows
+        rxnsList = vertcat(modelsList{i, 1}.rxns, rxnsList);
+    end
+    rxnsList = unique(rxnsList);
+end
+nRxns = length(rxnsList);
+
+% extract which reactions are in which model: rxnPresent(i, j) is true if
+% the model of the strain in abundance row i contains rxnsList{j}
+rxnPresent = false(nRows, nRxns);
+for i = 2:nRows
+    rxnPresent(i, :) = reshape(ismember(rxnsList, modelsList{i, 1}.rxns), 1, []);
+end
+
+% prepare table for the total abundance: reaction IDs in the first row,
+% sample names (header row of the abundance data) in the first column
+nSamples = size(abundance, 2) - 1;
+ReactionAbundance = cell(nSamples + 1, nRxns + 1);
+ReactionAbundance(1, 2:end) = reshape(rxnsList, 1, []);
+ReactionAbundance(2:end, 1) = reshape(abundance(1, 2:end), [], 1);
+
+% use parallel pool if workers specified as input
+if exist('numWorkers', 'var') && ~isempty(numWorkers) && numWorkers > 0
+    if isempty(gcp('nocreate'))
+        parpool(numWorkers)
+    end
+end
+
+clear abundance
+
+totalAbun = cell(1, nSamples + 1);
+parfor i = 2:(nSamples + 1)
+    % reload the file to avoid running out of memory
+    sampleData = readtable(abundancePath, 'ReadVariableNames', false);
+    sampleData = table2cell(sampleData);
+    if isnumeric(sampleData{2, 1})
+        sampleData(:, 1) = [];
+    end
+
+    % add the abundance of each strain to every reaction present in it
+    sampleTotals = zeros(nRxns, 1);
+    for j = 2:size(sampleData, 1)
+        present = rxnPresent(j, :);
+        sampleTotals(present) = sampleTotals(present) + str2double(sampleData{j, i});
+    end
+    totalAbun{i} = sampleTotals;
+end
+
+% collect the temporarily stored abundances to put together the table
+for i = 2:(nSamples + 1)
+    ReactionAbundance(i, 2:end) = num2cell(totalAbun{i}');
+end
+
+end
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m
@@ -51,7 +51,7 @@
 
 % Define the list of metabolites required by at least one AGORA model for
 % growth
-essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_adpcbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'};
+essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'};
 
 % fix any exchange nomenclature issues
 adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)');

diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m
@@ -0,0 +1,99 @@
+function [exch,modelStoragePath] = buildModelStorage(microbeNames,modPath)
+% Collects the exchanged metabolites of all given microbe models and stores
+% a converted copy of each model in the folder 'modelStorage' (created in
+% the current working directory if needed). In the stored models, the
+% exchange reactions are removed, the [e] compartment is renamed to [u],
+% all reactions and metabolites are prefixed with the microbe name, and
+% reversible transport reactions connect each prefixed [u] metabolite with
+% its unprefixed counterpart.
+%
+% USAGE
+%
+%    [exch,modelStoragePath] = buildModelStorage(microbeNames,modPath)
+%
+% INPUTS:
+%    microbeNames:       Column cell array with the names of the microbe
+%                        models (file names without the '.mat' extension)
+%    modPath:            Folder containing the microbe models
+%
+% OUTPUTS:
+%    exch:               Metabolites taking part in the exchange reactions
+%                        (biomass excluded) of at least one model
+%    modelStoragePath:   Full path to the folder with the stored models
+
+currentDir = pwd;
+storageDir = fullfile(currentDir, 'modelStorage');
+if exist(storageDir, 'dir') ~= 7
+    mkdir(storageDir)
+end
+cd(storageDir)
+% return to the starting folder even if one of the steps below fails
+restoreDir = onCleanup(@() cd(currentDir));
+modelStoragePath = pwd;
+
+% collect the metabolites exchanged by any of the models
+exch = {};
+for j = 1:size(microbeNames, 1)
+    model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
+    exStruct = findSExRxnInd(model);
+    new_exch = findMetsFromRxns(model,model.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool));
+    exch = union(exch,new_exch);
+end
+
+% get already built reconstructions
+dInfo = dir(modelStoragePath);
+builtModels = strrep({dInfo.name}', '.mat', '');
+missingMicrobes = setdiff(microbeNames, builtModels);
+
+%% create a new extracellular space [u] for the microbes not stored yet
+for j = 1:numel(missingMicrobes)
+    microbe = missingMicrobes{j};
+    prefix = strcat(microbe, '_');
+    model = readCbModel([modPath filesep microbe '.mat']);
+
+    % temp fix
+    if isfield(model,'C')
+        model=rmfield(model,'C');
+        if isfield(model,'d')
+            model=rmfield(model,'d');
+        end
+    end
+
+    % removing possible constraints of the bacs
+    selExc = findExcRxns(model);
+    exchangeRxns = model.rxns(find(selExc));
+    exchangeRxns = exchangeRxns(strncmp(exchangeRxns, 'EX', 2));
+    model = changeRxnBounds(model, exchangeRxns, -1000, 'l');
+    model = changeRxnBounds(model, exchangeRxns, 1000, 'u');
+
+    model = convertOldStyleModel(model);
+    exmod = model.rxns(strncmp('EX_', model.rxns, 3));  % find exchange reactions
+    eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]')));  % exchanged metabolites
+    uMets = regexprep(eMets, '\[e\]', '\[u\]');
+
+    % dummy model with one reversible transport reaction per exchanged
+    % metabolite: microbe-specific [u] metabolite <-> shared [u] metabolite
+    dummyMicEU = createModel();
+    dummyMicEUmets = [strcat(prefix, uMets); uMets];
+    dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets);
+    nMets = numel(eMets);
+    S = [speye(nMets);-speye(nMets)];
+    lbs = repmat(-1000,nMets,1);
+    ubs = repmat(1000,nMets,1);
+    names = strcat(prefix, 'IEX_', uMets, 'tr');
+    dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs);
+
+    % drop the exchange reactions and make all identifiers microbe-specific
+    model = removeRxns(model, exmod);
+    model.rxns = strcat(prefix, model.rxns);
+    model.mets = strcat(prefix, regexprep(model.mets, '\[e\]', '\[u\]'));  % replace [e] with [u]
+    [model] = mergeTwoModels(dummyMicEU, model, 2, false, false);
+
+    %finish up by A: removing duplicate reactions
+    %We will lose information here, but we will just remove the duplicates.
+    [model,~,~]= checkDuplicateRxn(model,'S',1,0,1);
+
+    writeCbModel(model,'format','mat','fileName',[microbe '.mat']);  % store model
+end
+
+end
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m
@@ -28,7 +28,7 @@
 createdModels = {};
 
 % use the setup model containing every strain in every sample
-parfor k = 1:length(sampNames)
+for k = 1:length(sampNames)
     mgmodel = model;
     abunRed = abundance(:,k+1);
 
@@ -64,7 +64,7 @@
         % Coupling constraints for bacteria
         for i = 1:length(presBac)
             IndRxns=find(strncmp(mgmodel.rxns,[presBac{i,1} '_'],length(presBac{i,1})+1));%finding indixes of specific reactions
-            % find the name of biomass reacion in the microbe model
+            % find the name of biomass reaction in the microbe model
             bioRxn=mgmodel.rxns{find(strncmp(mgmodel.rxns,strcat(presBac{i,1},'_bio'),length(char(strcat(presBac{i,1},'_bio')))))};
             mgmodel=coupleRxnList2Rxn(mgmodel,mgmodel.rxns(IndRxns(1:length(mgmodel.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions
         end