diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 744a022..9d62d07 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ Change Log ========== +------------------- +3.1.1_ - 2022-12-15 +------------------- + +- Separated functions doing alignments and tree making for the sake of compatibility with PHANTASM. + ------------------- 3.1.0_ - 2022-09-10 ------------------- diff --git a/TUTORIAL.rst b/TUTORIAL.rst index 8c6eedd..428514d 100644 --- a/TUTORIAL.rst +++ b/TUTORIAL.rst @@ -26,7 +26,7 @@ Next we're going to put a copy of the repository inside this directory. (Note th Next put the repository into the right release. At the command line, cd into the repository directory (``xgiTutorial/xenoGI``). Then do:: - git checkout v3.1.0 + git checkout v3.1.1 Next create a second subdirectory of ``xgiTutorial/`` called ``enterics/``. This is the working directory for the data we'll be using. diff --git a/misc/ncbiXmlToWgetScript.py b/misc/ncbiXmlToWgetScript.py new file mode 100644 index 0000000..3ef27a6 --- /dev/null +++ b/misc/ncbiXmlToWgetScript.py @@ -0,0 +1,80 @@ +## Takes a list of assemblies from ncbi in xml format, and produces an +## output file with wget commands for download. +## Authors: Jacob Fischer, Tona Gonzalez, Rachael Soh, Eliot Bush +import sys +import xml.etree.ElementTree as ET + +## Example + +# Say we have run the following query on ncbi's assembly database: +# Vibrionaceae [Organism] AND "Sequence from type" [Filter] AND "complete genome" [assembly level] + +# We save the search results to file in xml format +# (e.g. assembly_results.xml). We next must edit assembly_results.xml +# with a text editor. There needs to be an opening root tag (e.g. <root>) at the very +# beginning, and the matching closing tag (e.g. </root>) at the very end. (For some reason this seems +# not to be included in what comes down from ncbi).
# Then we can use this script as follows:

# python3 ncbiXmlToWgetScript.py assembly_results.xml downloadWget.sh GCF

# the final argument specifies whether we'll get GCF (refseq) assemblies or GCA (genbank).

# downloadWget.sh contains wget commands to get all the assemblies. It can be run like this:

# sh downloadWget.sh

## Functions

def _assemblyName(link):
    ''' Return the part of link running from the first "GCA_" or "GCF_"
    to the end of the string, or '' if neither prefix occurs. '''
    starts = [pos for pos in (link.find("GCA_"), link.find("GCF_")) if pos != -1]
    if not starts:
        return ''
    return link[min(starts):]


def createGenomeLinks(xmlFN, outputFN, assemblySource):
    ''' Parse the xml document xmlFN (output from an NCBI assembly
    search) and write one wget command per assembly to outputFN.

    xmlFN          -- xml file; every child of its root element is
                      treated as one assembly record.
    outputFN       -- file to (over)write with the wget commands.
    assemblySource -- 'GCA' reads the link from field 45 of each record;
                      any other value (normally 'GCF') reads field 46.

    Each output line has the form
        wget <link>/<assembly name>_genomic.gbff.gz
    where <assembly name> is the tail of the link starting at its first
    "GCA_" or "GCF_". Records whose link field is empty are skipped. A
    record with too few fields raises IndexError.
    '''
    root = ET.parse(xmlFN).getroot()

    # Position of the link field within a record.
    # NOTE(review): presumably these are the GenBank / RefSeq ftp path
    # fields of the ncbi document summary -- confirm against a current
    # download, since a positional index breaks if ncbi adds fields.
    number = 45 if assemblySource == 'GCA' else 46

    # The with block closes the output file even if a record raises.
    with open(outputFN, 'w') as f:
        for child in root:
            link = child[number].text

            if link is None:
                # no link for this assembly from the requested source
                continue

            f.write("wget " + link + '/' + _assemblyName(link) + '_genomic.gbff.gz\n')

## Main

if __name__ == "__main__":
    # Validate explicitly rather than with assert, which is stripped
    # when python is run with -O.
    if len(sys.argv) < 4 or sys.argv[3] not in ('GCA', 'GCF'):
        sys.exit("Usage: python3 ncbiXmlToWgetScript.py <assemblies.xml> <output.sh> <GCA|GCF>")

    xmlFN = sys.argv[1]
    outputFN = sys.argv[2]
    assemblySource = sys.argv[3]

    createGenomeLinks(xmlFN, outputFN, assemblySource)

# ----------------------------------------------------------------------
# Remaining hunk of the patch this script was extracted from, kept for
# reference:
#
# diff --git a/xenoGI/xenoGI.py b/xenoGI/xenoGI.py
# index e3458fd..f2b6f7c 100644
# --- a/xenoGI/xenoGI.py
# +++ b/xenoGI/xenoGI.py
# @@ -1,5 +1,5 @@
#  """Provides the entry point to xenoGI's functionality."""
# -__version__ = "3.1.0"
# +__version__ = "3.1.1"
#  import sys, glob, os, readline, rlcompleter
#  from . import parameters,genbank,blast,trees,genomes,Score,scores,families,islands,analysis,islandBed
#  from .Tree import *