From f71cb76b1ef6717982493a3a7ba7295903878153 Mon Sep 17 00:00:00 2001 From: "41898282+github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 16:29:43 +0000 Subject: [PATCH] Deployed 2745910 to dev with MkDocs 1.5.3 and mike 2.1.2 --- dev/404.html | 2 +- dev/assets/javascripts/bundle.51198bba.min.js | 29 ------------------ .../javascripts/bundle.51198bba.min.js.map | 8 ----- dev/assets/javascripts/bundle.e1c3ead8.min.js | 29 ++++++++++++++++++ .../javascripts/bundle.e1c3ead8.min.js.map | 7 +++++ .../javascripts/lunr/min/lunr.el.min.js | 1 + .../javascripts/lunr/min/lunr.he.min.js | 1 + .../javascripts/lunr/min/lunr.hy.min.js | 1 + .../javascripts/lunr/min/lunr.kn.min.js | 1 + .../javascripts/lunr/min/lunr.ko.min.js | 2 +- .../javascripts/lunr/min/lunr.sa.min.js | 1 + .../javascripts/lunr/min/lunr.te.min.js | 1 + .../javascripts/lunr/min/lunr.zh.min.js | 2 +- dev/assets/javascripts/lunr/wordcut.js | 4 +-- ...208ed371.min.js => search.b8dbb3d2.min.js} | 18 +++++------ ....min.js.map => search.b8dbb3d2.min.js.map} | 9 +++--- dev/assets/stylesheets/main.50c56a3b.min.css | 1 + .../stylesheets/main.50c56a3b.min.css.map | 1 + dev/assets/stylesheets/main.ded33207.min.css | 1 - .../stylesheets/main.ded33207.min.css.map | 1 - .../stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + .../stylesheets/palette.a0c5b2b5.min.css | 1 - .../stylesheets/palette.a0c5b2b5.min.css.map | 1 - dev/faq/questions/index.html | 2 +- dev/index.html | 2 +- dev/license/index.html | 2 +- dev/pipeline-details/methods/index.html | 2 +- dev/pipeline-details/output/index.html | 4 +-- dev/pipeline-details/overview/index.html | 2 +- dev/pipeline-details/settings/index.html | 2 +- dev/pipeline-details/tools/index.html | 2 +- dev/release-guide/index.html | 4 +-- dev/requirements.txt | 2 ++ dev/search/search_index.json | 2 +- dev/sitemap.xml.gz | Bin 127 -> 127 bytes dev/usage/cache/index.html | 4 +-- 
dev/usage/gui/index.html | 4 +-- dev/usage/run/index.html | 4 +-- dev/usage/unlock/index.html | 4 +-- 40 files changed, 86 insertions(+), 80 deletions(-) delete mode 100644 dev/assets/javascripts/bundle.51198bba.min.js delete mode 100644 dev/assets/javascripts/bundle.51198bba.min.js.map create mode 100644 dev/assets/javascripts/bundle.e1c3ead8.min.js create mode 100644 dev/assets/javascripts/bundle.e1c3ead8.min.js.map create mode 100644 dev/assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 dev/assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 dev/assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 dev/assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 dev/assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 dev/assets/javascripts/lunr/min/lunr.te.min.js rename dev/assets/javascripts/workers/{search.208ed371.min.js => search.b8dbb3d2.min.js} (66%) rename dev/assets/javascripts/workers/{search.208ed371.min.js.map => search.b8dbb3d2.min.js.map} (56%) create mode 100644 dev/assets/stylesheets/main.50c56a3b.min.css create mode 100644 dev/assets/stylesheets/main.50c56a3b.min.css.map delete mode 100644 dev/assets/stylesheets/main.ded33207.min.css delete mode 100644 dev/assets/stylesheets/main.ded33207.min.css.map create mode 100644 dev/assets/stylesheets/palette.06af60db.min.css create mode 100644 dev/assets/stylesheets/palette.06af60db.min.css.map delete mode 100644 dev/assets/stylesheets/palette.a0c5b2b5.min.css delete mode 100644 dev/assets/stylesheets/palette.a0c5b2b5.min.css.map diff --git a/dev/404.html b/dev/404.html index d4a616c..72b6970 100644 --- a/dev/404.html +++ b/dev/404.html @@ -1 +1 @@ - XAVIER Documentation
\ No newline at end of file + XAVIER Documentation

404 - Not found

\ No newline at end of file diff --git a/dev/assets/javascripts/bundle.51198bba.min.js b/dev/assets/javascripts/bundle.51198bba.min.js deleted file mode 100644 index 31bd041..0000000 --- a/dev/assets/javascripts/bundle.51198bba.min.js +++ /dev/null @@ -1,29 +0,0 @@ -"use strict";(()=>{var Ri=Object.create;var gr=Object.defineProperty;var ki=Object.getOwnPropertyDescriptor;var Hi=Object.getOwnPropertyNames,Ht=Object.getOwnPropertySymbols,Pi=Object.getPrototypeOf,yr=Object.prototype.hasOwnProperty,on=Object.prototype.propertyIsEnumerable;var nn=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))yr.call(t,r)&&nn(e,r,t[r]);if(Ht)for(var r of Ht(t))on.call(t,r)&&nn(e,r,t[r]);return e};var an=(e,t)=>{var r={};for(var n in e)yr.call(e,n)&&t.indexOf(n)<0&&(r[n]=e[n]);if(e!=null&&Ht)for(var n of Ht(e))t.indexOf(n)<0&&on.call(e,n)&&(r[n]=e[n]);return r};var Pt=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var $i=(e,t,r,n)=>{if(t&&typeof t=="object"||typeof t=="function")for(let o of Hi(t))!yr.call(e,o)&&o!==r&&gr(e,o,{get:()=>t[o],enumerable:!(n=ki(t,o))||n.enumerable});return e};var yt=(e,t,r)=>(r=e!=null?Ri(Pi(e)):{},$i(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var cn=Pt((xr,sn)=>{(function(e,t){typeof xr=="object"&&typeof sn!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(xr,function(){"use strict";function e(r){var n=!0,o=!1,i=null,s={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function a(T){return!!(T&&T!==document&&T.nodeName!=="HTML"&&T.nodeName!=="BODY"&&"classList"in T&&"contains"in T.classList)}function c(T){var Qe=T.type,De=T.tagName;return!!(De==="INPUT"&&s[Qe]&&!T.readOnly||De==="TEXTAREA"&&!T.readOnly||T.isContentEditable)}function 
f(T){T.classList.contains("focus-visible")||(T.classList.add("focus-visible"),T.setAttribute("data-focus-visible-added",""))}function u(T){T.hasAttribute("data-focus-visible-added")&&(T.classList.remove("focus-visible"),T.removeAttribute("data-focus-visible-added"))}function p(T){T.metaKey||T.altKey||T.ctrlKey||(a(r.activeElement)&&f(r.activeElement),n=!0)}function m(T){n=!1}function d(T){a(T.target)&&(n||c(T.target))&&f(T.target)}function h(T){a(T.target)&&(T.target.classList.contains("focus-visible")||T.target.hasAttribute("data-focus-visible-added"))&&(o=!0,window.clearTimeout(i),i=window.setTimeout(function(){o=!1},100),u(T.target))}function v(T){document.visibilityState==="hidden"&&(o&&(n=!0),G())}function G(){document.addEventListener("mousemove",N),document.addEventListener("mousedown",N),document.addEventListener("mouseup",N),document.addEventListener("pointermove",N),document.addEventListener("pointerdown",N),document.addEventListener("pointerup",N),document.addEventListener("touchmove",N),document.addEventListener("touchstart",N),document.addEventListener("touchend",N)}function oe(){document.removeEventListener("mousemove",N),document.removeEventListener("mousedown",N),document.removeEventListener("mouseup",N),document.removeEventListener("pointermove",N),document.removeEventListener("pointerdown",N),document.removeEventListener("pointerup",N),document.removeEventListener("touchmove",N),document.removeEventListener("touchstart",N),document.removeEventListener("touchend",N)}function 
N(T){T.target.nodeName&&T.target.nodeName.toLowerCase()==="html"||(n=!1,oe())}document.addEventListener("keydown",p,!0),document.addEventListener("mousedown",m,!0),document.addEventListener("pointerdown",m,!0),document.addEventListener("touchstart",m,!0),document.addEventListener("visibilitychange",v,!0),G(),r.addEventListener("focus",d,!0),r.addEventListener("blur",h,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var fn=Pt(Er=>{(function(e){var t=function(){try{return!!Symbol.iterator}catch(f){return!1}},r=t(),n=function(f){var u={next:function(){var p=f.shift();return{done:p===void 0,value:p}}};return r&&(u[Symbol.iterator]=function(){return u}),u},o=function(f){return encodeURIComponent(f).replace(/%20/g,"+")},i=function(f){return decodeURIComponent(String(f).replace(/\+/g," "))},s=function(){var f=function(p){Object.defineProperty(this,"_entries",{writable:!0,value:{}});var m=typeof p;if(m!=="undefined")if(m==="string")p!==""&&this._fromString(p);else if(p instanceof f){var d=this;p.forEach(function(oe,N){d.append(N,oe)})}else if(p!==null&&m==="object")if(Object.prototype.toString.call(p)==="[object Array]")for(var h=0;hd[0]?1:0}),f._entries&&(f._entries={});for(var p=0;p1?i(d[1]):"")}})})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er);(function(e){var t=function(){try{var o=new e.URL("b","http://a");return o.pathname="c 
d",o.href==="http://a/c%20d"&&o.searchParams}catch(i){return!1}},r=function(){var o=e.URL,i=function(c,f){typeof c!="string"&&(c=String(c)),f&&typeof f!="string"&&(f=String(f));var u=document,p;if(f&&(e.location===void 0||f!==e.location.href)){f=f.toLowerCase(),u=document.implementation.createHTMLDocument(""),p=u.createElement("base"),p.href=f,u.head.appendChild(p);try{if(p.href.indexOf(f)!==0)throw new Error(p.href)}catch(T){throw new Error("URL unable to set base "+f+" due to "+T)}}var m=u.createElement("a");m.href=c,p&&(u.body.appendChild(m),m.href=m.href);var d=u.createElement("input");if(d.type="url",d.value=c,m.protocol===":"||!/:/.test(m.href)||!d.checkValidity()&&!f)throw new TypeError("Invalid URL");Object.defineProperty(this,"_anchorElement",{value:m});var h=new e.URLSearchParams(this.search),v=!0,G=!0,oe=this;["append","delete","set"].forEach(function(T){var Qe=h[T];h[T]=function(){Qe.apply(h,arguments),v&&(G=!1,oe.search=h.toString(),G=!0)}}),Object.defineProperty(this,"searchParams",{value:h,enumerable:!0});var N=void 0;Object.defineProperty(this,"_updateSearchParams",{enumerable:!1,configurable:!1,writable:!1,value:function(){this.search!==N&&(N=this.search,G&&(v=!1,this.searchParams._fromString(this.search),v=!0))}})},s=i.prototype,a=function(c){Object.defineProperty(s,c,{get:function(){return this._anchorElement[c]},set:function(f){this._anchorElement[c]=f},enumerable:!0})};["hash","host","hostname","port","protocol"].forEach(function(c){a(c)}),Object.defineProperty(s,"search",{get:function(){return this._anchorElement.search},set:function(c){this._anchorElement.search=c,this._updateSearchParams()},enumerable:!0}),Object.defineProperties(s,{toString:{get:function(){var c=this;return function(){return c.href}}},href:{get:function(){return this._anchorElement.href.replace(/\?$/,"")},set:function(c){this._anchorElement.href=c,this._updateSearchParams()},enumerable:!0},pathname:{get:function(){return 
this._anchorElement.pathname.replace(/(^\/?)/,"/")},set:function(c){this._anchorElement.pathname=c},enumerable:!0},origin:{get:function(){var c={"http:":80,"https:":443,"ftp:":21}[this._anchorElement.protocol],f=this._anchorElement.port!=c&&this._anchorElement.port!=="";return this._anchorElement.protocol+"//"+this._anchorElement.hostname+(f?":"+this._anchorElement.port:"")},enumerable:!0},password:{get:function(){return""},set:function(c){},enumerable:!0},username:{get:function(){return""},set:function(c){},enumerable:!0}}),i.createObjectURL=function(c){return o.createObjectURL.apply(o,arguments)},i.revokeObjectURL=function(c){return o.revokeObjectURL.apply(o,arguments)},e.URL=i};if(t()||r(),e.location!==void 0&&!("origin"in e.location)){var n=function(){return e.location.protocol+"//"+e.location.hostname+(e.location.port?":"+e.location.port:"")};try{Object.defineProperty(e.location,"origin",{get:n,enumerable:!0})}catch(o){setInterval(function(){e.location.origin=n()},100)}}})(typeof global!="undefined"?global:typeof window!="undefined"?window:typeof self!="undefined"?self:Er)});var Kr=Pt((Mt,qr)=>{/*! 
- * clipboard.js v2.0.11 - * https://clipboardjs.com/ - * - * Licensed MIT © Zeno Rocha - */(function(t,r){typeof Mt=="object"&&typeof qr=="object"?qr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof Mt=="object"?Mt.ClipboardJS=r():t.ClipboardJS=r()})(Mt,function(){return function(){var e={686:function(n,o,i){"use strict";i.d(o,{default:function(){return Ci}});var s=i(279),a=i.n(s),c=i(370),f=i.n(c),u=i(817),p=i.n(u);function m(j){try{return document.execCommand(j)}catch(O){return!1}}var d=function(O){var E=p()(O);return m("cut"),E},h=d;function v(j){var O=document.documentElement.getAttribute("dir")==="rtl",E=document.createElement("textarea");E.style.fontSize="12pt",E.style.border="0",E.style.padding="0",E.style.margin="0",E.style.position="absolute",E.style[O?"right":"left"]="-9999px";var H=window.pageYOffset||document.documentElement.scrollTop;return E.style.top="".concat(H,"px"),E.setAttribute("readonly",""),E.value=j,E}var G=function(O,E){var H=v(O);E.container.appendChild(H);var I=p()(H);return m("copy"),H.remove(),I},oe=function(O){var E=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},H="";return typeof O=="string"?H=G(O,E):O instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(O==null?void 0:O.type)?H=G(O.value,E):(H=p()(O),m("copy")),H},N=oe;function T(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?T=function(E){return typeof E}:T=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},T(j)}var Qe=function(){var O=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},E=O.action,H=E===void 0?"copy":E,I=O.container,q=O.target,Me=O.text;if(H!=="copy"&&H!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(q!==void 0)if(q&&T(q)==="object"&&q.nodeType===1){if(H==="copy"&&q.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. 
Please use "readonly" instead of "disabled" attribute');if(H==="cut"&&(q.hasAttribute("readonly")||q.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Me)return N(Me,{container:I});if(q)return H==="cut"?h(q):N(q,{container:I})},De=Qe;function $e(j){return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?$e=function(E){return typeof E}:$e=function(E){return E&&typeof Symbol=="function"&&E.constructor===Symbol&&E!==Symbol.prototype?"symbol":typeof E},$e(j)}function wi(j,O){if(!(j instanceof O))throw new TypeError("Cannot call a class as a function")}function rn(j,O){for(var E=0;E0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof I.action=="function"?I.action:this.defaultAction,this.target=typeof I.target=="function"?I.target:this.defaultTarget,this.text=typeof I.text=="function"?I.text:this.defaultText,this.container=$e(I.container)==="object"?I.container:document.body}},{key:"listenClick",value:function(I){var q=this;this.listener=f()(I,"click",function(Me){return q.onClick(Me)})}},{key:"onClick",value:function(I){var q=I.delegateTarget||I.currentTarget,Me=this.action(q)||"copy",kt=De({action:Me,container:this.container,target:this.target(q),text:this.text(q)});this.emit(kt?"success":"error",{action:Me,text:kt,trigger:q,clearSelection:function(){q&&q.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(I){return vr("action",I)}},{key:"defaultTarget",value:function(I){var q=vr("target",I);if(q)return document.querySelector(q)}},{key:"defaultText",value:function(I){return vr("text",I)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(I){var q=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return N(I,q)}},{key:"cut",value:function(I){return 
h(I)}},{key:"isSupported",value:function(){var I=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],q=typeof I=="string"?[I]:I,Me=!!document.queryCommandSupported;return q.forEach(function(kt){Me=Me&&!!document.queryCommandSupported(kt)}),Me}}]),E}(a()),Ci=Ai},828:function(n){var o=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function s(a,c){for(;a&&a.nodeType!==o;){if(typeof a.matches=="function"&&a.matches(c))return a;a=a.parentNode}}n.exports=s},438:function(n,o,i){var s=i(828);function a(u,p,m,d,h){var v=f.apply(this,arguments);return u.addEventListener(m,v,h),{destroy:function(){u.removeEventListener(m,v,h)}}}function c(u,p,m,d,h){return typeof u.addEventListener=="function"?a.apply(null,arguments):typeof m=="function"?a.bind(null,document).apply(null,arguments):(typeof u=="string"&&(u=document.querySelectorAll(u)),Array.prototype.map.call(u,function(v){return a(v,p,m,d,h)}))}function f(u,p,m,d){return function(h){h.delegateTarget=s(h.target,p),h.delegateTarget&&d.call(u,h)}}n.exports=c},879:function(n,o){o.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},o.nodeList=function(i){var s=Object.prototype.toString.call(i);return i!==void 0&&(s==="[object NodeList]"||s==="[object HTMLCollection]")&&"length"in i&&(i.length===0||o.node(i[0]))},o.string=function(i){return typeof i=="string"||i instanceof String},o.fn=function(i){var s=Object.prototype.toString.call(i);return s==="[object Function]"}},370:function(n,o,i){var s=i(879),a=i(438);function c(m,d,h){if(!m&&!d&&!h)throw new Error("Missing required arguments");if(!s.string(d))throw new TypeError("Second argument must be a String");if(!s.fn(h))throw new TypeError("Third argument must be a Function");if(s.node(m))return f(m,d,h);if(s.nodeList(m))return u(m,d,h);if(s.string(m))return p(m,d,h);throw new 
TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function f(m,d,h){return m.addEventListener(d,h),{destroy:function(){m.removeEventListener(d,h)}}}function u(m,d,h){return Array.prototype.forEach.call(m,function(v){v.addEventListener(d,h)}),{destroy:function(){Array.prototype.forEach.call(m,function(v){v.removeEventListener(d,h)})}}}function p(m,d,h){return a(document.body,m,d,h)}n.exports=c},817:function(n){function o(i){var s;if(i.nodeName==="SELECT")i.focus(),s=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var a=i.hasAttribute("readonly");a||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),a||i.removeAttribute("readonly"),s=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var c=window.getSelection(),f=document.createRange();f.selectNodeContents(i),c.removeAllRanges(),c.addRange(f),s=c.toString()}return s}n.exports=o},279:function(n){function o(){}o.prototype={on:function(i,s,a){var c=this.e||(this.e={});return(c[i]||(c[i]=[])).push({fn:s,ctx:a}),this},once:function(i,s,a){var c=this;function f(){c.off(i,f),s.apply(a,arguments)}return f._=s,this.on(i,f,a)},emit:function(i){var s=[].slice.call(arguments,1),a=((this.e||(this.e={}))[i]||[]).slice(),c=0,f=a.length;for(c;c{"use strict";/*! 
- * escape-html - * Copyright(c) 2012-2013 TJ Holowaychuk - * Copyright(c) 2015 Andreas Lubbe - * Copyright(c) 2015 Tiancheng "Timothy" Gu - * MIT Licensed - */var ns=/["'&<>]/;Go.exports=os;function os(e){var t=""+e,r=ns.exec(t);if(!r)return t;var n,o="",i=0,s=0;for(i=r.index;i0&&i[i.length-1])&&(f[0]===6||f[0]===2)){r=0;continue}if(f[0]===3&&(!i||f[1]>i[0]&&f[1]=e.length&&(e=void 0),{value:e&&e[n++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function W(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var n=r.call(e),o,i=[],s;try{for(;(t===void 0||t-- >0)&&!(o=n.next()).done;)i.push(o.value)}catch(a){s={error:a}}finally{try{o&&!o.done&&(r=n.return)&&r.call(n)}finally{if(s)throw s.error}}return i}function D(e,t,r){if(r||arguments.length===2)for(var n=0,o=t.length,i;n1||a(m,d)})})}function a(m,d){try{c(n[m](d))}catch(h){p(i[0][3],h)}}function c(m){m.value instanceof et?Promise.resolve(m.value.v).then(f,u):p(i[0][2],m)}function f(m){a("next",m)}function u(m){a("throw",m)}function p(m,d){m(d),i.shift(),i.length&&a(i[0][0],i[0][1])}}function ln(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof Ee=="function"?Ee(e):e[Symbol.iterator](),r={},n("next"),n("throw"),n("return"),r[Symbol.asyncIterator]=function(){return this},r);function n(i){r[i]=e[i]&&function(s){return new Promise(function(a,c){s=e[i](s),o(a,c,s.done,s.value)})}}function o(i,s,a,c){Promise.resolve(c).then(function(f){i({value:f,done:a})},s)}}function C(e){return typeof e=="function"}function at(e){var t=function(n){Error.call(n),n.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var It=at(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: -`+r.map(function(n,o){return o+1+") "+n.toString()}).join(` - 
`):"",this.name="UnsubscriptionError",this.errors=r}});function Ve(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ie=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,n,o,i;if(!this.closed){this.closed=!0;var s=this._parentage;if(s)if(this._parentage=null,Array.isArray(s))try{for(var a=Ee(s),c=a.next();!c.done;c=a.next()){var f=c.value;f.remove(this)}}catch(v){t={error:v}}finally{try{c&&!c.done&&(r=a.return)&&r.call(a)}finally{if(t)throw t.error}}else s.remove(this);var u=this.initialTeardown;if(C(u))try{u()}catch(v){i=v instanceof It?v.errors:[v]}var p=this._finalizers;if(p){this._finalizers=null;try{for(var m=Ee(p),d=m.next();!d.done;d=m.next()){var h=d.value;try{mn(h)}catch(v){i=i!=null?i:[],v instanceof It?i=D(D([],W(i)),W(v.errors)):i.push(v)}}}catch(v){n={error:v}}finally{try{d&&!d.done&&(o=m.return)&&o.call(m)}finally{if(n)throw n.error}}}if(i)throw new It(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)mn(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Ve(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Ve(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Sr=Ie.EMPTY;function jt(e){return e instanceof Ie||e&&"closed"in e&&C(e.remove)&&C(e.add)&&C(e.unsubscribe)}function mn(e){C(e)?e():e.unsubscribe()}var Le={onUnhandledError:null,onStoppedNotification:null,Promise:void 
0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var st={setTimeout:function(e,t){for(var r=[],n=2;n0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var n=this,o=this,i=o.hasError,s=o.isStopped,a=o.observers;return i||s?Sr:(this.currentObservers=null,a.push(r),new Ie(function(){n.currentObservers=null,Ve(a,r)}))},t.prototype._checkFinalizedStatuses=function(r){var n=this,o=n.hasError,i=n.thrownError,s=n.isStopped;o?r.error(i):s&&r.complete()},t.prototype.asObservable=function(){var r=new F;return r.source=this,r},t.create=function(r,n){return new En(r,n)},t}(F);var En=function(e){ie(t,e);function t(r,n){var o=e.call(this)||this;return o.destination=r,o.source=n,o}return t.prototype.next=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.next)===null||o===void 0||o.call(n,r)},t.prototype.error=function(r){var n,o;(o=(n=this.destination)===null||n===void 0?void 0:n.error)===null||o===void 0||o.call(n,r)},t.prototype.complete=function(){var r,n;(n=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||n===void 0||n.call(r)},t.prototype._subscribe=function(r){var n,o;return(o=(n=this.source)===null||n===void 0?void 0:n.subscribe(r))!==null&&o!==void 0?o:Sr},t}(x);var Et={now:function(){return(Et.delegate||Date).now()},delegate:void 0};var wt=function(e){ie(t,e);function t(r,n,o){r===void 0&&(r=1/0),n===void 0&&(n=1/0),o===void 0&&(o=Et);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=n,i._timestampProvider=o,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=n===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,n),i}return t.prototype.next=function(r){var 
n=this,o=n.isStopped,i=n._buffer,s=n._infiniteTimeWindow,a=n._timestampProvider,c=n._windowTime;o||(i.push(r),!s&&i.push(a.now()+c)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var n=this._innerSubscribe(r),o=this,i=o._infiniteTimeWindow,s=o._buffer,a=s.slice(),c=0;c0?e.prototype.requestAsyncId.call(this,r,n,o):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,n,o){var i;if(o===void 0&&(o=0),o!=null?o>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,n,o);var s=r.actions;n!=null&&((i=s[s.length-1])===null||i===void 0?void 0:i.id)!==n&&(ut.cancelAnimationFrame(n),r._scheduled=void 0)},t}(Wt);var Tn=function(e){ie(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var n=this._scheduled;this._scheduled=void 0;var o=this.actions,i;r=r||o.shift();do if(i=r.execute(r.state,r.delay))break;while((r=o[0])&&r.id===n&&o.shift());if(this._active=!1,i){for(;(r=o[0])&&r.id===n&&o.shift();)r.unsubscribe();throw i}},t}(Dt);var Te=new Tn(Sn);var _=new F(function(e){return e.complete()});function Vt(e){return e&&C(e.schedule)}function Cr(e){return e[e.length-1]}function Ye(e){return C(Cr(e))?e.pop():void 0}function Oe(e){return Vt(Cr(e))?e.pop():void 0}function zt(e,t){return typeof Cr(e)=="number"?e.pop():t}var pt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Nt(e){return C(e==null?void 0:e.then)}function qt(e){return C(e[ft])}function Kt(e){return Symbol.asyncIterator&&C(e==null?void 0:e[Symbol.asyncIterator])}function Qt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Ni(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Yt=Ni();function Gt(e){return C(e==null?void 0:e[Yt])}function Bt(e){return pn(this,arguments,function(){var r,n,o,i;return $t(this,function(s){switch(s.label){case 0:r=e.getReader(),s.label=1;case 1:s.trys.push([1,,9,10]),s.label=2;case 2:return[4,et(r.read())];case 3:return n=s.sent(),o=n.value,i=n.done,i?[4,et(void 0)]:[3,5];case 4:return[2,s.sent()];case 5:return[4,et(o)];case 6:return[4,s.sent()];case 7:return s.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function Jt(e){return C(e==null?void 0:e.getReader)}function U(e){if(e instanceof F)return e;if(e!=null){if(qt(e))return qi(e);if(pt(e))return Ki(e);if(Nt(e))return Qi(e);if(Kt(e))return On(e);if(Gt(e))return Yi(e);if(Jt(e))return Gi(e)}throw Qt(e)}function qi(e){return new F(function(t){var r=e[ft]();if(C(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Ki(e){return new F(function(t){for(var r=0;r=2;return function(n){return n.pipe(e?A(function(o,i){return e(o,i,n)}):de,ge(1),r?He(t):Vn(function(){return new Zt}))}}function zn(){for(var e=[],t=0;t=2,!0))}function pe(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new x}:t,n=e.resetOnError,o=n===void 0?!0:n,i=e.resetOnComplete,s=i===void 0?!0:i,a=e.resetOnRefCountZero,c=a===void 0?!0:a;return function(f){var u,p,m,d=0,h=!1,v=!1,G=function(){p==null||p.unsubscribe(),p=void 0},oe=function(){G(),u=m=void 0,h=v=!1},N=function(){var T=u;oe(),T==null||T.unsubscribe()};return y(function(T,Qe){d++,!v&&!h&&G();var De=m=m!=null?m:r();Qe.add(function(){d--,d===0&&!v&&!h&&(p=$r(N,c))}),De.subscribe(Qe),!u&&d>0&&(u=new rt({next:function($e){return 
De.next($e)},error:function($e){v=!0,G(),p=$r(oe,o,$e),De.error($e)},complete:function(){h=!0,G(),p=$r(oe,s),De.complete()}}),U(T).subscribe(u))})(f)}}function $r(e,t){for(var r=[],n=2;ne.next(document)),e}function K(e,t=document){return Array.from(t.querySelectorAll(e))}function z(e,t=document){let r=ce(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function ce(e,t=document){return t.querySelector(e)||void 0}function _e(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}function tr(e){return L(b(document.body,"focusin"),b(document.body,"focusout")).pipe(ke(1),l(()=>{let t=_e();return typeof t!="undefined"?e.contains(t):!1}),V(e===_e()),B())}function Xe(e){return{x:e.offsetLeft,y:e.offsetTop}}function Qn(e){return L(b(window,"load"),b(window,"resize")).pipe(Ce(0,Te),l(()=>Xe(e)),V(Xe(e)))}function rr(e){return{x:e.scrollLeft,y:e.scrollTop}}function dt(e){return L(b(e,"scroll"),b(window,"resize")).pipe(Ce(0,Te),l(()=>rr(e)),V(rr(e)))}var Gn=function(){if(typeof Map!="undefined")return Map;function e(t,r){var n=-1;return t.some(function(o,i){return o[0]===r?(n=i,!0):!1}),n}return function(){function t(){this.__entries__=[]}return Object.defineProperty(t.prototype,"size",{get:function(){return this.__entries__.length},enumerable:!0,configurable:!0}),t.prototype.get=function(r){var n=e(this.__entries__,r),o=this.__entries__[n];return o&&o[1]},t.prototype.set=function(r,n){var o=e(this.__entries__,r);~o?this.__entries__[o][1]=n:this.__entries__.push([r,n])},t.prototype.delete=function(r){var n=this.__entries__,o=e(n,r);~o&&n.splice(o,1)},t.prototype.has=function(r){return!!~e(this.__entries__,r)},t.prototype.clear=function(){this.__entries__.splice(0)},t.prototype.forEach=function(r,n){n===void 0&&(n=null);for(var 
o=0,i=this.__entries__;o0},e.prototype.connect_=function(){!Dr||this.connected_||(document.addEventListener("transitionend",this.onTransitionEnd_),window.addEventListener("resize",this.refresh),ga?(this.mutationsObserver_=new MutationObserver(this.refresh),this.mutationsObserver_.observe(document,{attributes:!0,childList:!0,characterData:!0,subtree:!0})):(document.addEventListener("DOMSubtreeModified",this.refresh),this.mutationEventsAdded_=!0),this.connected_=!0)},e.prototype.disconnect_=function(){!Dr||!this.connected_||(document.removeEventListener("transitionend",this.onTransitionEnd_),window.removeEventListener("resize",this.refresh),this.mutationsObserver_&&this.mutationsObserver_.disconnect(),this.mutationEventsAdded_&&document.removeEventListener("DOMSubtreeModified",this.refresh),this.mutationsObserver_=null,this.mutationEventsAdded_=!1,this.connected_=!1)},e.prototype.onTransitionEnd_=function(t){var r=t.propertyName,n=r===void 0?"":r,o=va.some(function(i){return!!~n.indexOf(i)});o&&this.refresh()},e.getInstance=function(){return this.instance_||(this.instance_=new e),this.instance_},e.instance_=null,e}(),Bn=function(e,t){for(var r=0,n=Object.keys(t);r0},e}(),Xn=typeof WeakMap!="undefined"?new WeakMap:new Gn,Zn=function(){function e(t){if(!(this instanceof e))throw new TypeError("Cannot call a class as a function.");if(!arguments.length)throw new TypeError("1 argument required, but only 0 present.");var r=ya.getInstance(),n=new Aa(t,r,this);Xn.set(this,n)}return e}();["observe","unobserve","disconnect"].forEach(function(e){Zn.prototype[e]=function(){var t;return(t=Xn.get(this))[e].apply(t,arguments)}});var Ca=function(){return typeof nr.ResizeObserver!="undefined"?nr.ResizeObserver:Zn}(),eo=Ca;var to=new x,Ra=$(()=>k(new eo(e=>{for(let t of e)to.next(t)}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),J(1));function he(e){return{width:e.offsetWidth,height:e.offsetHeight}}function ye(e){return 
Ra.pipe(S(t=>t.observe(e)),g(t=>to.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(()=>he(e)))),V(he(e)))}function bt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function ar(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var ro=new x,ka=$(()=>k(new IntersectionObserver(e=>{for(let t of e)ro.next(t)},{threshold:0}))).pipe(g(e=>L(ze,k(e)).pipe(R(()=>e.disconnect()))),J(1));function sr(e){return ka.pipe(S(t=>t.observe(e)),g(t=>ro.pipe(A(({target:r})=>r===e),R(()=>t.unobserve(e)),l(({isIntersecting:r})=>r))))}function no(e,t=16){return dt(e).pipe(l(({y:r})=>{let n=he(e),o=bt(e);return r>=o.height-n.height-t}),B())}var cr={drawer:z("[data-md-toggle=drawer]"),search:z("[data-md-toggle=search]")};function oo(e){return cr[e].checked}function Ke(e,t){cr[e].checked!==t&&cr[e].click()}function Ue(e){let t=cr[e];return b(t,"change").pipe(l(()=>t.checked),V(t.checked))}function Ha(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function Pa(){return L(b(window,"compositionstart").pipe(l(()=>!0)),b(window,"compositionend").pipe(l(()=>!1))).pipe(V(!1))}function io(){let e=b(window,"keydown").pipe(A(t=>!(t.metaKey||t.ctrlKey)),l(t=>({mode:oo("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),A(({mode:t,type:r})=>{if(t==="global"){let n=_e();if(typeof n!="undefined")return!Ha(n,r)}return!0}),pe());return Pa().pipe(g(t=>t?_:e))}function le(){return new URL(location.href)}function ot(e){location.href=e.href}function ao(){return new x}function so(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)so(e,r)}function M(e,t,...r){let n=document.createElement(e);if(t)for(let o of Object.keys(t))typeof 
t[o]!="undefined"&&(typeof t[o]!="boolean"?n.setAttribute(o,t[o]):n.setAttribute(o,""));for(let o of r)so(n,o);return n}function fr(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function co(){return location.hash.substring(1)}function Vr(e){let t=M("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function $a(e){return L(b(window,"hashchange"),e).pipe(l(co),V(co()),A(t=>t.length>0),J(1))}function fo(e){return $a(e).pipe(l(t=>ce(`[id="${t}"]`)),A(t=>typeof t!="undefined"))}function zr(e){let t=matchMedia(e);return er(r=>t.addListener(()=>r(t.matches))).pipe(V(t.matches))}function uo(){let e=matchMedia("print");return L(b(window,"beforeprint").pipe(l(()=>!0)),b(window,"afterprint").pipe(l(()=>!1))).pipe(V(e.matches))}function Nr(e,t){return e.pipe(g(r=>r?t():_))}function ur(e,t={credentials:"same-origin"}){return ue(fetch(`${e}`,t)).pipe(fe(()=>_),g(r=>r.status!==200?Tt(()=>new Error(r.statusText)):k(r)))}function We(e,t){return ur(e,t).pipe(g(r=>r.json()),J(1))}function po(e,t){let r=new DOMParser;return ur(e,t).pipe(g(n=>n.text()),l(n=>r.parseFromString(n,"text/xml")),J(1))}function pr(e){let t=M("script",{src:e});return $(()=>(document.head.appendChild(t),L(b(t,"load"),b(t,"error").pipe(g(()=>Tt(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(l(()=>{}),R(()=>document.head.removeChild(t)),ge(1))))}function lo(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function mo(){return L(b(window,"scroll",{passive:!0}),b(window,"resize",{passive:!0})).pipe(l(lo),V(lo()))}function ho(){return{width:innerWidth,height:innerHeight}}function bo(){return b(window,"resize",{passive:!0}).pipe(l(ho),V(ho()))}function vo(){return Q([mo(),bo()]).pipe(l(([e,t])=>({offset:e,size:t})),J(1))}function lr(e,{viewport$:t,header$:r}){let n=t.pipe(Z("size")),o=Q([n,r]).pipe(l(()=>Xe(e)));return 
Q([r,t,o]).pipe(l(([{height:i},{offset:s,size:a},{x:c,y:f}])=>({offset:{x:s.x-c,y:s.y-f+i},size:a})))}(()=>{function e(n,o){parent.postMessage(n,o||"*")}function t(...n){return n.reduce((o,i)=>o.then(()=>new Promise(s=>{let a=document.createElement("script");a.src=i,a.onload=s,document.body.appendChild(a)})),Promise.resolve())}var r=class extends EventTarget{constructor(n){super(),this.url=n,this.m=i=>{i.source===this.w&&(this.dispatchEvent(new MessageEvent("message",{data:i.data})),this.onmessage&&this.onmessage(i))},this.e=(i,s,a,c,f)=>{if(s===`${this.url}`){let u=new ErrorEvent("error",{message:i,filename:s,lineno:a,colno:c,error:f});this.dispatchEvent(u),this.onerror&&this.onerror(u)}};let o=document.createElement("iframe");o.hidden=!0,document.body.appendChild(this.iframe=o),this.w.document.open(),this.w.document.write(`

Frequently Asked Questions

Coming soon!


Last update: 2021-11-30
\ No newline at end of file + General Questions - XAVIER Documentation

Frequently Asked Questions

Coming soon!

\ No newline at end of file diff --git a/dev/index.html b/dev/index.html index 27c5a5a..781ba84 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1 +1 @@ - XAVIER Documentation

XAVIER - eXome Analysis and Variant explorER 🔬

tests docs Docker Pulls GitHub Issues MIT license DOI

XAVIER - eXome Analysis and Variant explorER. XAVIER is an open-source, reproducible, and scalable solution for analyzing Whole Exome sequencing data. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!


Overview

Welcome to XAVIER's documentation! This guide is the main source of documentation for users that are getting started with the XAVIER pipeline.

The xavier pipeline is composed of several inter-related sub commands to set up and run the pipeline across different systems. Each of the available sub commands performs different functions:

  • xavier run: Run the XAVIER pipeline with your input files.
  • xavier unlock: Unlocks a previous run's output directory.
  • xavier cache: Cache remote resources locally, coming soon!

XAVIER is a comprehensive whole exome-sequencing pipeline following the Broad's set of best practices. It relies on technologies like Singularity1 to maintain the highest-level of reproducibility. The pipeline consists of a series of data processing and quality-control steps orchestrated by Snakemake2, a flexible and scalable workflow management system, to submit jobs to a cluster or cloud provider.

The pipeline is compatible with data generated from Illumina short-read sequencing technologies. As input, it accepts a set of FastQ or BAM files and can be run locally on a compute instance, on-premise using a cluster, or on the cloud (feature coming soon!). A user can define the method or mode of execution. The pipeline can submit jobs to a cluster using a job scheduler like SLURM, or run on AWS using Tibanna (feature coming soon!). A hybrid approach ensures the pipeline is accessible to all users.

Before getting started, we highly recommend reading through the usage section of each available sub command.

For more information about issues or troubleshooting a problem, please check out our FAQ prior to opening an issue on Github.

Contribute

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continuous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our GitHub repository .

References

1. Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459.
2. Koster, J. and S. Rahmann (2018). "Snakemake-a scalable bioinformatics workflow engine." Bioinformatics 34(20): 3600.


Last update: 2024-07-11
\ No newline at end of file + XAVIER Documentation

XAVIER - eXome Analysis and Variant explorER 🔬

tests docs Docker Pulls GitHub Issues MIT license DOI

XAVIER - eXome Analysis and Variant explorER. XAVIER is an open-source, reproducible, and scalable solution for analyzing Whole Exome sequencing data. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!


Overview

Welcome to XAVIER's documentation! This guide is the main source of documentation for users that are getting started with the XAVIER pipeline.

The xavier pipeline is composed of several inter-related sub commands to set up and run the pipeline across different systems. Each of the available sub commands performs different functions:

  • xavier run: Run the XAVIER pipeline with your input files.
  • xavier unlock: Unlocks a previous run's output directory.
  • xavier cache: Cache remote resources locally, coming soon!

XAVIER is a comprehensive whole exome-sequencing pipeline following the Broad's set of best practices. It relies on technologies like Singularity1 to maintain the highest-level of reproducibility. The pipeline consists of a series of data processing and quality-control steps orchestrated by Snakemake2, a flexible and scalable workflow management system, to submit jobs to a cluster or cloud provider.

The pipeline is compatible with data generated from Illumina short-read sequencing technologies. As input, it accepts a set of FastQ or BAM files and can be run locally on a compute instance, on-premise using a cluster, or on the cloud (feature coming soon!). A user can define the method or mode of execution. The pipeline can submit jobs to a cluster using a job scheduler like SLURM, or run on AWS using Tibanna (feature coming soon!). A hybrid approach ensures the pipeline is accessible to all users.

Before getting started, we highly recommend reading through the usage section of each available sub command.

For more information about issues or troubleshooting a problem, please check out our FAQ prior to opening an issue on Github.

Contribute

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continuous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our GitHub repository .

References

1. Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459.
2. Koster, J. and S. Rahmann (2018). "Snakemake-a scalable bioinformatics workflow engine." Bioinformatics 34(20): 3600.

\ No newline at end of file diff --git a/dev/license/index.html b/dev/license/index.html index 4243d3d..46b53d8 100644 --- a/dev/license/index.html +++ b/dev/license/index.html @@ -1 +1 @@ - License - XAVIER Documentation

MIT License

Copyright © 2021 CCBR

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


Last update: 2024-01-30
\ No newline at end of file + License - XAVIER Documentation

MIT License

Copyright © 2021 CCBR

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

\ No newline at end of file diff --git a/dev/pipeline-details/methods/index.html b/dev/pipeline-details/methods/index.html index 37a26f0..9f27bf1 100644 --- a/dev/pipeline-details/methods/index.html +++ b/dev/pipeline-details/methods/index.html @@ -1 +1 @@ - Methods - XAVIER Documentation

Methods description

This page contains a description of all methods used in the pipeline, along with references for important tools.

Note that depending on the settings used, not all of these methods may be applicable, so please adapt this text appropriately for your application.

You can also download this text as a Word document (.docx) that contains an EndNote traveling library using the button below.

Download DOCX


Data preprocessing

Low-quality and adapters sequences are trimmed from the raw sequencing reads using Trimmomatic (v. 0.39)1. Trimmed reads are then aligned to the human hg38 reference genome using BWA mapping software (v. 0.7.17)2. Duplicate reads are marked using Samblaster (v. 0.1.25)3 and sorted using samtools (v. 1.8). Finally, base quality score recalibration is performed as indicated in the GATK4 (v. 4.2.2.0) best practices 4.

Germline variant calling

HaplotypeCaller from GATK4 (v. 4.2.2.0) is used to call germline variants, parallelized across chromosomes, and all samples in the cohort are joint genotyped together 4,5.

Somatic variant calling

Somatic variant calling (SNPs and Indels) is performed using Mutect (v. 1.1.7)6, Mutect2 (GATK v. 4.2.0)7, Strelka2 (v. 2.9.0)8, and VarDict (v. 1.4)9 in tumor-normal mode. Variants from all callers are merged using the CombineVariants tool from GATK version 3.8-1. Genomic, functional and consequence annotations are added using Variant Effect Predictor (VEP v. 99)10 and converted to Mutation Annotation Format (MAF) using the vcf2maf tool (v. 1.6.16)11.

For Copy Number Variants (CNVs), Control-Freec (v. 11.6)12 is used to generate pileups, which are used as input for the R package 'sequenza' (v. 3.0.0)13. The complete Control-Freec workflow is then re-run using ploidy and cellularity estimates from 'sequenza'.

FFPE Artifact filtering

SOBDetector is a tool that scores variants based on strand-orientation bias, which can be a sign of DNA damage caused by fixation of tissue. This pipeline runs SOBDetector in a two-pass method. The first pass uses parameters provided with the software (calculated from publicly available data from TCGA), then cohort-specific bias metrics are computed from those results, and SOBDetector is re-run using these cohort-specific values.

Quality and identity metrics

Ancestry and relatedness scores are generated using Somalier (v. 0.2.13)14. Contamination analyses are performed against viral and bacterial genomes from NCBI using Kraken2 (v. 2.1.2)15, as well as against mouse, human, and UniVec databases using FastQ Screen (v. 0.14.1)16. Sequence, mapping and variant statistics are computed using FastQC (v. 0.11.9), Qualimap (v. 2.2.1)17 and SNPeff (v. 4.3t)18. All of these metrics are combined into an interactive HTML report using MultiQC (v. 1.11)19.

Pipeline Orchestration

Job execution and management is done using Snakemake (v. 6.8.2)20 using custom-built Singularity (v. 3.8.5) containers for reproducibility.

References


  1. Bolger, A.M., M. Lohse, and B. Usadel, Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics, 2014. 30(15): p. 2114-20. 

  2. Li, H. and R. Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 2009. 25(14): p. 1754-60. 

  3. Faust, G.G. and I.M. Hall, SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, 2014. 30(17): p. 2503-5. 

  4. Van der Auwera, G.A. and B.D. O'Connor, Genomics in the cloud : using Docker, GATK, and WDL in Terra. First edition. ed. 2020, Sebastopol, CA: O'Reilly Media. 

  5. Poplin, R., et al., Scaling accurate genetic variant discovery to tens of thousands of samples. bioRxiv, 2018: p. 201178. 

  6. Cibulskis, K., et al., Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples. Nat Biotechnol, 2013. 31(3): p. 213-9. 

  7. Benjamin, D., et al., Calling Somatic SNVs and Indels with Mutect2. bioRxiv, 2019: p. 861054. 

  8. Kim, S., et al., Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods, 2018. 15(8): p. 591-594. 

  9. Lai, Z., et al., VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research. Nucleic Acids Res, 2016. 44(11): p. e108. 

  10. McLaren, W., et al., The Ensembl Variant Effect Predictor. Genome Biol, 2016. 17(1): p. 122. 

  11. Memorial Sloan Kettering Cancer Center. vcf2maf. 2013; Available from: https://github.com/mskcc/vcf2maf

  12. Boeva, V., et al., Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics, 2012. 28(3): p. 423-5. 

  13. Favero, F., et al., Sequenza: allele-specific copy number and mutation profiles from tumor sequencing data. Ann Oncol, 2015. 26(1): p. 64-70. 

  14. Pedersen, B. somalier: extract informative sites, evaluate relatedness, and perform quality-control on BAM/CRAM/BCF/VCF/GVCF. 2018; Available from: https://github.com/brentp/somalier

  15. Wood, D.E., J. Lu, and B. Langmead, Improved metagenomic analysis with Kraken 2. Genome Biol, 2019. 20(1): p. 257. 

  16. Wingett, S.W. and S. Andrews, FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res, 2018. 7: p. 1338. 

  17. Okonechnikov, K., A. Conesa, and F. Garcia-Alcalde, Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 2016. 32(2): p. 292-4. 

  18. Cingolani, P., et al., A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin), 2012. 6(2): p. 80-92. 

  19. Ewels, P., et al., MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 2016. 32(19): p. 3047-8. 

  20. Koster, J. and S. Rahmann, Snakemake-a scalable bioinformatics workflow engine. Bioinformatics, 2018. 34(20): p. 3600. 


Last update: 2024-01-30
\ No newline at end of file + Methods - XAVIER Documentation

Methods description

This page contains a description of all methods used in the pipeline, along with references for important tools.

Note that depending on the settings used, not all of these methods may be applicable, so please adapt this text appropriately for your application.

You can also download this text as a Word document (.docx) that contains an EndNote traveling library using the button below.

Download DOCX


Data preprocessing

Low-quality and adapters sequences are trimmed from the raw sequencing reads using Trimmomatic (v. 0.39)1. Trimmed reads are then aligned to the human hg38 reference genome using BWA mapping software (v. 0.7.17)2. Duplicate reads are marked using Samblaster (v. 0.1.25)3 and sorted using samtools (v. 1.8). Finally, base quality score recalibration is performed as indicated in the GATK4 (v. 4.2.2.0) best practices 4.

Germline variant calling

HaplotypeCaller from GATK4 (v. 4.2.2.0) is used to call germline variants, parallelized across chromosomes, and all samples in the cohort are joint genotyped together 4,5.

Somatic variant calling

Somatic variant calling (SNPs and Indels) is performed using Mutect (v. 1.1.7)6, Mutect2 (GATK v. 4.2.0)7, Strelka2 (v. 2.9.0)8, and VarDict (v. 1.4)9 in tumor-normal mode. Variants from all callers are merged using the CombineVariants tool from GATK version 3.8-1. Genomic, functional and consequence annotations are added using Variant Effect Predictor (VEP v. 99)10 and converted to Mutation Annotation Format (MAF) using the vcf2maf tool (v. 1.6.16)11.

For Copy Number Variants (CNVs), Control-Freec (v. 11.6)12 is used to generate pileups, which are used as input for the R package 'sequenza' (v. 3.0.0)13. The complete Control-Freec workflow is then re-run using ploidy and cellularity estimates from 'sequenza'.

FFPE Artifact filtering

SOBDetector is a tool that scores variants based on strand-orientation bias, which can be a sign of DNA damage caused by fixation of tissue. This pipeline runs SOBDetector in a two-pass method. The first pass uses parameters provided with the software (calculated from publicly available data from TCGA), then cohort-specific bias metrics are computed from those results, and SOBDetector is re-run using these cohort-specific values.

Quality and identity metrics

Ancestry and relatedness scores are generated using Somalier (v. 0.2.13)14. Contamination analyses are performed against viral and bacterial genomes from NCBI using Kraken2 (v. 2.1.2)15, as well as against mouse, human, and UniVec databases using FastQ Screen (v. 0.14.1)16. Sequence, mapping and variant statistics are computed using FastQC (v. 0.11.9), Qualimap (v. 2.2.1)17 and SNPeff (v. 4.3t)18. All of these metrics are combined into an interactive HTML report using MultiQC (v. 1.11)19.

Pipeline Orchestration

Job execution and management is done using Snakemake (v. 6.8.2)20 using custom-built Singularity (v. 3.8.5) containers for reproducibility.

References


  1. Bolger, A.M., M. Lohse, and B. Usadel, Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics, 2014. 30(15): p. 2114-20. 

  2. Li, H. and R. Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 2009. 25(14): p. 1754-60. 

  3. Faust, G.G. and I.M. Hall, SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, 2014. 30(17): p. 2503-5. 

  4. Van der Auwera, G.A. and B.D. O'Connor, Genomics in the cloud : using Docker, GATK, and WDL in Terra. First edition. ed. 2020, Sebastopol, CA: O'Reilly Media. 

  5. Poplin, R., et al., Scaling accurate genetic variant discovery to tens of thousands of samples. bioRxiv, 2018: p. 201178. 

  6. Cibulskis, K., et al., Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples. Nat Biotechnol, 2013. 31(3): p. 213-9. 

  7. Benjamin, D., et al., Calling Somatic SNVs and Indels with Mutect2. bioRxiv, 2019: p. 861054. 

  8. Kim, S., et al., Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods, 2018. 15(8): p. 591-594. 

  9. Lai, Z., et al., VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research. Nucleic Acids Res, 2016. 44(11): p. e108. 

  10. McLaren, W., et al., The Ensembl Variant Effect Predictor. Genome Biol, 2016. 17(1): p. 122. 

  11. Memorial Sloan Kettering Cancer Center. vcf2maf. 2013; Available from: https://github.com/mskcc/vcf2maf

  12. Boeva, V., et al., Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics, 2012. 28(3): p. 423-5. 

  13. Favero, F., et al., Sequenza: allele-specific copy number and mutation profiles from tumor sequencing data. Ann Oncol, 2015. 26(1): p. 64-70. 

  14. Pedersen, B. somalier: extract informative sites, evaluate relatedness, and perform quality-control on BAM/CRAM/BCF/VCF/GVCF. 2018; Available from: https://github.com/brentp/somalier

  15. Wood, D.E., J. Lu, and B. Langmead, Improved metagenomic analysis with Kraken 2. Genome Biol, 2019. 20(1): p. 257. 

  16. Wingett, S.W. and S. Andrews, FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res, 2018. 7: p. 1338. 

  17. Okonechnikov, K., A. Conesa, and F. Garcia-Alcalde, Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 2016. 32(2): p. 292-4. 

  18. Cingolani, P., et al., A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin), 2012. 6(2): p. 80-92. 

  19. Ewels, P., et al., MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 2016. 32(19): p. 3047-8. 

  20. Koster, J. and S. Rahmann, Snakemake-a scalable bioinformatics workflow engine. Bioinformatics, 2018. 34(20): p. 3600. 

\ No newline at end of file diff --git a/dev/pipeline-details/output/index.html b/dev/pipeline-details/output/index.html index 1726043..72738ac 100644 --- a/dev/pipeline-details/output/index.html +++ b/dev/pipeline-details/output/index.html @@ -1,4 +1,4 @@ - Output Files - XAVIER Documentation

Output files

XAVIER

The output files and their locations are broken down here for the XAVIER pipeline. Pre-processing and germline variant calling steps are common but somatic variant calling is dependent on whether the pipeline was run in either (A) tumor-normal pair or (B) tumor-only analysis mode. All file locations are relative to the output directory specified during the job submission.

The output directory after a complete XAVIER run should look like:

xavier_output/
+ Output Files - XAVIER Documentation      

Output files

XAVIER

The output files and their locations are broken down here for the XAVIER pipeline. Pre-processing and germline variant calling steps are common but somatic variant calling is dependent on whether the pipeline was run in either (A) tumor-normal pair or (B) tumor-only analysis mode. All file locations are relative to the output directory specified during the job submission.

The output directory after a complete XAVIER run should look like:

xavier_output/
 ├── bams
 ├── cluster.json # cluster info for the run
 ├── config
@@ -139,4 +139,4 @@
        └── vcf
     ├── vardict_out
     └── varscan_out
-

Last update: 2024-01-30
\ No newline at end of file +
\ No newline at end of file diff --git a/dev/pipeline-details/overview/index.html b/dev/pipeline-details/overview/index.html index 037c563..56e16c3 100644 --- a/dev/pipeline-details/overview/index.html +++ b/dev/pipeline-details/overview/index.html @@ -1 +1 @@ - Overview - XAVIER Documentation

Pipeline Overview

Pipeline Diagram

Workflow diagram of the XAVIER: the pipeline is composed of a series of data processing steps to trim, align, and recalibrate reads prior to calling variants. These data processing steps closely follow GATK's best practices for cleaning up raw alignments. The pipeline also consists of a series of comprehensive quality-control steps.


Last update: 2024-01-30
\ No newline at end of file + Overview - XAVIER Documentation

Pipeline Overview

Pipeline Diagram

Workflow diagram of the XAVIER: the pipeline is composed of a series of data processing steps to trim, align, and recalibrate reads prior to calling variants. These data processing steps closely follow GATK's best practices for cleaning up raw alignments. The pipeline also consists of a series of comprehensive quality-control steps.

\ No newline at end of file diff --git a/dev/pipeline-details/settings/index.html b/dev/pipeline-details/settings/index.html index d7da9fb..94fc3d3 100644 --- a/dev/pipeline-details/settings/index.html +++ b/dev/pipeline-details/settings/index.html @@ -1 +1 @@ - Settings - XAVIER Documentation

Settings

This page contains details of the settings used for different tools in the pipeline

Somatic paired variant calling

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

-normal

BAM file for paired normal sample

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor AND normal bam)

strelka

calling

--exome

Preset filters for exome data

mutect

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/db/PipeDB/lib/COSMIC_82_hg38.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-M

If set, output only candidate somatic

-S

exclude variants that fail filters

filter

--exclude 'STATUS="Germline" | STATUS="LikelyLOH" | STATUS="AFDiff"'

Removes variants with certain flags from vardict; (1) Germline: detected in germline sample (pass all quality parameters); (2) LikelyLOH: detected in germline but either lost in tumor OR 20-80% in germline, but increased to 1-opt_V (95%); (3) AFDiff: detected in tumor (pass quality parameters) and present in germline but didn’t pass quality parameters.

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

Somatic tumor-only variant calling

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor bam)

mutect

calling

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

-x 500

Nucleotides to extend

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with map quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-S

exclude variants that fail filters

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz


Last update: 2023-11-29
\ No newline at end of file + Settings - XAVIER Documentation

Settings

This page contains details of the settings used for different tools in the pipeline

Somatic paired variant calling

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

-normal

BAM file for paired normal sample

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor AND normal bam)

strelka

calling

--exome

Preset filters for exome data

mutect

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/db/PipeDB/lib/COSMIC_82_hg38.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with map quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-M

If set, output only candidate somatic

-S

exclude variants that fail filters

filter

--exclude 'STATUS="Germline" | STATUS="LikelyLOH" | STATUS="AFDiff"'

Removes variants with certain flags from vardict; (1) Germline: detected in germline sample (pass all quality parameters); (2) LikelyLOH: detected in germline but either lost in tumor OR 20-80% in germline, but increased to 1-opt_V (95%); (3) AFDiff: detected in tumor (pass quality parameters) and present in germline but didn’t pass quality parameters.

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

Somatic tumor-only variant calling

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor bam)

mutect

calling

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

-x 500

Nucleotides to extend

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with map quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-S

exclude variants that fail filters

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

\ No newline at end of file diff --git a/dev/pipeline-details/tools/index.html b/dev/pipeline-details/tools/index.html index 0c28290..6958ac7 100644 --- a/dev/pipeline-details/tools/index.html +++ b/dev/pipeline-details/tools/index.html @@ -1 +1 @@ - Tools - XAVIER Documentation

Tools

This table lists information about the steps performed, tools used, and their details.

Module

Category

Analysis Type

Software

Version

Rule File(s)

Preprocessing

Preprocessing

trim

Trimmomatic

0.39

trim_map_preprocess.smk

map

bwa

0.7.17

trim_map_preprocess.smk

markdup

samblaster

0.1.25

trim_map_preprocess.smk

GATK4 Best Practices

GATK4

4.2.2

SNP/Indel Calling

Mutect2

GATK 4.2.2

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Strelka

2.9.0

somatic_snps.paired.smk

VarScan

2.4.3

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Mutect

1.1.7

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

VarDict

1.4

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

FFPE Artifact Filter

SOBDetector

1.0.4

ffpe.smk

Consensus SNP/Indels

GATK3 CombineVariants

GATK_3.8-1

somatic_snps.common.smk

Somatic Copy Number Variation (CNV)

CNV

Control-FREEC

11.5

somatic_snps.paired.smk

Sequenza

somatic_snps.paired.smk

Somatic Analysis

Annotate

vcf2maf

somatic_snps.common.smk

Germline

Germline SNV Calling

Germline Variants

HaplotypeCaller

GATK_4.2.2

germline.smk

Germline Analysis

Ancestry

Somalier

qc.smk

Relatedness

Somalier

qc.smk

QC Metrics

QC

depth

qualimap

2.2.1

qc.smk

report

multiqc

1.11

qc.smk

base quality

FastQC

0.11.9

qc.smk

contamination

Fastq Screen

0.14.1

qc.smk

kraken

2.1.2

qc.smk

variant quality

vcftools stat

0.1.16

qc.smk

bcftools_stat

1.9

qc.smk

variant effect

SNPeff

4.3t

qc.smk

General

General

R scripts

R

4.1

general

variant wrangling

bcftools

1.9

general

vcftools

0.1.16

general

alignment wrangling

samtools

1.8

general

Orchestration

Orchestration

Containerization

singularity

3.8.5

Orchestration

Workflow management

snakemake

6.8.2

Orchestration


Last update: 2022-01-21
\ No newline at end of file + Tools - XAVIER Documentation

Tools

This table lists information about the steps performed, tools used, and their details.

Module

Category

Analysis Type

Software

Version

Rule File(s)

Preprocessing

Preprocessing

trim

Trimmomatic

0.39

trim_map_preprocess.smk

map

bwa

0.7.17

trim_map_preprocess.smk

markdup

samblaster

0.1.25

trim_map_preprocess.smk

GATK4 Best Practices

GATK4

4.2.2

SNP/Indel Calling

Mutect2

GATK 4.2.2

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Strelka

2.9.0

somatic_snps.paired.smk

VarScan

2.4.3

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Mutect

1.1.7

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

VarDict

1.4

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

FFPE Artifact Filter

SOBDetector

1.0.4

ffpe.smk

Consensus SNP/Indels

GATK3 CombineVariants

GATK_3.8-1

somatic_snps.common.smk

Somatic Copy Number Variation (CNV)

CNV

Control-FREEC

11.5

somatic_snps.paired.smk

Sequenza

somatic_snps.paired.smk

Somatic Analysis

Annotate

vcf2maf

somatic_snps.common.smk

Germline

Germline SNV Calling

Germline Variants

HaplotypeCaller

GATK_4.2.2

germline.smk

Germline Analysis

Ancestry

Somalier

qc.smk

Relatedness

Somalier

qc.smk

QC Metrics

QC

depth

qualimap

2.2.1

qc.smk

report

multiqc

1.11

qc.smk

base quality

FastQC

0.11.9

qc.smk

contamination

Fastq Screen

0.14.1

qc.smk

kraken

2.1.2

qc.smk

variant quality

vcftools stat

0.1.16

qc.smk

bcftools_stat

1.9

qc.smk

variant effect

SNPeff

4.3t

qc.smk

General

General

R scripts

R

4.1

general

variant wrangling

bcftools

1.9

general

vcftools

0.1.16

general

alignment wrangling

samtools

1.8

general

Orchestration

Orchestration

Containerization

singularity

3.8.5

Orchestration

Workflow management

snakemake

6.8.2

Orchestration

\ No newline at end of file diff --git a/dev/release-guide/index.html b/dev/release-guide/index.html index d534470..bc1a0c2 100644 --- a/dev/release-guide/index.html +++ b/dev/release-guide/index.html @@ -1,4 +1,4 @@ - Release Guide - XAVIER Documentation

Release Guide

Make sure you're keeping the changelog up-to-date during development. Ideally, every PR that includes a user-facing change (e.g. a new feature, bug fix, or any API change) should add a concise summary to the changelog with a link to the PR. Only approve or merge PRs that either update the changelog or have no user-facing changes.

How to release a new version on GitHub

  1. Determine the new version number according to semantic versioning guidelines.
  2. Update CHANGELOG.md:
  3. Edit the heading for the development version to match the new version.
  4. If needed, clean up the changelog -- fix any typos, optionally create subheadings for 'New features' and 'Bug fixes' if there are lots of changes, etc.
  5. Update the version in src/__init__.py.
  6. On GitHub, go to "Releases" and click "Draft a new release". https://github.com/CCBR/XAVIER/releases/new
  7. Choose a tag: same as the version number.
  8. Choose the target: most likely this should be the main branch, or a specific commit hash.
  9. Set the title as the new version number, e.g. v3.0.2
  10. Copy and paste the release notes from the CHANGELOG into the description box.
  11. Check the box "Set as the latest release".
  12. Click "Publish release".
  13. Post release chores:
  14. Add a new "development version" heading to the top of CHANGELOG.md.
  15. Bump the version number in src/__init__.py to include -dev, e.g. v3.0.2-dev if you just released v3.0.2.

How to install a release on biowulf

After releasing a new version on GitHub:

# go to the shared pipeline directory on biowulf
+ Release Guide - XAVIER Documentation      

Release Guide

Make sure you're keeping the changelog up-to-date during development. Ideally, every PR that includes a user-facing change (e.g. a new feature, bug fix, or any API change) should add a concise summary to the changelog with a link to the PR. Only approve or merge PRs that either update the changelog or have no user-facing changes.

How to release a new version on GitHub

  1. Determine the new version number according to semantic versioning guidelines.
  2. Update CHANGELOG.md:
  3. Edit the heading for the development version to match the new version.
  4. If needed, clean up the changelog -- fix any typos, optionally create subheadings for 'New features' and 'Bug fixes' if there are lots of changes, etc.
  5. Update the version in src/__init__.py.
  6. On GitHub, go to "Releases" and click "Draft a new release". https://github.com/CCBR/XAVIER/releases/new
  7. Choose a tag: same as the version number.
  8. Choose the target: most likely this should be the main branch, or a specific commit hash.
  9. Set the title as the new version number, e.g. v3.0.2
  10. Copy and paste the release notes from the CHANGELOG into the description box.
  11. Check the box "Set as the latest release".
  12. Click "Publish release".
  13. Post release chores:
  14. Add a new "development version" heading to the top of CHANGELOG.md.
  15. Bump the version number in src/__init__.py to include -dev, e.g. v3.0.2-dev if you just released v3.0.2.

How to install a release on biowulf

After releasing a new version on GitHub:

# go to the shared pipeline directory on biowulf
 cd /data/CCBR_Pipeliner/Pipelines/XAVIER
 
 # clone the new version tag (e.g. v3.0.2) to a hidden directory
@@ -17,4 +17,4 @@
 # you can verify that the symlink points to the new version with readlink
 readlink -f v3.0
 

Versions of the ccbrpipeliner module only specify the major and minor version of each pipeline. If the new pipeline release only increments the patch number, ccbrpipeliner will use it automatically after you update the symlink as above. If you need to release a new major or minor version of a pipeline on biowulf, contact Kelly or Vishal.

Verify that ccbrpipeliner uses the latest version with:

module load ccbrpipeliner && xavier --version
-

Last update: 2024-01-30
\ No newline at end of file +
\ No newline at end of file diff --git a/dev/requirements.txt b/dev/requirements.txt index 64f9a23..62dc6a6 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -16,9 +16,11 @@ MarkupSafe==2.1.4 mergedeep==1.3.4 mkdocs==1.5.3 mkdocs-git-revision-date-plugin==0.3.2 +mkdocs-git-revision-date-localized-plugin mkdocs-material==9.5.6 mkdocs-material-extensions==1.3.1 mkdocs-minify-plugin==0.8.0 +mike packaging==23.2 paginate==0.5.6 pathspec==0.12.1 diff --git a/dev/search/search_index.json b/dev/search/search_index.json index dfb2a09..c5bf22d 100644 --- a/dev/search/search_index.json +++ b/dev/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"XAVIER - eXome Analysis and Variant explorER \ud83d\udd2c","text":"

XAVIER - eXome Analysis and Variant explorER. XAVIER is an open-source, reproducible, and scalable solution for analyzing Whole Exome sequencing data. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!

"},{"location":"#overview","title":"Overview","text":"

Welcome to XAVIER's documentation! This guide is the main source of documentation for users that are getting started with the XAVIER pipeline.

The xavier pipeline is composed of several inter-related sub commands to setup and run the pipeline across different systems. Each of the available sub commands performs different functions:

  • xavier run: Run the XAVIER pipeline with your input files.
  • xavier unlock: Unlocks a previous runs output directory.
  • xavier cache: Cache remote resources locally, coming soon!

XAVIER is a comprehensive whole exome-sequencing pipeline following the Broad's set of best practices. It relies on technologies like Singularity1 to maintain the highest-level of reproducibility. The pipeline consists of a series of data processing and quality-control steps orchestrated by Snakemake2, a flexible and scalable workflow management system, to submit jobs to a cluster or cloud provider.

The pipeline is compatible with data generated from Illumina short-read sequencing technologies. As input, it accepts a set of FastQ or BAM files and can be run locally on a compute instance, on-premise using a cluster, or on the cloud (feature coming soon!). A user can define the method or mode of execution. The pipeline can submit jobs to a cluster using a job scheduler like SLURM, or run on AWS using Tibanna (feature coming soon!). A hybrid approach ensures the pipeline is accessible to all users.

Before getting started, we highly recommend reading through the usage section of each available sub command.

For more information about issues or trouble-shooting a problem, please checkout our FAQ prior to opening an issue on Github.

"},{"location":"#contribute","title":"Contribute","text":"

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continuous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our GitHub repository .

"},{"location":"#references","title":"References","text":"

1. Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459. 2. Koster, J. and S. Rahmann (2018). \"Snakemake-a scalable bioinformatics workflow engine.\" Bioinformatics 34(20): 3600.

"},{"location":"license/","title":"MIT License","text":"

Copyright \u00a9 2021 CCBR

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"},{"location":"release-guide/","title":"Release Guide","text":"

Make sure you're keeping the changelog up-to-date during development. Ideally, every PR that includes a user-facing change (e.g. a new feature, bug fix, or any API change) should add a concise summary to the changelog with a link to the PR. Only approve or merge PRs that either update the changelog or have no user-facing changes.

"},{"location":"release-guide/#how-to-release-a-new-version-on-github","title":"How to release a new version on GitHub","text":"
  1. Determine the new version number according to semantic versioning guidelines.
  2. Update CHANGELOG.md:
  3. Edit the heading for the development version to match the new version.
  4. If needed, clean up the changelog -- fix any typos, optionally create subheadings for 'New features' and 'Bug fixes' if there are lots of changes, etc.
  5. Update the version in src/__init__.py.
  6. On GitHub, go to \"Releases\" and click \"Draft a new release\". https://github.com/CCBR/XAVIER/releases/new
  7. Choose a tag: same as the version number.
  8. Choose the target: most likely this should be the main branch, or a specific commit hash.
  9. Set the title as the new version number, e.g. v3.0.2
  10. Copy and paste the release notes from the CHANGELOG into the description box.
  11. Check the box \"Set as the latest release\".
  12. Click \"Publish release\".
  13. Post release chores:
  14. Add a new \"development version\" heading to the top of CHANGELOG.md.
  15. Bump the version number in src/__init__.py to include -dev, e.g. v3.0.2-dev if you just released v3.0.2.
"},{"location":"release-guide/#how-to-install-a-release-on-biowulf","title":"How to install a release on biowulf","text":"

After releasing a new version on GitHub:

# go to the shared pipeline directory on biowulf\ncd /data/CCBR_Pipeliner/Pipelines/XAVIER\n\n# clone the new version tag (e.g. v3.0.2) to a hidden directory\ngit clone --depth 1 --branch v3.0.2 https://github.com/CCBR/XAVIER .v3.0.2\n\n# change permissions for the new directory so anyone will be able to use the pipeline\nchown -R :CCBR_Pipeliner .v3.0.2\nchmod -R a+rX /data/CCBR_Pipeliner/Pipelines/XAVIER/.v3.0.2\n\n# if needed, remove the old symlink for the minor version number\nrm -i v3.0\n\n# recreate the symlink to point to the new latest version\nln -s .v3.0.2 v3.0\n\n# you can verify that the symlink points to the new version with readlink\nreadlink -f v3.0\n

Versions of the ccbrpipeliner module only specify the major and minor version of each pipeline. If the new pipeline release only increments the patch number, ccbrpipeliner will use it automatically after you update the symlink as above. If you need to release a new major or minor version of a pipeline on biowulf, contact Kelly or Vishal.

Verify that ccbrpipeliner uses the latest version with:

module load ccbrpipeliner && xavier --version\n
"},{"location":"faq/questions/","title":"Frequently Asked Questions","text":"

Coming soon!

"},{"location":"pipeline-details/methods/","title":"Methods description","text":"

This page contains a description of all methods used in the pipeline, along with references for important tools.

Note that depending on the settings used, not all of these methods may be applicable, so please adapt this text appropriately for your application.

You can also download this text as a Word document (.docx) that contains an EndNote traveling library using the button below.

"},{"location":"pipeline-details/methods/#data-preprocessing","title":"Data preprocessing","text":"

Low-quality and adapters sequences are trimmed from the raw sequencing reads using Trimmomatic (v. 0.39)1. Trimmed reads are then aligned to the human hg38 reference genome using BWA mapping software (v. 0.7.17)2. Duplicate reads are marked using Samblaster (v. 0.1.25)3 and sorted using samtools (v. 1.8). Finally, base quality score recalibration is performed as indicated in the GATK4 (v. 4.2.2.0) best practices 4.

"},{"location":"pipeline-details/methods/#germline-variant-calling","title":"Germline variant calling","text":"

HaplotypeCaller from GATK4 (v. 4.2.2.0) is used to call germline variants, parallelized across chromosomes, and all samples in the cohort are joint genotyped together 4,5.

"},{"location":"pipeline-details/methods/#somatic-variant-calling","title":"Somatic variant calling","text":"

Somatic variant calling (SNPs and Indels) is performed using Mutect (v. 1.1.7)6, Mutect2 (GATK v. 4.2.0)7, Strelka2 (v. 2.9.0)8, and VarDict (v. 1.4)9 in tumor-normal mode. Variants from all callers are merged using the CombineVariants tool from GATK version 3.8-1. Genomic, functional and consequence annotations are added using Variant Effect Predictor (VEP v. 99)10 and converted to Mutation Annotation Format (MAF) using the vcf2maf tool (v. 1.6.16)11.

For Copy Number Variants (CNVs), Control-Freec (v. 11.6)12 is used to generate pileups, which are used as input for the R package 'sequenza' (v. 3.0.0)13. The complete Control-Freec workflow is then re-run using ploidy and cellularity estimates from 'sequenza'.

"},{"location":"pipeline-details/methods/#ffpe-artifact-filtering","title":"FFPE Artifact filtering","text":"

SOBDetector is a tool that scores variants based on strand-orientation bias, which can be a sign of DNA damage caused by fixation of tissue. This pipeline runs SOBDetector in a two-pass method. The first pass uses parameters provided with the software (calculated from publicly available data from TCGA), then cohort-specific bias metrics are computed from those results, and SOBDetector is re-run using these cohort-specific values.

"},{"location":"pipeline-details/methods/#quality-and-identity-metrics","title":"Quality and identity metrics","text":"

Ancestry and relatedness scores are generated using Somalier (v. 0.2.13)14. Contamination analyses are performed against viral and bacterial genomes from NCBI using Kraken2 (v. 2.1.2)15, as well as against mouse, human, and UniVec databases using FastQ Screen (v. 0.14.1)16. Sequence, mapping and variant statistics are computed using FastQC (v. 0.11.9), Qualimap (v. 2.2.1)17 and SNPeff (v. 4.3t)18. All of these metrics are combined into an interactive HTML report using MultiQC (v. 1.11)19.

"},{"location":"pipeline-details/methods/#pipeline-orchestration","title":"Pipeline Orchestration","text":"

Job execution and management is done using Snakemake (v. 6.8.2)20 using custom-built Singularity (v. 3.8.5) containers for reproducibility.

"},{"location":"pipeline-details/methods/#references","title":"References","text":"
  1. Bolger, A.M., M. Lohse, and B. Usadel, Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics, 2014. 30(15): p. 2114-20.\u00a0\u21a9

  2. Li, H. and R. Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 2009. 25(14): p. 1754-60.\u00a0\u21a9

  3. Faust, G.G. and I.M. Hall, SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, 2014. 30(17): p. 2503-5.\u00a0\u21a9

  4. Van der Auwera, G.A. and B.D. O'Connor, Genomics in the cloud : using Docker, GATK, and WDL in Terra. First edition. ed. 2020, Sebastopol, CA: O'Reilly Media.\u00a0\u21a9\u21a9

  5. Poplin, R., et al., Scaling accurate genetic variant discovery to tens of thousands of samples. bioRxiv, 2018: p. 201178.\u00a0\u21a9

  6. Cibulskis, K., et al., Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples. Nat Biotechnol, 2013. 31(3): p. 213-9.\u00a0\u21a9

  7. Benjamin, D., et al., Calling Somatic SNVs and Indels with Mutect2. bioRxiv, 2019: p. 861054.\u00a0\u21a9

  8. Kim, S., et al., Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods, 2018. 15(8): p. 591-594.\u00a0\u21a9

  9. Lai, Z., et al., VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research. Nucleic Acids Res, 2016. 44(11): p. e108.\u00a0\u21a9

  10. McLaren, W., et al., The Ensembl Variant Effect Predictor. Genome Biol, 2016. 17(1): p. 122.\u00a0\u21a9

  11. Memorial Sloan Kettering Cancer Center. vcf2maf. 2013; Available from: https://github.com/mskcc/vcf2maf.\u00a0\u21a9

  12. Boeva, V., et al., Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics, 2012. 28(3): p. 423-5.\u00a0\u21a9

  13. Favero, F., et al., Sequenza: allele-specific copy number and mutation profiles from tumor sequencing data. Ann Oncol, 2015. 26(1): p. 64-70.\u00a0\u21a9

  14. Pedersen, B. somalier: extract informative sites, evaluate relatedness, and perform quality-control on BAM/CRAM/BCF/VCF/GVCF. 2018; Available from: https://github.com/brentp/somalier.\u00a0\u21a9

  15. Wood, D.E., J. Lu, and B. Langmead, Improved metagenomic analysis with Kraken 2. Genome Biol, 2019. 20(1): p. 257.\u00a0\u21a9

  16. Wingett, S.W. and S. Andrews, FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res, 2018. 7: p. 1338.\u00a0\u21a9

  17. Okonechnikov, K., A. Conesa, and F. Garcia-Alcalde, Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 2016. 32(2): p. 292-4.\u00a0\u21a9

  18. Cingolani, P., et al., A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin), 2012. 6(2): p. 80-92.\u00a0\u21a9

  19. Ewels, P., et al., MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 2016. 32(19): p. 3047-8.\u00a0\u21a9

  20. Koster, J. and S. Rahmann, Snakemake-a scalable bioinformatics workflow engine. Bioinformatics, 2018. 34(20): p. 3600.\u00a0\u21a9

"},{"location":"pipeline-details/output/","title":"Output files","text":""},{"location":"pipeline-details/output/#xavier","title":"XAVIER","text":"

The output files and their locations are broken down here for the XAVIER pipeline. Pre-processing and germline variant calling steps are common but somatic variant calling is dependent on whether the pipeline was run in either (A) tumor-normal pair or (B) tumor-only analysis mode. All file locations are relative to the output directory specified during the job submission.

The output directory after a complete XAVIER run should look like:

xavier_output/\n\u251c\u2500\u2500 bams\n\u251c\u2500\u2500 cluster.json # cluster info for the run\n\u251c\u2500\u2500 config\n\u251c\u2500\u2500 config.json  # config file for the run\n\u251c\u2500\u2500 fastqs\n\u251c\u2500\u2500 germline\n\u251c\u2500\u2500 indels.vcf.gz[.tbi] # raw germline INDELs\n\u251c\u2500\u2500 input_files\n\u251c\u2500\u2500 intervals.list\n\u251c\u2500\u2500 {sample1}-normal.R1.fastq.gz -> /path/to/{sample1}-normal.R1.fastq.gz\n\u251c\u2500\u2500 {sample1}-normal.R2.fastq.gz -> /path/to/{sample1}-normal.R2.fastq.gz\n\u251c\u2500\u2500 {sample1}-tumor.R1.fastq.gz -> /path/to/{sample1}-tumor.R1.fastq.gz\n\u251c\u2500\u2500 {sample1}-tumor.R2.fastq.gz -> /path/to/{sample1}-tumor.R2.fastq.gz\n.\n.\n.\n\u251c\u2500\u2500 kickoff.sh\n\u251c\u2500\u2500 logfiles\n\u251c\u2500\u2500 QC\n\u251c\u2500\u2500 resources\n\u251c\u2500\u2500 snps.vcf.gz[.tbi] # raw germline SNPs\n\u251c\u2500\u2500 somatic_paired # in case of tumor-normal paired run\n\u251c\u2500\u2500 somatic_tumor_only # in case of tumor-only run\n\u2514\u2500\u2500 workflow\n

Below we describe the different folders that contain specific outputs obtained for all samples from the XAVIER pipeline

"},{"location":"pipeline-details/output/#1-qc","title":"1. QC","text":"

The QC folder contains all the Quality-Control analyses performed at different steps of the pipeline for each sample to assess sequencing quality before and after adapter trimming, microbial taxonomic composition, contamination, variant calling, etc. The final summary report and data is available in the finalQC folder. \\ The MultiQC report also contains results from other analyses like mapping statistics, ancestry and relatedness, etc. It is recommended to study the MultiQC report first to get a bird's-eye view of the sequence data quality.

QC/\n\u251c\u2500\u2500 exome_targets.bed\n\u251c\u2500\u2500 finalQC/\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 MultiQC_Report_data\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 MultiQC_Report.html\n\u251c\u2500\u2500 FQscreen\n\u251c\u2500\u2500 sample1-normal\n\u251c\u2500\u2500 sample1-normal_fastqc.html\n.\n.\n.\n\u251c\u2500\u2500 sample1-tumor\n\u251c\u2500\u2500 sample1-tumor_fastqc.html\n.\n.\n.\n\u251c\u2500\u2500 kraken\n\u251c\u2500\u2500 raw_variants.het\n\u251c\u2500\u2500 raw_variants.variant_calling_detail_metrics\n\u2514\u2500\u2500 raw_variants.variant_calling_summary_metrics\n
"},{"location":"pipeline-details/output/#2-bams","title":"2. bams","text":"

The bams folder contain two subfolders chrom_split and final_bams. final_bams contains the final processed BAM files for each sample in the run and the chrom_split folder contains all the sample BAM files split by each chromosome.

bams/\n\u251c\u2500\u2500 chrom_split\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 {sample1}-normal.chr1.split.bam[.bai]\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 {sample1}-normal.chr2.split.bam[.bai]\n.   .\n.   .\n.   .\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 {sampleN}-tumor.chrN.split.bam[.bai]\n\u2514\u2500\u2500 final_bams\n    \u251c\u2500\u2500 {sample1}-normal.bam[.bai]\n\u251c\u2500\u2500 {sample1}-tumor.bam[.bai]\n.\n    .\n    .\n    \u2514\u2500\u2500 {sampleN}-tumor.bam.bai\n
"},{"location":"pipeline-details/output/#3-germline","title":"3. germline","text":"

This folder contains the output from the GATK Best Practices pipeline to obtain germline variants with a few alterations detailed below. Briefly, joint SNP and INDEL variant detection is conducted across all samples included in a pipeline run using the GATK HaplotypeCaller under default settings. Raw variants are then subsequently filtered based on several GATK annotations: \ A strict set of criteria (QD < 2.0, FS > 60.0, MQ < 40.0, MQRankSum < -12.5, ReadPosRankSum < -8.0 for SNPs; QD < 2.0, FS > 200.0, ReadPosRankSum < -20.0 for INDELs) generates the 'combined.strictFilter.vcf'.

This call set is highly stringent, maximizing the true positive rate at the expense of an elevated false negative rate. This call set is really only intended for more general population genetic scale analyses (e.g., burden tests, admixture, linkage/pedigree based analysis, etc.) where false positives can be significantly confounding.

In case of human sequence data, a basic analysis of sample relatedness and ancestry (e.g., % European, African, etc.) is also performed using somalier.

The output folder looks like:

germline/\n\u251c\u2500\u2500 gVCFs\n.\n.\n.\n\u251c\u2500\u2500 somalier # only for hg38 genome\n\u2514\u2500\u2500 VCF\n

The VCF folder contains the final filtered germline variants (SNPs and INDELs) for all samples combined. The folder also contains raw variants for each sample, all samples combined, and also combined raw variants split by chromosome.

VCF/\n\u251c\u2500\u2500 by_chrom\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 raw_variants_byChrom.list\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 raw_variants.chr1.vcf.gz[.tbi]\n.   .\n.   .\n.   .\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 raw_variants.chrN.vcf.gz[.tbi]\n\u251c\u2500\u2500 indel.filterd.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sample1}-normal.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sample1}-tumor.germline.vcf.gz[.tbi]\n.\n.\n.\n\u251c\u2500\u2500 {sampleN}-normal.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sampleN}-tumor.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 raw_variants.vcf.gz[.tbi]\n\u251c\u2500\u2500 snp.filtered.vcf.gz[.tbi]\n\u2514\u2500\u2500 snp_indel.filtered.vcf.gz[.tbi]\n
"},{"location":"pipeline-details/output/#4-logfiles","title":"4. logfiles","text":"

This folder contains the snakemake log files and computational statistics for the XAVIER run. All the log files (i.e., standard output and error) for each individual step are in the slurmfiles folder. These logfiles are important to diagnose errors in case the pipeline fails.

logfiles/\n\u251c\u2500\u2500 master.log\n\u251c\u2500\u2500 mjobid.log\n\u251c\u2500\u2500 runtime_statistics.json\n\u251c\u2500\u2500 slurmfiles\n\u251c\u2500\u2500 snakemake.log\n\u251c\u2500\u2500 snakemake.log.jobby\n\u2514\u2500\u2500 snakemake.log.jobby.short\n
"},{"location":"pipeline-details/output/#tumor-normal-pair","title":"Tumor-normal pair","text":""},{"location":"pipeline-details/output/#somatic_paired","title":"somatic_paired","text":"

This workflow calls somatic SNPs and INDELs using multiple variant detection algorithms. For each of these tools, variants are called in a paired tumor-normal fashion, with default settings. See Pipeline Details for more information about the tools used and their parameter settings.

For each sample, the resulting VCF is fully annotated using VEP and converted to a MAF file using the vcf2maf tool. Resulting MAF files are found in maf folder within each caller's results directory (i.e., mutect2_out, strelka_out, etc.). Individual sample MAF files are then merged and saved in merged_somatic_variants directory.

For Mutect2, we use a panel of normals (PON) developed from the ExAC (excluding TCGA) dataset, filtered for variants <0.001 in the general population, and also including an in-house set of blacklisted recurrent germline variants that are not found in any population databases.

For Copy Number Variants (CNVs), two tools are employed in tandem. First, Control-FREEC is run with default parameters. This generates pileup files that can be used by Sequenza, primarily for jointly estimating contamination and ploidy. These values are used to run Freec a second time for improved performance.

The output directory should look like:

somatic_paired/\n\u251c\u2500\u2500 CNV # only if CNVs analyzed\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 freec_out\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 sequenza_out\n\u251c\u2500\u2500 ffpe_filter # only if FFPE filter applied\n\u251c\u2500\u2500 qc\n\u2514\u2500\u2500 SNP_Indels\n    \u251c\u2500\u2500 merged_somatic_variants\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf # Final merged MAFs for each sample\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect2_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 chrom_split\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 pileup_summaries\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 read_orientation_data\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 strelka_out\n    \u251c\u2500\u2500 vardict_out\n    \u2514\u2500\u2500 varscan_out\n
"},{"location":"pipeline-details/output/#tumor-only","title":"Tumor-only","text":""},{"location":"pipeline-details/output/#somatic_tumor_only","title":"somatic_tumor_only","text":"

In general, the tumor-only pipeline is a stripped down version of the tumor-normal pipeline. We only run MuTect2, Mutect, and VarDict for somatic variant detection, with the same PON and filtering as described above for the tumor-normal pipeline.

somatic_tumor_only/\n\u251c\u2500\u2500 ffpe_filter # only if FFPE filter applied\n\u251c\u2500\u2500 qc\n\u2514\u2500\u2500 SNP_Indels\n    \u251c\u2500\u2500 merged_somatic_variants\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf # Final merged MAFs for each sample\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect2_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 chrom_split\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 read_orientation_data\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 vardict_out\n    \u2514\u2500\u2500 varscan_out\n
"},{"location":"pipeline-details/overview/","title":"Pipeline Overview","text":"

Workflow diagram of the XAVIER: the pipeline is composed of a series of data processing steps to trim, align, and recalibrate reads prior to calling variants. These data processing steps closely follow GATK's best practices for cleaning up raw alignments. The pipeline also consists of a series of comprehensive quality-control steps.

"},{"location":"pipeline-details/settings/","title":"Settings","text":"

This page contains details of the settings used for different tools in the pipeline

"},{"location":"pipeline-details/settings/#somatic-paired-variant-calling","title":"Somatic paired variant calling","text":"

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

-normal

BAM file for paired normal sample

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor AND normal bam)

strelka

calling

--exome

Preset filters for exome data

mutect

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/db/PipeDB/lib/COSMIC_82_hg38.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-M

If set, output only candidate somatic

-S

exclude variants that fail filters

filter

--exclude 'STATUS=\"Germline\" | STATUS=\"LikelyLOH\" | STATUS=\"AFDiff\"'

Removes variants with certain flags from vardict; (1) Germline: detected in germline sample (pass all quality parameters); (2) LikelyLOH: detected in germline but either lost in tumor OR 20-80% in germline, but increased to 1-opt_V (95%); (3) AFDiff: detected in tumor (pass quality parameters) and present in germline but didn\u2019t pass quality parameters.

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

"},{"location":"pipeline-details/settings/#somatic-tumor-only-variant-calling","title":"Somatic tumor-only variant calling","text":"

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor bam)

mutect

calling

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

-x 500

Nucleotides to extend

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with map quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-S

exclude variants that fail filters

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

"},{"location":"pipeline-details/tools/","title":"Tools","text":"

This table lists information about the steps performed, tools used, and their details.

Module

Category

Analysis Type

Software

Version

Rule File(s)

Preprocessing

Preprocessing

trim

Trimmomatic

0.39

trim_map_preprocess.smk

map

bwa

0.7.17

trim_map_preprocess.smk

markdup

samblaster

0.1.25

trim_map_preprocess.smk

GATK4 Best Practices

GATK4

4.2.2

SNP/Indel Calling

Mutect2

GATK 4.2.2

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Strelka

2.9.0

somatic_snps.paired.smk

VarScan

2.4.3

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Mutect

1.1.7

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

VarDict

1.4

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

FFPE Artifact Filter

SOBDetector

1.0.4

ffpe.smk

Consensus SNP/Indels

GATK3 CombineVariants

GATK_3.8-1

somatic_snps.common.smk

Somatic Copy Number Variation (CNV)

CNV

Control-FREEC

11.5

somatic_snps.paired.smk

Sequenza

somatic_snps.paired.smk

Somatic Analysis

Annotate

vcf2maf

somatic_snps.common.smk

Germline

Germline SNV Calling

Germline Variants

HaplotypeCaller

GATK_4.2.2

germline.smk

Germline Analysis

Ancestry

Somalier

qc.smk

Relatedness

Somalier

qc.smk

QC Metrics

QC

depth

qualimap

2.2.1

qc.smk

report

multiqc

1.11

qc.smk

base quality

FastQC

0.11.9

qc.smk

contamination

Fastq Screen

0.14.1

qc.smk

kraken

2.1.2

qc.smk

variant quality

vcftools stat

0.1.16

qc.smk

bcftools_stat

1.9

qc.smk

variant effect

SNPeff

4.3t

qc.smk

General

General

R scripts

R

4.1

general

variant wrangling

bcftools

1.9

general

vcftools

0.1.16

general

alignment wrangling

samtools

1.8

general

Orchestration

Orchestration

Containerization

singularity

3.8.5

Orchestration

Workflow management

snakemake

6.8.2

Orchestration

"},{"location":"usage/cache/","title":"xavier cache","text":""},{"location":"usage/cache/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier cache sub command in more detail. With minimal configuration, the cache sub command enables you to cache remote resources for the xavier pipeline. Caching remote resources allows the pipeline to run in an offline mode. The cache sub command can also be used to pull our pre-built reference bundles onto a new cluster or target system.

The cache sub command creates local cache on the filesystem for resources hosted on DockerHub or AWS S3. These resources are normally pulled onto the filesystem when the pipeline runs; however, due to network issues or DockerHub pull rate limits, it may make sense to pull the resources once so a shared cache can be created and re-used. It is worth noting that a singularity cache cannot normally be shared across users. Singularity strictly enforces that its cache is owned by the user. To get around this issue, the cache subcommand can be used to create local SIFs on the filesystem from images on DockerHub.

"},{"location":"usage/cache/#2-synopsis","title":"2. Synopsis","text":"

Coming Soon!

"},{"location":"usage/gui/","title":"Getting started","text":""},{"location":"usage/gui/#1-synopsis","title":"1. Synopsis","text":"

XAVIER pipeline can be executed either using the graphical user interface (GUI) or the command line interface (CLI). GUI offers a more interactive way for the user to provide input and adjust parameter settings. This part of the documentation describes how to run xavier using the GUI (with screenshots). See Command Line tab to read more about the xavier executable and running XAVIER pipeline using the CLI.

"},{"location":"usage/gui/#2-setting-up-xavier","title":"2. Setting up XAVIER","text":""},{"location":"usage/gui/#21-login-to-cluster","title":"2.1 Login to cluster","text":"
# Setup Step 1.) ssh into cluster's head node\n# example below for Biowulf cluster\nssh -Y $USER@biowulf.nih.gov\n
"},{"location":"usage/gui/#22-grab-an-interactive-node","title":"2.2 Grab an interactive node","text":"
# Setup Step 2.) Please do not run XAVIER on the head node!\n# Grab an interactive node first\nsinteractive --time=12:00:00 --mem=8gb  --cpus-per-task=4\n
"},{"location":"usage/gui/#23-load-ccbrpipeliner-module","title":"2.3 Load ccbrpipeliner module","text":"

NOTE: ccbrpipeliner is a custom module created on biowulf which contains various NGS data analysis pipelines developed, tested, and benchmarked by experts at CCBR.

# Setup Step 3.) Add ccbrpipeliner module\nmodule purge # to reset the module environment\nmodule load ccbrpipeliner\n

If the module was loaded correctly, the greetings message should be displayed.

[+] Loading ccbrpipeliner  5  ...\n###########################################################################\nCCBR Pipeliner\n###########################################################################\n\"ccbrpipeliner\" is a suite of end-to-end pipelines and tools\n    Visit https://github.com/ccbr for more details.\n    Pipelines are available on BIOWULF and FRCE.\n    Tools are available on BIOWULF, HELIX and FRCE.\n\n    The following pipelines/tools will be loaded in this module:\n\n    RENEE v2.5 https://ccbr.github.io/RENEE/\n    XAVIER v3.0 https://ccbr.github.io/XAVIER/\n    CARLISLE v2.4 https://ccbr.github.io/CARLISLE/\n    CHAMPAGNE v0.2 https://ccbr.github.io/CHAMPAGNE/\n    CRUISE v0.1 https://ccbr.github.io/CRUISE/\n\n    spacesavers2 v0.10 https://ccbr.github.io/spacesavers2/\n    permfix v0.6 https://github.com/ccbr/permfix\n###########################################################################\nThank you for using CCBR Pipeliner\n###########################################################################\n

To check the current version of XAVIER, enter:

xavier --version\n
"},{"location":"usage/gui/#3-running-xavier","title":"3. Running XAVIER","text":""},{"location":"usage/gui/#31-launching-xavier-gui","title":"3.1 Launching XAVIER GUI","text":"

To run the XAVIER pipeline from the GUI, simply enter:

xavier_gui\n

and it will launch the XAVIER window.

Note: Please wait until window created! message appears on the terminal.

"},{"location":"usage/gui/#32-folder-paths-and-reference-genomes","title":"3.2 Folder paths and reference genomes","text":"

To enter the location of the input folder containing FASTQ files and the location where the output folders should be created, either simply type the absolute paths

or use the Browse tab to choose the input and output directories

Next, from the drop down menu select the reference genome (hg38/mm10)

and enter a job name of this run.

"},{"location":"usage/gui/#33-analysis-mode","title":"3.3 Analysis mode","text":"

XAVIER pipeline can be run in two different modes:\\ (A) Tumor-normal pair \\ (B) Tumor-only

"},{"location":"usage/gui/#33a-tumor-normal-pair-analysis","title":"3.3a Tumor-normal pair analysis","text":"

In case of tumor-normal pairs, a tab-delimited text file is needed that contains the list of normal and tumor samples. For example,

Normal  Tumor\nsample1-normal     sample1-tumor\nsample2-normal     sample2-tumor\nsample3-normal     sample3-tumor\nsample4-normal     sample4-tumor\n

Similar to input and output folder paths, either type the path to the pairsInfo.txt file or use the Browse tab.

In case of paired mode, XAVIER can also perform copy number variants (CNV) analysis.

"},{"location":"usage/gui/#33b-tumor-only-analysis","title":"3.3b Tumor-only analysis","text":"

In case the paired normal samples are unavailable, XAVIER pipeline can be run in tumor-only mode which does not require paired samples information. However, in the absence of matching normal samples, CNV analysis is also unavailable.

"},{"location":"usage/gui/#34-submit-xavier-job","title":"3.4 Submit XAVIER job","text":"

After all the information is filled out, press Submit.

If the pipeline detects no errors and the run was submitted, a new window appears that has the output of a \"dry-run\" which summarizes each step of the pipeline.

Click OK

A dialogue box will popup to confirm submitting the job to slurm.

Click Yes

The dry-run output will be displayed again and the master job will be submitted. An email notification will be sent out when the pipeline starts and ends.

The XAVIER gui will ask to submit another job.

Click Yes to start again or No to close the XAVIER gui.

"},{"location":"usage/gui/#35-additional-settings","title":"3.5 Additional settings","text":"

Users can input certain additional settings for the pipeline run including running an additional step to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples and to provide a custom exome targets BED file. This file can be obtained from the manufacturer of the target capture kit that was used.

"},{"location":"usage/gui/#4-special-instructions-for-biowulf","title":"4. Special instructions for Biowulf","text":"

XAVIER GUI natively uses the X11 Window System to run XAVIER pipeline and display the graphics on a personal desktop or laptop. However, if running XAVIER specifically on NIH's Biowulf cluster, the HPC staff recommends NoMachine (NX) to run graphics applications.

Please see details here on how to install and connect to Biowulf on your local computer using NoMachine.

Once connected to Biowulf using NX, right click to open a terminal connection

and start an interactive session.

Similar to the instructions above, load ccbrpipeliner module and enter xavier_gui to launch the XAVIER gui.

"},{"location":"usage/run/","title":"xavier run","text":""},{"location":"usage/run/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier run sub command in more detail. With minimal configuration, the run sub command enables you to start running xavier pipeline.

Setting up the xavier pipeline is fast and easy! In its most basic form, xavier run only has four required inputs.

"},{"location":"usage/run/#2-synopsis","title":"2. Synopsis","text":"
$ xavier run [--help] \\\n                   [--mode {local, slurm}] \\\n                   [--job-name JOB_NAME] \\\n                   [--callers {mutect2,mutect,strelka, ...}] \\\n                   [--pairs PAIRS] \\\n                   [--ffpe] \\\n                   [--cnv] \\\n                   [--silent] \\\n                   [--singularity-cache SINGULARITY_CACHE] \\\n                   [--sif-cache SIF_CACHE] \\\n                   [--threads THREADS] \\\n                   --runmode {init, dryrun, run} \\\n                   --input INPUT [INPUT ...] \\\n                   --output OUTPUT \\\n                   --genome {hg38, ...} \\\n                   --targets TARGETS\n

The synopsis for each command shows its parameters and their usage. Optional parameters are shown in square brackets.

A user must provide a list of FastQ or BAM files (globbing is supported) to analyze via --input argument, an output directory to store results via --output argument, an exome targets BED file for the samples' capture kit, and select a reference genome for alignment and annotation via the --genome argument.

You can always use the -h option for information on a specific command.

"},{"location":"usage/run/#21-required-arguments","title":"2.1 Required Arguments","text":"

Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.

--input INPUT [INPUT ...]

Input FastQ or BAM file(s) to process. type: file(s)

One or more FastQ files can be provided. The pipeline does NOT support single-end WES data. Please provide either a set of FastQ files or a set of BAM files. The pipeline does NOT support processing a mixture of FastQ files and BAM files. From the command-line, each input file should be separated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should be gzipped.

Example: --input .tests/*.R?.fastq.gz

--output OUTPUT

Path to an output directory. type: path

This location is where the pipeline will create all of its output files, also known as the pipeline's working directory. If the provided output directory does not exist, it will be initialized automatically.

Example: --output /data/$USER/WES_hg38

--runmode {init,dryrun,run} `

Execution Process. type: string

User should initialize the pipeline folder by first running --runmode init User should then perform a dry-run to list all steps the pipeline will take --runmode dryrun User should then perform the full run --runmode run

Example: --runmode init THEN --runmode dryrun THEN --runmode run

--genome {hg38, custom.json}

Reference genome. type: string/file

This option defines the reference genome for your set of samples. On Biowulf, xavier does come bundled with pre built reference files for human samples; however, it is worth noting that the pipeline does accept a pre-built resource bundle pulled with the cache sub command (coming soon). Currently, the pipeline only supports the human reference hg38; however, support for mouse reference mm10 will be added soon.

Pre built Option Here is a list of available pre built genomes on Biowulf: hg38.

Custom Option For users running the pipeline outside of Biowulf, a pre-built resource bundle can be pulled with the cache sub command (coming soon). Please supply the custom reference JSON file that was generated by the cache sub command.

Example: --genome hg38 OR --genome /data/${USER}/hg38/hg38.json

--targets TARGETS

Exome targets BED file. type: file

This file can be obtained from the manufacturer of the target capture kit that was used.

Example: --targets /data/$USER/Agilent_SSv7_allExons_hg38.bed

"},{"location":"usage/run/#22-options","title":"2.2 Options","text":"

Each of the following arguments are optional and do not need to be provided.

-h, --help

Display Help. type: boolean flag

Shows command's synopsis, help message, and an example command

Example: --help

--silent

Silence standard output. type: boolean flag

Reduces the amount of information directed to standard output when submitting master job to the job scheduler. Only the job id of the master job is returned.

Example: --silent

--mode {local,slurm}

Execution Method. type: string default: slurm

Execution Method. Defines the mode or method of execution. Valid mode options include: local or slurm.

local Local executions will run serially on compute instance. This is useful for testing, debugging, or when a user does not have access to a high performance computing environment. If this option is not provided, it will default to a local execution mode.

slurm The slurm execution method will submit jobs to a cluster using a singularity backend. It is recommended running xavier in this mode as execution will be significantly faster in a distributed environment.

Example: --mode slurm

--job-name JOB_NAME

Set the name of the pipeline's master job. type: string > default: pl:xavier

When submitting the pipeline to a job scheduler, like SLURM, this option allows you to set the name of the pipeline's master job. By default, the name of the pipeline's master job is set to \"pl:xavier\".

Example: --job-name xavier_run1

--callers CALLERS [CALLERS ...]

Variant Callers. type: string(s) > default: mutect2, mutect, strelka, vardict, varscan

List of variant callers to detect mutations. Please select from one or more of the following options: [mutect2, mutect, strelka, vardict, varscan]. Defaults to using all variant callers.

Example: --callers mutect2 strelka varscan

--pairs PAIRS

Tumor normal pairs file. type: file

This tab delimited file contains two columns with the names of tumor and normal pairs, one per line. The header of the file needs to be Tumor for the tumor column and Normal for the normal column. The base name of each sample should be listed in the pairs file. The base name of a given sample can be determined by removing the following extension from the sample's R1 FastQ file: .R1.fastq.gz. Contents of example pairs file:

Normal    Tumor\nSample4_CRL1622_S31   Sample10_ARK1_S37\nSample4_CRL1622_S31   Sample11_ACI_158_S38\n

Example: --pairs /data/$USER/pairs.tsv

--ffpe

Apply FFPE correction. type: boolean flag

Runs an additional steps to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples. Do NOT use this option with non-FFPE samples.

Example: --ffpe

--cnv

Call copy number variations (CNVs). type: boolean flag

CNVs will only be called from tumor-normal pairs. If this option is provided without providing a --pairs file, CNVs will NOT be called.

Example: --cnv

--singularity-cache SINGULARITY_CACHE

Overrides the $SINGULARITY_CACHEDIR environment variable. type: path default: --output OUTPUT/.singularity

Singularity will cache image layers pulled from remote registries. This ultimately speeds up the process of pulling an image from DockerHub if an image layer already exists in the singularity cache directory. By default, the cache is set to the value provided to the --output argument. Please note that this cache cannot be shared across users. Singularity strictly enforces that you own the cache directory and will return a non-zero exit code if you do not own the cache directory! See the --sif-cache option to create a shareable resource.

Example: --singularity-cache /data/$USER/.singularity

--sif-cache SIF_CACHE

Path where a local cache of SIFs are stored. type: path

Uses a local cache of SIFs on the filesystem. This SIF cache can be shared across users if permissions are set correctly. If a SIF does not exist in the SIF cache, the image will be pulled from Dockerhub and a warning message will be displayed. The xavier cache subcommand can be used to create a local SIF cache. Please see xavier cache for more information. This command is extremely useful for avoiding DockerHub pull rate limits. It also removes any potential errors that could occur due to network issues or DockerHub being temporarily unavailable. We recommend running xavier with this option whenever possible.

Example: --sif-cache /data/$USER/SIFs

--threads THREADS

Max number of threads for each process. type: int default: 2

Max number of threads for each process. This option is more applicable when running the pipeline with --mode local. It is recommended setting this value to the maximum number of CPUs available on the host machine.

Example: --threads 12

"},{"location":"usage/run/#3-example","title":"3. Example","text":"
# Step 1.) Grab an interactive node\n# Do not run on head node!\nsinteractive --mem=8g --cpus-per-task=4\nmodule purge\nmodule load ccbrpipeliner\n\n# Step 2A.) Initialize the all resources to the output folder\nxavier run --input .tests/*.R?.fastq.gz \\\n--output /data/$USER/xavier_hg38 \\\n--genome hg38 \\\n--targets Agilent_SSv7_allExons_hg38.bed \\\n--mode slurm \\\n--runmode init\n\n# Step 2B.) Dry-run the pipeline\nxavier run --input .tests/*.R?.fastq.gz \\\n--output /data/$USER/xavier_hg38 \\\n--genome hg38 \\\n--targets Agilent_SSv7_allExons_hg38.bed \\\n--mode slurm \\\n--runmode dryrun\n\n# Step 2C.) Run the XAVIER pipeline\n# The slurm mode will submit jobs to the cluster.\n# It is recommended running xavier in this mode.\nxavier run --input .tests/*.R?.fastq.gz \\\n--output /data/$USER/xavier_hg38 \\\n--genome hg38 \\\n--targets Agilent_SSv7_allExons_hg38.bed \\\n--mode slurm \\\n--runmode run\n
"},{"location":"usage/unlock/","title":"xavier unlock","text":""},{"location":"usage/unlock/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier unlock sub command in more detail. With minimal configuration, the unlock sub command enables you to unlock a pipeline output directory.

If the pipeline fails ungracefully, it may be required to unlock the working directory before proceeding again. Snakemake will inform a user when it may be necessary to unlock a working directory with an error message stating: Error: Directory cannot be locked.

Please verify that the pipeline is not running before running this command. If the pipeline is currently running, the workflow manager will report the working directory is locked. This is the default behavior of snakemake, and it is normal. Do NOT run this command if the pipeline is still running! Please kill the master job and its child jobs prior to running this command.

Unlocking xavier pipeline output directory is fast and easy! In its most basic form, xavier unlock only has one required input.

"},{"location":"usage/unlock/#2-synopsis","title":"2. Synopsis","text":"
$ xavier unlock [-h] --output OUTPUT\n

The synopsis for this command shows its parameters and their usage. Optional parameters are shown in square brackets.

A user must provide an output directory to unlock via --output argument. After running the unlock sub command, you can resume the build or run pipeline from where it left off by re-running it.

You can always use the -h option for information on a specific command.

"},{"location":"usage/unlock/#21-required-arguments","title":"2.1 Required Arguments","text":"

--output OUTPUT

Output directory to unlock. type: path

Path to a previous run's output directory. This will remove a lock on the working directory. Please verify that the pipeline is not running before running this command. Example: --output /data/$USER/WES_hg38

"},{"location":"usage/unlock/#22-options","title":"2.2 Options","text":"

Each of the following arguments are optional and do not need to be provided.

-h, --help

Display Help. type: boolean

Shows command's synopsis, help message, and an example command

Example: --help

"},{"location":"usage/unlock/#3-example","title":"3. Example","text":"
# Step 0.) Grab an interactive node (do not run on head node)\nsinteractive --mem=8g -N 1 -n 4\nmodule purge\nmodule load ccbrpipeliner\n\n# Step 1.) Unlock a pipeline output directory\nxavier unlock --output /data/$USER/xavier_hg38\n
"}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"XAVIER - eXome Analysis and Variant explorER \ud83d\udd2c","text":"

XAVIER - eXome Analysis and Variant explorER. XAVIER is an open-source, reproducible, and scalable solution for analyzing Whole Exome sequencing data. Its long-term goals: to accurately call germline and somatic variants, to infer CNVs, and to boldly annotate variants like no pipeline before!

"},{"location":"#overview","title":"Overview","text":"

Welcome to XAVIER's documentation! This guide is the main source of documentation for users that are getting started with the XAVIER pipeline.

The xavier pipeline is composed of several inter-related sub commands to setup and run the pipeline across different systems. Each of the available sub commands perform different functions:

  • xavier run: Run the XAVIER pipeline with your input files.
  • xavier unlock: Unlocks a previous run's output directory.
  • xavier cache: Cache remote resources locally, coming soon!

XAVIER is a comprehensive whole exome-sequencing pipeline following the Broad's set of best practices. It relies on technologies like Singularity1 to maintain the highest-level of reproducibility. The pipeline consists of a series of data processing and quality-control steps orchestrated by Snakemake2, a flexible and scalable workflow management system, to submit jobs to a cluster or cloud provider.

The pipeline is compatible with data generated from Illumina short-read sequencing technologies. As input, it accepts a set of FastQ or BAM files and can be run locally on a compute instance, on-premise using a cluster, or on the cloud (feature coming soon!). A user can define the method or mode of execution. The pipeline can submit jobs to a cluster using a job scheduler like SLURM, or run on AWS using Tibanna (feature coming soon!). A hybrid approach ensures the pipeline is accessible to all users.

Before getting started, we highly recommend reading through the usage section of each available sub command.

For more information about issues or trouble-shooting a problem, please checkout our FAQ prior to opening an issue on Github.

"},{"location":"#contribute","title":"Contribute","text":"

This site is a living document, created for and by members like you. XAVIER is maintained by the members of CCBR and is improved by continuous feedback! We encourage you to contribute new content and make improvements to existing content via pull request to our GitHub repository .

"},{"location":"#references","title":"References","text":"

1. Kurtzer GM, Sochat V, Bauer MW (2017). Singularity: Scientific containers for mobility of compute. PLoS ONE 12(5): e0177459. 2. Koster, J. and S. Rahmann (2018). \"Snakemake-a scalable bioinformatics workflow engine.\" Bioinformatics 34(20): 3600.

"},{"location":"license/","title":"MIT License","text":"

Copyright \u00a9 2021 CCBR

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

"},{"location":"release-guide/","title":"Release Guide","text":"

Make sure you're keeping the changelog up-to-date during development. Ideally, every PR that includes a user-facing change (e.g. a new feature, bug fix, or any API change) should add a concise summary to the changelog with a link to the PR. Only approve or merge PRs that either update the changelog or have no user-facing changes.

"},{"location":"release-guide/#how-to-release-a-new-version-on-github","title":"How to release a new version on GitHub","text":"
  1. Determine the new version number according to semantic versioning guidelines.
  2. Update CHANGELOG.md:
  3. Edit the heading for the development version to match the new version.
  4. If needed, clean up the changelog -- fix any typos, optionally create subheadings for 'New features' and 'Bug fixes' if there are lots of changes, etc.
  5. Update the version in src/__init__.py.
  6. On GitHub, go to \"Releases\" and click \"Draft a new release\". https://github.com/CCBR/XAVIER/releases/new
  7. Choose a tag: same as the version number.
  8. Choose the target: most likely this should be the main branch, or a specific commit hash.
  9. Set the title as the new version number, e.g. v3.0.2
  10. Copy and paste the release notes from the CHANGELOG into the description box.
  11. Check the box \"Set as the latest release\".
  12. Click \"Publish release\".
  13. Post release chores:
  14. Add a new \"development version\" heading to the top of CHANGELOG.md.
  15. Bump the version number in src/__init__.py to include -dev, e.g. v3.0.2-dev if you just released v3.0.2.
"},{"location":"release-guide/#how-to-install-a-release-on-biowulf","title":"How to install a release on biowulf","text":"

After releasing a new version on GitHub:

# go to the shared pipeline directory on biowulf\ncd /data/CCBR_Pipeliner/Pipelines/XAVIER\n\n# clone the new version tag (e.g. v3.0.2) to a hidden directory\ngit clone --depth 1 --branch v3.0.2 https://github.com/CCBR/XAVIER .v3.0.2\n\n# change permissions for the new directory so anyone will be able to use the pipeline\nchown -R :CCBR_Pipeliner .v3.0.2\nchmod -R a+rX /data/CCBR_Pipeliner/Pipelines/XAVIER/.v3.0.2\n\n# if needed, remove the old symlink for the minor version number\nrm -i v3.0\n\n# recreate the symlink to point to the new latest version\nln -s .v3.0.2 v3.0\n\n# you can verify that the symlink points to the new version with readlink\nreadlink -f v3.0\n

Versions of the ccbrpipeliner module only specify the major and minor version of each pipeline. If the new pipeline release only increments the patch number, ccbrpipeliner will use it automatically after you update the symlink as above. If you need to release a new major or minor version of a pipeline on biowulf, contact Kelly or Vishal.

Verify that ccbrpipeliner uses the latest version with:

module load ccbrpipeliner && xavier --version\n
"},{"location":"faq/questions/","title":"Frequently Asked Questions","text":"

Coming soon!

"},{"location":"pipeline-details/methods/","title":"Methods description","text":"

This page contains a description of all methods used in the pipeline, along with references for important tools.

Note that depending on the settings used, not all of these methods may be applicable, so please adapt this text appropriately for your application.

You can also download this text as a Word document (.docx) that contains an EndNote traveling library using the button below.

"},{"location":"pipeline-details/methods/#data-preprocessing","title":"Data preprocessing","text":"

Low-quality and adapter sequences are trimmed from the raw sequencing reads using Trimmomatic (v. 0.39)1. Trimmed reads are then aligned to the human hg38 reference genome using BWA mapping software (v. 0.7.17)2. Duplicate reads are marked using Samblaster (v. 0.1.25)3 and sorted using samtools (v. 1.8). Finally, base quality score recalibration is performed as indicated in the GATK4 (v. 4.2.2.0) best practices 4.

"},{"location":"pipeline-details/methods/#germline-variant-calling","title":"Germline variant calling","text":"

HaplotypeCaller from GATK4 (v. 4.2.2.0) is used to call germline variants, parallelized across chromosomes, and all samples in the cohort are joint genotyped together 4,5.

"},{"location":"pipeline-details/methods/#somatic-variant-calling","title":"Somatic variant calling","text":"

Somatic variant calling (SNPs and Indels) is performed using Mutect (v. 1.1.7)6, Mutect2 (GATK v. 4.2.0)7, Strelka2 (v. 2.9.0)8, and VarDict (v. 1.4)9 in tumor-normal mode. Variants from all callers are merged using the CombineVariants tool from GATK version 3.8-1. Genomic, functional and consequence annotations are added using Variant Effect Predictor (VEP v. 99)10 and converted to Mutation Annotation Format (MAF) using the vcf2maf tool (v. 1.6.16)11.

For Copy Number Variants (CNVs), Control-Freec (v. 11.6)12 is used to generate pileups, which are used as input for the R package 'sequenza' (v. 3.0.0)13. The complete Control-Freec workflow is then re-run using ploidy and cellularity estimates from 'sequenza'.

"},{"location":"pipeline-details/methods/#ffpe-artifact-filtering","title":"FFPE Artifact filtering","text":"

SOBDetector is a tool that scores variants based on strand-orientation bias, which can be a sign of DNA damage caused by fixation of tissue. This pipeline runs SOBDetector in a two-pass method. The first pass uses parameters provided with the software (calculated from publicly available data from TCGA), then cohort-specific bias metrics are computed from those results, and SOBDetector is re-run using these cohort-specific values.

"},{"location":"pipeline-details/methods/#quality-and-identity-metrics","title":"Quality and identity metrics","text":"

Ancestry and relatedness scores are generated using Somalier (v. 0.2.13)14. Contamination analyses are performed against viral and bacterial genomes from NCBI using Kraken2 (v. 2.1.2)15, as well as against mouse, human, and UniVec databases using FastQ Screen (v. 0.14.1)16. Sequence, mapping and variant statistics are computed using FastQC (v. 0.11.9), Qualimap (v. 2.2.1)17 and SNPeff (v. 4.3t)18. All of these metrics are combined into an interactive HTML report using MultiQC (v. 1.11)19.

"},{"location":"pipeline-details/methods/#pipeline-orchestration","title":"Pipeline Orchestration","text":"

Job execution and management is done using Snakemake (v. 6.8.2)20 using custom-built Singularity (v. 3.8.5) containers for reproducibility.

"},{"location":"pipeline-details/methods/#references","title":"References","text":"
  1. Bolger, A.M., M. Lohse, and B. Usadel, Trimmomatic: a flexible trimmer for Illumina sequence data. Bioinformatics, 2014. 30(15): p. 2114-20.\u00a0\u21a9

  2. Li, H. and R. Durbin, Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics, 2009. 25(14): p. 1754-60.\u00a0\u21a9

  3. Faust, G.G. and I.M. Hall, SAMBLASTER: fast duplicate marking and structural variant read extraction. Bioinformatics, 2014. 30(17): p. 2503-5.\u00a0\u21a9

  4. Van der Auwera, G.A. and B.D. O'Connor, Genomics in the cloud : using Docker, GATK, and WDL in Terra. First edition. ed. 2020, Sebastopol, CA: O'Reilly Media.\u00a0\u21a9\u21a9

  5. Poplin, R., et al., Scaling accurate genetic variant discovery to tens of thousands of samples. bioRxiv, 2018: p. 201178.\u00a0\u21a9

  6. Cibulskis, K., et al., Sensitive detection of somatic point mutations in impure and heterogeneous cancer samples. Nat Biotechnol, 2013. 31(3): p. 213-9.\u00a0\u21a9

  7. Benjamin, D., et al., Calling Somatic SNVs and Indels with Mutect2. bioRxiv, 2019: p. 861054.\u00a0\u21a9

  8. Kim, S., et al., Strelka2: fast and accurate calling of germline and somatic variants. Nat Methods, 2018. 15(8): p. 591-594.\u00a0\u21a9

  9. Lai, Z., et al., VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research. Nucleic Acids Res, 2016. 44(11): p. e108.\u00a0\u21a9

  10. McLaren, W., et al., The Ensembl Variant Effect Predictor. Genome Biol, 2016. 17(1): p. 122.\u00a0\u21a9

  11. Memorial Sloan Kettering Cancer Center. vcf2maf. 2013; Available from: https://github.com/mskcc/vcf2maf.\u00a0\u21a9

  12. Boeva, V., et al., Control-FREEC: a tool for assessing copy number and allelic content using next-generation sequencing data. Bioinformatics, 2012. 28(3): p. 423-5.\u00a0\u21a9

  13. Favero, F., et al., Sequenza: allele-specific copy number and mutation profiles from tumor sequencing data. Ann Oncol, 2015. 26(1): p. 64-70.\u00a0\u21a9

  14. Pedersen, B. somalier: extract informative sites, evaluate relatedness, and perform quality-control on BAM/CRAM/BCF/VCF/GVCF. 2018; Available from: https://github.com/brentp/somalier.\u00a0\u21a9

  15. Wood, D.E., J. Lu, and B. Langmead, Improved metagenomic analysis with Kraken 2. Genome Biol, 2019. 20(1): p. 257.\u00a0\u21a9

  16. Wingett, S.W. and S. Andrews, FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res, 2018. 7: p. 1338.\u00a0\u21a9

  17. Okonechnikov, K., A. Conesa, and F. Garcia-Alcalde, Qualimap 2: advanced multi-sample quality control for high-throughput sequencing data. Bioinformatics, 2016. 32(2): p. 292-4.\u00a0\u21a9

  18. Cingolani, P., et al., A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin), 2012. 6(2): p. 80-92.\u00a0\u21a9

  19. Ewels, P., et al., MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 2016. 32(19): p. 3047-8.\u00a0\u21a9

  20. Koster, J. and S. Rahmann, Snakemake-a scalable bioinformatics workflow engine. Bioinformatics, 2018. 34(20): p. 3600.\u00a0\u21a9

"},{"location":"pipeline-details/output/","title":"Output files","text":""},{"location":"pipeline-details/output/#xavier","title":"XAVIER","text":"

The output files and their locations are broken down here for the XAVIER pipeline. Pre-processing and germline variant calling steps are common but somatic variant calling is dependent on whether the pipeline was run in either (A) tumor-normal pair or (B) tumor-only analysis mode. All file locations are relative to the output directory specified during the job submission.

The output directory after a complete XAVIER run should look like:

xavier_output/\n\u251c\u2500\u2500 bams\n\u251c\u2500\u2500 cluster.json # cluster info for the run\n\u251c\u2500\u2500 config\n\u251c\u2500\u2500 config.json  # config file for the run\n\u251c\u2500\u2500 fastqs\n\u251c\u2500\u2500 germline\n\u251c\u2500\u2500 indels.vcf.gz[.tbi] # raw germline INDELs\n\u251c\u2500\u2500 input_files\n\u251c\u2500\u2500 intervals.list\n\u251c\u2500\u2500 {sample1}-normal.R1.fastq.gz -> /path/to/{sample1}-normal.R1.fastq.gz\n\u251c\u2500\u2500 {sample1}-normal.R2.fastq.gz -> /path/to/{sample1}-normal.R2.fastq.gz\n\u251c\u2500\u2500 {sample1}-tumor.R1.fastq.gz -> /path/to/{sample1}-tumor.R1.fastq.gz\n\u251c\u2500\u2500 {sample1}-tumor.R2.fastq.gz -> /path/to/{sample1}-tumor.R2.fastq.gz\n.\n.\n.\n\u251c\u2500\u2500 kickoff.sh\n\u251c\u2500\u2500 logfiles\n\u251c\u2500\u2500 QC\n\u251c\u2500\u2500 resources\n\u251c\u2500\u2500 snps.vcf.gz[.tbi] # raw germline SNPs\n\u251c\u2500\u2500 somatic_paired # in case of tumor-normal paired run\n\u251c\u2500\u2500 somatic_tumor_only # in case of tumor-only run\n\u2514\u2500\u2500 workflow\n

Below we describe the different folders that contain specific outputs obtained for all samples from the XAVIER pipeline

"},{"location":"pipeline-details/output/#1-qc","title":"1. QC","text":"

The QC folder contains all the Quality-Control analyses performed at different steps of the pipeline for each sample to assess sequencing quality before and after adapter trimming, microbial taxonomic composition, contamination, variant calling, etc. The final summary report and data are available in the finalQC folder. \ The MultiQC report also contains results from other analyses like mapping statistics, ancestry and relatedness, etc. It is recommended to study the MultiQC report first to get a bird's-eye view of the sequence data quality.

QC/\n\u251c\u2500\u2500 exome_targets.bed\n\u251c\u2500\u2500 finalQC/\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 MultiQC_Report_data\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 MultiQC_Report.html\n\u251c\u2500\u2500 FQscreen\n\u251c\u2500\u2500 sample1-normal\n\u251c\u2500\u2500 sample1-normal_fastqc.html\n.\n.\n.\n\u251c\u2500\u2500 sample1-tumor\n\u251c\u2500\u2500 sample1-tumor_fastqc.html\n.\n.\n.\n\u251c\u2500\u2500 kraken\n\u251c\u2500\u2500 raw_variants.het\n\u251c\u2500\u2500 raw_variants.variant_calling_detail_metrics\n\u2514\u2500\u2500 raw_variants.variant_calling_summary_metrics\n
"},{"location":"pipeline-details/output/#2-bams","title":"2. bams","text":"

The bams folder contains two subfolders chrom_split and final_bams. final_bams contains the final processed BAM files for each sample in the run and the chrom_split folder contains all the sample BAM files split by each chromosome.

bams/\n\u251c\u2500\u2500 chrom_split\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 {sample1}-normal.chr1.split.bam[.bai]\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 {sample1}-normal.chr2.split.bam[.bai]\n.   .\n.   .\n.   .\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 {sampleN}-tumor.chrN.split.bam[.bai]\n\u2514\u2500\u2500 final_bams\n    \u251c\u2500\u2500 {sample1}-normal.bam[.bai]\n    \u251c\u2500\u2500 {sample1}-tumor.bam[.bai]\n    .\n    .\n    .\n    \u2514\u2500\u2500 {sampleN}-tumor.bam.bai\n
"},{"location":"pipeline-details/output/#3-germline","title":"3. germline","text":"

This folder contains the output from the GATK Best Practices pipeline to obtain germline variants with a few alterations detailed below. Briefly, joint SNP and INDEL variant detection is conducted across all samples included in a pipeline run using the GATK HaplotypeCaller under default settings. Raw variants are then subsequently filtered based on several GATK annotations: \ A strict set of criteria (QD < 2.0, FS > 60.0, MQ < 40.0, MQRankSum < -12.5, ReadPosRankSum < -8.0 for SNPs; QD < 2.0, FS > 200.0, ReadPosRankSum < -20.0 for INDELs) generates the 'combined.strictFilter.vcf'.

This call set is highly stringent, maximizing the true positive rate at the expense of an elevated false negative rate. This call set is really only intended for more general population genetic scale analyses (e.g., burden tests, admixture, linkage/pedigree based analysis, etc.) where false positives can be significantly confounding.

In case of human sequence data, a basic analyses of sample relatedness and ancestry (e.g., % European, African, etc.) is also performed using somalier.

The output folder looks like:

germline/\n\u251c\u2500\u2500 gVCFs\n.\n.\n.\n\u251c\u2500\u2500 somalier # only for hg38 genome\n\u2514\u2500\u2500 VCF\n

The VCF folder contains the final filtered germline variants (SNPs and INDELs) for all samples combined. The folder also contains raw variants for each sample, all samples combined, and also combined raw variants split by chromosome.

VCF/\n\u251c\u2500\u2500 by_chrom\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 raw_variants_byChrom.list\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 raw_variants.chr1.vcf.gz[.tbi]\n.   .\n.   .\n.   .\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 raw_variants.chrN.vcf.gz[.tbi]\n\u251c\u2500\u2500 indel.filterd.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sample1}-normal.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sample1}-tumor.germline.vcf.gz[.tbi]\n.\n.\n.\n\u251c\u2500\u2500 {sampleN}-normal.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 {sampleN}-tumor.germline.vcf.gz[.tbi]\n\u251c\u2500\u2500 raw_variants.vcf.gz[.tbi]\n\u251c\u2500\u2500 snp.filtered.vcf.gz[.tbi]\n\u2514\u2500\u2500 snp_indel.filtered.vcf.gz[.tbi]\n
"},{"location":"pipeline-details/output/#4-logfiles","title":"4. logfiles","text":"

This folder contains the snakemake log files and computational statistics for the XAVIER run. All the log files (i.e., standard output and error) for each individual step are in the slurmfiles folder. These logfiles are important to diagnose errors in case the pipeline fails.

logfiles/\n\u251c\u2500\u2500 master.log\n\u251c\u2500\u2500 mjobid.log\n\u251c\u2500\u2500 runtime_statistics.json\n\u251c\u2500\u2500 slurmfiles\n\u251c\u2500\u2500 snakemake.log\n\u251c\u2500\u2500 snakemake.log.jobby\n\u2514\u2500\u2500 snakemake.log.jobby.short\n
"},{"location":"pipeline-details/output/#tumor-normal-pair","title":"Tumor-normal pair","text":""},{"location":"pipeline-details/output/#somatic_paired","title":"somatic_paired","text":"

This workflow calls somatic SNPs and INDELs using multiple variant detection algorithms. For each of these tools, variants are called in a paired tumor-normal fashion, with default settings. See Pipeline Details for more information about the tools used and their parameter settings.

For each sample, the resulting VCF is fully annotated using VEP and converted to a MAF file using the vcf2maf tool. Resulting MAF files are found in maf folder within each caller's results directory (i.e., mutect2_out, strelka_out, etc.). Individual sample MAF files are then merged and saved in merged_somatic_variants directory.

For Mutect2, we use a panel of normals (PON) developed from the ExAC (excluding TCGA) dataset, filtered for variants <0.001 in the general population, and also including an in-house set of blacklisted recurrent germline variants that are not found in any population databases.

For Copy Number Variants (CNVs), two tools are employed in tandem. First, Control-FREEC is run with default parameters. This generates pileup files that can be used by Sequenza, primarily for jointly estimating contamination and ploidy. These values are used to run Freec a second time for improved performance.

The output directory should look like:

somatic_paired/\n\u251c\u2500\u2500 CNV # only if CNVs analyzed\n\u2502\u00a0\u00a0 \u251c\u2500\u2500 freec_out\n\u2502\u00a0\u00a0 \u2514\u2500\u2500 sequenza_out\n\u251c\u2500\u2500 ffpe_filter # only if FFPE filter applied\n\u251c\u2500\u2500 qc\n\u2514\u2500\u2500 SNP_Indels\n    \u251c\u2500\u2500 merged_somatic_variants\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf # Final merged MAFs for each sample\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect2_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 chrom_split\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 pileup_summaries\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 read_orientation_data\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 strelka_out\n    \u251c\u2500\u2500 vardict_out\n    \u2514\u2500\u2500 varscan_out\n
"},{"location":"pipeline-details/output/#tumor-only","title":"Tumor-only","text":""},{"location":"pipeline-details/output/#somatic_tumor_only","title":"somatic_tumor_only","text":"

In general, the tumor-only pipeline is a stripped down version of the tumor-normal pipeline. We only run MuTect2, Mutect, and VarDict for somatic variant detection, with the same PON and filtering as described above for the tumor-normal pipeline.

somatic_tumor_only/\n\u251c\u2500\u2500 ffpe_filter # only if FFPE filter applied\n\u251c\u2500\u2500 qc\n\u2514\u2500\u2500 SNP_Indels\n    \u251c\u2500\u2500 merged_somatic_variants\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf # Final merged MAFs for each sample\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect2_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 chrom_split\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 cohort_summary\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 read_orientation_data\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 mutect_out\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 maf\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u251c\u2500\u2500 .\n    \u2502\u00a0\u00a0 \u2514\u2500\u2500 vcf\n    \u251c\u2500\u2500 vardict_out\n    \u2514\u2500\u2500 varscan_out\n
"},{"location":"pipeline-details/overview/","title":"Pipeline Overview","text":"

Workflow diagram of the XAVIER: the pipeline is composed of a series of data processing steps to trim, align, and recalibrate reads prior to calling variants. These data processing steps closely follow GATK's best practices for cleaning up raw alignments. The pipeline also consists of a series of comprehensive quality-control steps.

"},{"location":"pipeline-details/settings/","title":"Settings","text":"

This page contains details of the settings used for different tools in the pipeline

"},{"location":"pipeline-details/settings/#somatic-paired-variant-calling","title":"Somatic paired variant calling","text":"

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

-normal

BAM file for paired normal sample

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor AND normal bam)

strelka

calling

--exome

Preset filters for exome data

mutect

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/db/PipeDB/lib/COSMIC_82_hg38.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-M

If set, output only candidate somatic

-S

exclude variants that fail filters

filter

--exclude 'STATUS=\"Germline\" | STATUS=\"LikelyLOH\" | STATUS=\"AFDiff\"'

Removes variants with certain flags from vardict; (1) Germline: detected in germline sample (pass all quality parameters); (2) LikelyLOH: detected in germline but either lost in tumor OR 20-80% in germline, but increased to 1-opt_V (95%); (3) AFDiff: detected in tumor (pass quality parameters) and present in germline but didn\u2019t pass quality parameters.

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

"},{"location":"pipeline-details/settings/#somatic-tumor-only-variant-calling","title":"Somatic tumor-only variant calling","text":"

Tool

Step

Argument

Description

Resource

mutect2

calling

--panel-of-normals

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--germline-resource

GATK Bundle; reheadered to match genome fasta

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/GNOMAD/somatic-hg38-af-only-gnomad.hg38.vcf.gz

filter

--ob-priors

from GATK LearnReadOrientationModel (uses f1r2 file output during calling)

--contamination-table

from GATK CalculateContamination (uses pileup of tumor bam)

mutect

calling

--normal_panel

1000 Genomes with COSMIC and ClinVar samples removed

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--cosmic

COSMIC version 82

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

--dbsnp

dbSNP version 138

/fdb/GATK_resource_bundle/hg38bundle/dbsnp_138.hg38.vcf.gz

-rf BadCigar

removes bad cigar strings, similar to https://gatk.broadinstitute.org/hc/en-us/articles/360037430171-GoodCigarReadFilter

vardict

-f 0.05

Minimum variant allele frequency threshold 0.05

-x 500

Nucleotides to extend

--nosv

Turn off structural variant calling

-t

Remove duplicated reads

-Q 20

Reads with map quality < 20 are removed

-c 1

Column of targets BED file with chromosome

-S 2

Column of targets BED file with start position

-E 3

Column of targets BED file with end position

var2vcf

-d 10

Min total depth

-v 6

Min variant depth

-S

exclude variants that fail filters

varscan

pileup

-d 100000 -q 15 -Q 15

samtools mpileup arguments; max depth of 100,000; min mapping quality of 15; min base quality of 15

calling

--strand-filter 0

Do not ignore variants with >90% support on one strand

--min-var-freq 0.01

Minimum variant allele frequency threshold 0.01

--output-vcf 1

Output in VCF format

--variants 1

Report only variant (SNP/indel) positions

all

GATK SelectVariants

--exclude-filtered

Removes non-PASS variants

--discordance

Remove variants found in supplied file (same as panel-of-normals file)

/data/CCBR_Pipeliner/Pipelines/XAVIER/resources/hg38/PON/hg38.noCOSMIC_ClinVar.pon.vcf.gz

"},{"location":"pipeline-details/tools/","title":"Tools","text":"

This table lists information about the steps performed, tools used, and their details.

Module

Category

Analysis Type

Software

Version

Rule File(s)

Preprocessing

Preprocessing

trim

Trimmomatic

0.39

trim_map_preprocess.smk

map

bwa

0.7.17

trim_map_preprocess.smk

markdup

samblaster

0.1.25

trim_map_preprocess.smk

GATK4 Best Practices

GATK4

4.2.2

SNP/Indel Calling

Mutect2

GATK 4.2.2

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Strelka

2.9.0

somatic_snps.paired.smk

VarScan

2.4.3

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

Mutect

1.1.7

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

VarDict

1.4

somatic_snps.paired.smk, somatic_snps.tumor_only.smk

FFPE Artifact Filter

SOBDetector

1.0.4

ffpe.smk

Consensus SNP/Indels

GATK3 CombineVariants

GATK_3.8-1

somatic_snps.common.smk

Somatic Copy Number Variation (CNV)

CNV

Control-FREEC

11.5

somatic_snps.paired.smk

Sequenza

somatic_snps.paired.smk

Somatic Analysis

Annotate

vcf2maf

somatic_snps.common.smk

Germline

Germline SNV Calling

Germline Variants

HaplotypeCaller

GATK_4.2.2

germline.smk

Germline Analysis

Ancestry

Somalier

qc.smk

Relatedness

Somalier

qc.smk

QC Metrics

QC

depth

qualimap

2.2.1

qc.smk

report

multiqc

1.11

qc.smk

base quality

FastQC

0.11.9

qc.smk

contamination

Fastq Screen

0.14.1

qc.smk

kraken

2.1.2

qc.smk

variant quality

vcftools stat

0.1.16

qc.smk

bcftools_stat

1.9

qc.smk

variant effect

SNPeff

4.3t

qc.smk

General

General

R scripts

R

4.1

general

variant wrangling

bcftools

1.9

general

vcftools

0.1.16

general

alignment wrangling

samtools

1.8

general

Orchestration

Orchestration

Containerization

singularity

3.8.5

Orchestration

Workflow managemanet

snakemake

6.8.2

Orchestration

"},{"location":"usage/cache/","title":"xavier cache","text":""},{"location":"usage/cache/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier cache sub command in more detail. With minimal configuration, the cache sub command enables you to cache remote resources for the xavier pipeline. Caching remote resources allows the pipeline to run in an offline mode. The cache sub command can also be used to pull our pre-built reference bundles onto a new cluster or target system.

The cache sub command creates local cache on the filesysytem for resources hosted on DockerHub or AWS S3. These resources are normally pulled onto the filesystem when the pipeline runs; however, due to network issues or DockerHub pull rate limits, it may make sense to pull the resources once so a shared cache can be created and re-used. It is worth noting that a singularity cache cannot normally be shared across users. Singularity strictly enforces that its cache is owned by the user. To get around this issue, the cache subcommand can be used to create local SIFs on the filesystem from images on DockerHub.

"},{"location":"usage/cache/#2-synopsis","title":"2. Synopsis","text":"

Coming Soon!

"},{"location":"usage/gui/","title":"Getting started","text":""},{"location":"usage/gui/#1-synopsis","title":"1. Synopsis","text":"

XAVIER pipeline can be executed from either using the graphical user interface (GUI) or the command line interface (CLI). GUI offers a more interactive way for the user to provide input and adjust parameter settings. This part of the documentation describes how to run xavier using the GUI (with screenshots). See Command Line tab to read more about the xavier executable and running XAVIER pipeline using the CLI.

"},{"location":"usage/gui/#2-setting-up-xavier","title":"2. Setting up XAVIER","text":""},{"location":"usage/gui/#21-login-to-cluster","title":"2.1 Login to cluster","text":"
# Setup Step 1.) ssh into cluster's head node\n# example below for Biowulf cluster\nssh -Y $USER@biowulf.nih.gov\n
"},{"location":"usage/gui/#22-grab-an-interactive-node","title":"2.2 Grab an interactive node","text":"
# Setup Step 2.) Please do not run XAVIER on the head node!\n# Grab an interactive node first\nsinteractive --time=12:00:00 --mem=8gb  --cpus-per-task=4\n
"},{"location":"usage/gui/#23-load-ccbrpipeliner-module","title":"2.3 Load ccbrpipeliner module","text":"

NOTE: ccbrpipeliner is a custom module created on biowulf which contains various NGS data analysis pipelines developed, tested, and benchmarked by experts at CCBR.

# Setup Step 3.) Add ccbrpipeliner module\nmodule purge # to reset the module environment\nmodule load ccbrpipeliner\n

If the module was loaded correctly, the greetings message should be displayed.

[+] Loading ccbrpipeliner  5  ...\n###########################################################################\n                                CCBR Pipeliner\n###########################################################################\n    \"ccbrpipeliner\" is a suite of end-to-end pipelines and tools\n    Visit https://github.com/ccbr for more details.\n    Pipelines are available on BIOWULF and FRCE.\n    Tools are available on BIOWULF, HELIX and FRCE.\n\n    The following pipelines/tools will be loaded in this module:\n\n    RENEE v2.5 https://ccbr.github.io/RENEE/\n    XAVIER v3.0 https://ccbr.github.io/XAVIER/\n    CARLISLE v2.4 https://ccbr.github.io/CARLISLE/\n    CHAMPAGNE v0.2 https://ccbr.github.io/CHAMPAGNE/\n    CRUISE v0.1 https://ccbr.github.io/CRUISE/\n\n    spacesavers2 v0.10 https://ccbr.github.io/spacesavers2/\n    permfix v0.6 https://github.com/ccbr/permfix\n###########################################################################\nThank you for using CCBR Pipeliner\n###########################################################################\n

To check the current version of XAVIER, enter:

xavier --version\n
"},{"location":"usage/gui/#3-running-xavier","title":"3. Running XAVIER","text":""},{"location":"usage/gui/#31-launching-xavier-gui","title":"3.1 Launching XAVIER GUI","text":"

To run the XAVIER pipeline from the GUI, simply enter:

xavier_gui\n

and it will launch the XAVIER window.

Note: Please wait until window created! message appears on the terminal.

"},{"location":"usage/gui/#32-folder-paths-and-reference-genomes","title":"3.2 Folder paths and reference genomes","text":"

To enter the location of the input folder containing FASTQ files and the location where the output folders should be created, either simply type the absolute paths

or use the Browse tab to choose the input and output directories

Next, from the drop down menu select the reference genome (hg38/mm10)

and enter a job name of this run.

"},{"location":"usage/gui/#33-analysis-mode","title":"3.3 Analysis mode","text":"

XAVIER pipeline can be run in two different modes:\\ (A) Tumor-normal pair \\ (B) Tumor-only

"},{"location":"usage/gui/#33a-tumor-normal-pair-analysis","title":"3.3a Tumor-normal pair analysis","text":"

In case of tumor-normal pairs, a tab-delimited text file is neeed that contains the list of normal and tumor samples. For example,

Normal  Tumor\nsample1-normal     sample1-tumor\nsample2-normal     sample2-tumor\nsample3-normal     sample3-tumor\nsample4-normal     sample4-tumor\n

Similar to input and output folder paths, either type the path to the pairsInfo.txt file or use the Browse tab.

In case of paired mode, XAVIER can also perform copy number variants (CNV) analysis.

"},{"location":"usage/gui/#33b-tumor-only-analysis","title":"3.3b Tumor-only analysis","text":"

In case the paired normal samples are unavailable, XAVIER pipeline can be run in tumor-only mode which does not require paired samples information. However, in the absence of matching normal samples, CNV analysis is also unavailable.

"},{"location":"usage/gui/#34-submit-xavier-job","title":"3.4 Submit XAVIER job","text":"

After all the information is filled out, press Submit.

If the pipeline detects no errors and the run was submitted, a new window appears that has the output of a \"dry-run\" which summarizes each step of the pipeline.

Click OK

A dialogue box will popup to confirm submitting the job to slurm.

Click Yes

The dry-run output will be displayed again and the master job will be submitted. An email notification will be sent out when the pipeline starts and ends.

The XAVIER gui will ask to submit another job.

Click Yes to start again or No to close the XAVIER gui.

"},{"location":"usage/gui/#35-additional-settings","title":"3.5 Additional settings","text":"

Users can input certain additional settings for the pipeline run including running an additional step to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples and to provide a custom exome targets BED file. This file can be obtained from the manufacturer of the target capture kit that was used.

"},{"location":"usage/gui/#4-special-instructions-for-biowulf","title":"4. Special instructions for Biowulf","text":"

XAVIER GUI natively uses the X11 Window System to run XAVIER pipeline and display the graphics on a personal desktop or laptop. However, if running XAVIER specifically on NIH's Biowulf cluster, the HPC staff recommends NoMachine (NX) to run graphics applications.

Please see details here on how to install and connect to Biowulf on your local computer using NoMachine.

Once connected to Biowulf using NX, right click to open a terminal connection

and start an interactive session.

Similar to the instructions above, load ccbrpipeliner module and enter xavier_gui to launch the XAVIER gui.

"},{"location":"usage/run/","title":"xavier run","text":""},{"location":"usage/run/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier run sub command in more detail. With minimal configuration, the run sub command enables you to start running xavier pipeline.

Setting up the xavier pipeline is fast and easy! In its most basic form, xavier run only has four required inputs.

"},{"location":"usage/run/#2-synopsis","title":"2. Synopsis","text":"
$ xavier run [--help] \\\n                   [--mode {local, slurm}] \\\n                   [--job-name JOB_NAME] \\\n                   [--callers {mutect2,mutect,strelka, ...}] \\\n                   [--pairs PAIRS] \\\n                   [--ffpe] \\\n                   [--cnv] \\\n                   [--silent] \\\n                   [--singularity-cache SINGULARITY_CACHE] \\\n                   [--sif-cache SIF_CACHE] \\\n                   [--threads THREADS] \\\n                   --runmode {init, dryrun, run} \\\n                   --input INPUT [INPUT ...] \\\n                   --output OUTPUT \\\n                   --genome {hg38, ...} \\\n                   --targets TARGETS\n

The synopsis for each command shows its parameters and their usage. Optional parameters are shown in square brackets.

A user must provide a list of FastQ or BAM files (globbing is supported) to analyze via --input argument, an output directory to store results via --output argument, an exome targets BED file for the samples' capture kit, and select reference genome for alignment and annotation via the --genome argument.

Use you can always use the -h option for information on a specific command.

"},{"location":"usage/run/#21-required-arguments","title":"2.1 Required Arguments","text":"

Each of the following arguments are required. Failure to provide a required argument will result in a non-zero exit-code.

--input INPUT [INPUT ...]

Input FastQ or BAM file(s) to process. type: file(s)

One or more FastQ files can be provided. The pipeline does NOT support single-end WES data. Please provide either a set of FastQ files or a set of BAM files. The pipeline does NOT support processing a mixture of FastQ files and BAM files. From the command-line, each input file should separated by a space. Globbing is supported! This makes selecting FastQ files easy. Input FastQ files should be gzipp-ed.

Example: --input .tests/*.R?.fastq.gz

--output OUTPUT

Path to an output directory. type: path

This location is where the pipeline will create all of its output files, also known as the pipeline's working directory. If the provided output directory does not exist, it will be initialized automatically.

Example: --output /data/$USER/WES_hg38

--runmode {init,dryrun,run} `

Execution Process. type: string

User should initialize the pipeline folder by first running --runmode init User should then perform a dry-run to list all steps the pipeline will take--runmode dryrun User should then perform the full run --runmode run

Example: --runmode init THEN --runmode dryrun THEN --runmode run

--genome {hg38, custom.json}

Reference genome. type: string/file

This option defines the reference genome for your set of samples. On Biowulf, xavier does comes bundled with pre built reference files for human samples; however, it is worth noting that the pipeline does accept a pre-built resource bundle pulled with the cache sub command (coming soon). Currently, the pipeline only supports the human reference hg38; however, support for mouse reference mm10 will be added soon.

Pre built Option Here is a list of available pre built genomes on Biowulf: hg38.

Custom Option For users running the pipeline outside of Biowulf, a pre-built resource bundle can be pulled with the cache sub command (coming soon). Please supply the custom reference JSON file that was generated by the cache sub command.

Example: --genome hg38 OR --genome /data/${USER}/hg38/hg38.json

--targets TARGETS

Exome targets BED file. type: file

This file can be obtained from the manufacturer of the target capture kit that was used.

Example: --targets /data/$USER/Agilent_SSv7_allExons_hg38.bed

"},{"location":"usage/run/#22-options","title":"2.2 Options","text":"

Each of the following arguments are optional and do not need to be provided.

-h, --help

Display Help. type: boolean flag

Shows command's synopsis, help message, and an example command

Example: --help

--silent

Silence standard output. type: boolean flag

Reduces the amount of information directed to standard output when submitting master job to the job scheduler. Only the job id of the master job is returned.

Example: --silent

--mode {local,slurm}

Execution Method. type: string default: slurm

Execution Method. Defines the mode or method of execution. Valid mode options include: local or slurm.

local Local executions will run serially on compute instance. This is useful for testing, debugging, or when a users does not have access to a high performance computing environment. If this option is not provided, it will default to a local execution mode.

slurm The slurm execution method will submit jobs to a cluster using a singularity backend. It is recommended running xavier in this mode as execution will be significantly faster in a distributed environment.

Example: --mode slurm

--job-name JOB_NAME

Set the name of the pipeline's master job. type: string > default: pl:xavier

When submitting the pipeline to a job scheduler, like SLURM, this option always you to set the name of the pipeline's master job. By default, the name of the pipeline's master job is set to \"pl:xavier\".

Example: --job-name xavier_run1

--callers CALLERS [CALLERS ...]

Variant Callers. type: string(s) > default: mutect2, mutect, strelka, vardict, varscan

List of variant callers to detect mutations. Please select from one or more of the following options: [mutect2, mutect, strelka, vardict, varscan]. Defaults to using all variant callers.

Example: --callers mutect2 strelka varscan

--pairs PAIRS

Tumor normal pairs file. type: file

This tab delimited file contains two columns with the names of tumor and normal pairs, one per line. The header of the file needs to be Tumor for the tumor column and Normal for the normal column. The base name of each sample should be listed in the pairs file. The base name of a given sample can be determined by removing the following extension from the sample's R1 FastQ file: .R1.fastq.gz. Contents of example pairs file:

Normal    Tumor\nSample4_CRL1622_S31   Sample10_ARK1_S37\nSample4_CRL1622_S31   Sample11_ACI_158_S38\n

Example: --pairs /data/$USER/pairs.tsv

--ffpe

Apply FFPE correction. type: boolean flag

Runs an additional steps to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples. Do NOT use this option with non-FFPE samples.

Example: --ffpe

--cnv

Call copy number variations (CNVs). type: boolean flag

CNVs will only be called from tumor-normal pairs. If this option is provided without providing a --pairs file, CNVs will NOT be called.

Example: --cnv

--singularity-cache SINGULARITY_CACHE

Overrides the $SINGULARITY_CACHEDIR environment variable. type: path default: --output OUTPUT/.singularity

Singularity will cache image layers pulled from remote registries. This ultimately speeds up the process of pull an image from DockerHub if an image layer already exists in the singularity cache directory. By default, the cache is set to the value provided to the --output argument. Please note that this cache cannot be shared across users. Singularity strictly enforces you own the cache directory and will return a non-zero exit code if you do not own the cache directory! See the --sif-cache option to create a shareable resource.

Example: --singularity-cache /data/$USER/.singularity

--sif-cache SIF_CACHE

Path where a local cache of SIFs are stored. type: path

Uses a local cache of SIFs on the filesystem. This SIF cache can be shared across users if permissions are set correctly. If a SIF does not exist in the SIF cache, the image will be pulled from Dockerhub and a warning message will be displayed. The xavier cache subcommand can be used to create a local SIF cache. Please see xavier cache for more information. This command is extremely useful for avoiding DockerHub pull rate limits. It also remove any potential errors that could occur due to network issues or DockerHub being temporarily unavailable. We recommend running xavier with this option when ever possible.

Example: --singularity-cache /data/$USER/SIFs

--threads THREADS

Max number of threads for each process. type: int default: 2

Max number of threads for each process. This option is more applicable when running the pipeline with --mode local. It is recommended setting this value to the maximum number of CPUs available on the host machine.

Example: --threads 12

"},{"location":"usage/run/#3-example","title":"3. Example","text":"
# Step 1.) Grab an interactive node\n# Do not run on head node!\nsinteractive --mem=8g --cpus-per-task=4\nmodule purge\nmodule load ccbrpipeliner\n\n# Step 2A.) Initialize the all resources to the output folder\nxavier run --input .tests/*.R?.fastq.gz \\\n                 --output /data/$USER/xavier_hg38 \\\n                 --genome hg38 \\\n                 --targets Agilent_SSv7_allExons_hg38.bed \\\n                 --mode slurm \\\n                 --runmode init\n\n# Step 2B.) Dry-run the pipeline\nxavier run --input .tests/*.R?.fastq.gz \\\n                 --output /data/$USER/xavier_hg38 \\\n                 --genome hg38 \\\n                 --targets Agilent_SSv7_allExons_hg38.bed \\\n                 --mode slurm \\\n                 --runmode dryrun\n\n# Step 2C.) Run the XAVIER pipeline\n# The slurm mode will submit jobs to the cluster.\n# It is recommended running xavier in this mode.\nxavier run --input .tests/*.R?.fastq.gz \\\n                 --output /data/$USER/xavier_hg38 \\\n                 --genome hg38 \\\n                 --targets Agilent_SSv7_allExons_hg38.bed \\\n                 --mode slurm \\\n                 --runmode run\n
"},{"location":"usage/unlock/","title":"xavier unlock","text":""},{"location":"usage/unlock/#1-about","title":"1. About","text":"

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier unlock sub command in more detail. With minimal configuration, the unlock sub command enables you to unlock a pipeline output directory.

If the pipeline fails ungracefully, it maybe required to unlock the working directory before proceeding again. Snakemake will inform a user when it maybe necessary to unlock a working directory with an error message stating: Error: Directory cannot be locked.

Please verify that the pipeline is not running before running this command. If the pipeline is currently running, the workflow manager will report the working directory is locked. The is the default behavior of snakemake, and it is normal. Do NOT run this command if the pipeline is still running! Please kill the master job and it's child jobs prior to running this command.

Unlocking xavier pipeline output directory is fast and easy! In its most basic form, xavier unlock only has one required input.

"},{"location":"usage/unlock/#2-synopsis","title":"2. Synopsis","text":"
$ xavier unlock [-h] --output OUTPUT\n

The synopsis for this command shows its parameters and their usage. Optional parameters are shown in square brackets.

A user must provide an output directory to unlock via --output argument. After running the unlock sub command, you can resume the build or run pipeline from where it left off by re-running it.

Use you can always use the -h option for information on a specific command.

"},{"location":"usage/unlock/#21-required-arguments","title":"2.1 Required Arguments","text":"

--output OUTPUT

Output directory to unlock. type: path

Path to a previous run's output directory. This will remove a lock on the working directory. Please verify that the pipeline is not running before running this command. Example: --output /data/$USER/WES_hg38

"},{"location":"usage/unlock/#22-options","title":"2.2 Options","text":"

Each of the following arguments are optional and do not need to be provided.

-h, --help

Display Help. type: boolean

Shows command's synopsis, help message, and an example command

Example: --help

"},{"location":"usage/unlock/#3-example","title":"3. Example","text":"
# Step 0.) Grab an interactive node (do not run on head node)\nsinteractive --mem=8g -N 1 -n 4\nmodule purge\nmodule load ccbrpipeliner\n\n# Step 1.) Unlock a pipeline output directory\nxavier unlock --output /data/$USER/xavier_hg38\n
"}]} \ No newline at end of file diff --git a/dev/sitemap.xml.gz b/dev/sitemap.xml.gz index 5e1784801ceb0e03dc7602820e975642cda18479..f8782822fec34543fd682bf2cc229662d46e9449 100644 GIT binary patch delta 13 Ucmb=gXP58h;P|#+^F;Ov03kaC82|tP delta 13 Ucmb=gXP58h;OJJ~IFY>q031LBF8}}l diff --git a/dev/usage/cache/index.html b/dev/usage/cache/index.html index 0197e51..5b0bfa4 100644 --- a/dev/usage/cache/index.html +++ b/dev/usage/cache/index.html @@ -1,4 +1,4 @@ - xavier cache - XAVIER Documentation

xavier cache

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier cache sub command in more detail. With minimal configuration, the cache sub command enables you to cache remote resources for the xavier pipeline. Caching remote resources allows the pipeline to run in an offline mode. The cache sub command can also be used to pull our pre-built reference bundles onto a new cluster or target system.

The cache sub command creates local cache on the filesysytem for resources hosted on DockerHub or AWS S3. These resources are normally pulled onto the filesystem when the pipeline runs; however, due to network issues or DockerHub pull rate limits, it may make sense to pull the resources once so a shared cache can be created and re-used. It is worth noting that a singularity cache cannot normally be shared across users. Singularity strictly enforces that its cache is owned by the user. To get around this issue, the cache subcommand can be used to create local SIFs on the filesystem from images on DockerHub.

2. Synopsis

Coming Soon!

CCBR/XAVIER

xavier cache

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier cache sub command in more detail. With minimal configuration, the cache sub command enables you to cache remote resources for the xavier pipeline. Caching remote resources allows the pipeline to run in an offline mode. The cache sub command can also be used to pull our pre-built reference bundles onto a new cluster or target system.

The cache sub command creates local cache on the filesysytem for resources hosted on DockerHub or AWS S3. These resources are normally pulled onto the filesystem when the pipeline runs; however, due to network issues or DockerHub pull rate limits, it may make sense to pull the resources once so a shared cache can be created and re-used. It is worth noting that a singularity cache cannot normally be shared across users. Singularity strictly enforces that its cache is owned by the user. To get around this issue, the cache subcommand can be used to create local SIFs on the filesystem from images on DockerHub.

2. Synopsis

Coming Soon!


Last update: 2024-01-30
\ No newline at end of file +-->
\ No newline at end of file diff --git a/dev/usage/gui/index.html b/dev/usage/gui/index.html index a4063e8..bc75e46 100644 --- a/dev/usage/gui/index.html +++ b/dev/usage/gui/index.html @@ -1,4 +1,4 @@ - Graphical Interface - XAVIER Documentation

Getting started

1. Synopsis

XAVIER pipeline can be executed from either using the graphical user interface (GUI) or the command line interface (CLI). GUI offers a more interactive way for the user to provide input and adjust parameter settings. This part of the documentation describes how to run xavier using the GUI (with screenshots). See Command Line tab to read more about the xavier executable and running XAVIER pipeline using the CLI.

2. Setting up XAVIER

2.1 Login to cluster

# Setup Step 1.) ssh into cluster's head node
+ Graphical Interface - XAVIER Documentation      

Getting started

1. Synopsis

XAVIER pipeline can be executed from either using the graphical user interface (GUI) or the command line interface (CLI). GUI offers a more interactive way for the user to provide input and adjust parameter settings. This part of the documentation describes how to run xavier using the GUI (with screenshots). See Command Line tab to read more about the xavier executable and running XAVIER pipeline using the CLI.

2. Setting up XAVIER

2.1 Login to cluster

# Setup Step 1.) ssh into cluster's head node
 # example below for Biowulf cluster
 ssh -Y $USER@biowulf.nih.gov
 

2.2 Grab an interactive node

# Setup Step 2.) Please do not run XAVIER on the head node!
@@ -36,4 +36,4 @@
 sample2-normal     sample2-tumor
 sample3-normal     sample3-tumor
 sample4-normal     sample4-tumor
-

Similar to input and output folder paths, either type the path to the pairsInfo.txt file or use the Browse tab.

In case of paired mode, XAVIER can also perform copy number variants (CNV) analysis.

xavier_pair

3.3b Tumor-only analysis

In case the paired normal samples are unavailable, XAVIER pipeline can be run in tumor-only mode which does not require paired samples information. However, in the absence of matching normal samples, CNV analysis is also unavailable.

xavier_tumorOnly

3.4 Submit XAVIER job

After all the information is filled out, press Submit.

If the pipeline detects no errors and the run was submitted, a new window appears that has the output of a "dry-run" which summarizes each step of the pipeline.

xavier_dryrun

Click OK

A dialogue box will popup to confirm submitting the job to slurm.

xavier_submit

Click Yes

The dry-run output will be displayed again and the master job will be submitted. An email notification will be sent out when the pipeline starts and ends.

The XAVIER gui will ask to submit another job.

xavier_resubmit

Click Yes to start again or No to close the XAVIER gui.

3.5 Additional settings

Users can input certain additional settings for the pipeline run including running an additional step to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples and to provide a custom exome targets BED file. This file can be obtained from the manufacturer of the target capture kit that was used.

gui_additionalSettings1

gui_additionalSettings2

4. Special instructions for Biowulf

XAVIER GUI natively uses the X11 Window System to run XAVIER pipeline and display the graphics on a personal desktop or laptop. However, if running XAVIER specifically on NIH's Biowulf cluster, the HPC staff recommends NoMachine (NX) to run graphics applications.

Please see details here on how to install and connect to Biowulf on your local computer using NoMachine.

Once connected to Biowulf using NX, right click to open a terminal connection

gui_nx_config1

and start an interactive session.

gui_nx_config2

Similar to the instructions above, load ccbrpipeliner module and enter xavier_gui to launch the XAVIER gui.

gui_nx_xavier


Last update: 2024-01-30
\ No newline at end of file +

Similar to input and output folder paths, either type the path to the pairsInfo.txt file or use the Browse tab.

In case of paired mode, XAVIER can also perform copy number variants (CNV) analysis.

xavier_pair

3.3b Tumor-only analysis

In case the paired normal samples are unavailable, XAVIER pipeline can be run in tumor-only mode which does not require paired samples information. However, in the absence of matching normal samples, CNV analysis is also unavailable.

xavier_tumorOnly

3.4 Submit XAVIER job

After all the information is filled out, press Submit.

If the pipeline detects no errors and the run was submitted, a new window appears that has the output of a "dry-run" which summarizes each step of the pipeline.

xavier_dryrun

Click OK

A dialogue box will popup to confirm submitting the job to slurm.

xavier_submit

Click Yes

The dry-run output will be displayed again and the master job will be submitted. An email notification will be sent out when the pipeline starts and ends.

The XAVIER gui will ask to submit another job.

xavier_resubmit

Click Yes to start again or No to close the XAVIER gui.

3.5 Additional settings

Users can input certain additional settings for the pipeline run including running an additional step to correct strand orientation bias in Formalin-Fixed Paraffin-Embedded (FFPE) samples and to provide a custom exome targets BED file. This file can be obtained from the manufacturer of the target capture kit that was used.

gui_additionalSettings1

gui_additionalSettings2

4. Special instructions for Biowulf

XAVIER GUI natively uses the X11 Window System to run XAVIER pipeline and display the graphics on a personal desktop or laptop. However, if running XAVIER specifically on NIH's Biowulf cluster, the HPC staff recommends NoMachine (NX) to run graphics applications.

Please see details here on how to install and connect to Biowulf on your local computer using NoMachine.

Once connected to Biowulf using NX, right click to open a terminal connection

gui_nx_config1

and start an interactive session.

gui_nx_config2

Similar to the instructions above, load ccbrpipeliner module and enter xavier_gui to launch the XAVIER gui.

gui_nx_xavier

\ No newline at end of file diff --git a/dev/usage/run/index.html b/dev/usage/run/index.html index 8f579da..fdde60a 100644 --- a/dev/usage/run/index.html +++ b/dev/usage/run/index.html @@ -1,4 +1,4 @@ - xavier run - XAVIER Documentation

xavier run

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier run sub command in more detail. With minimal configuration, the run sub command enables you to start running xavier pipeline.

Setting up the xavier pipeline is fast and easy! In its most basic form, xavier run only has four required inputs.

2. Synopsis

$ xavier run [--help] \
+ xavier run - XAVIER Documentation      

xavier run

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier run sub command in more detail. With minimal configuration, the run sub command enables you to start running xavier pipeline.

Setting up the xavier pipeline is fast and easy! In its most basic form, xavier run only has four required inputs.

2. Synopsis

$ xavier run [--help] \
                    [--mode {local, slurm}] \
                    [--job-name JOB_NAME] \
                    [--callers {mutect2,mutect,strelka, ...}] \
@@ -48,4 +48,4 @@
                  --targets Agilent_SSv7_allExons_hg38.bed \
                  --mode slurm \
                  --runmode run
-

Last update: 2024-01-30
\ No newline at end of file +
\ No newline at end of file diff --git a/dev/usage/unlock/index.html b/dev/usage/unlock/index.html index a577352..3d6e0c0 100644 --- a/dev/usage/unlock/index.html +++ b/dev/usage/unlock/index.html @@ -1,4 +1,4 @@ - xavier unlock - XAVIER Documentation

xavier unlock

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier unlock sub command in more detail. With minimal configuration, the unlock sub command enables you to unlock a pipeline output directory.

If the pipeline fails ungracefully, it may be required to unlock the working directory before proceeding again. Snakemake will inform a user when it may be necessary to unlock a working directory with an error message stating: Error: Directory cannot be locked.

Please verify that the pipeline is not running before running this command. If the pipeline is currently running, the workflow manager will report the working directory is locked. This is the default behavior of snakemake, and it is normal. Do NOT run this command if the pipeline is still running! Please kill the master job and its child jobs prior to running this command.

Unlocking xavier pipeline output directory is fast and easy! In its most basic form, xavier unlock only has one required input.

2. Synopsis

$ xavier unlock [-h] --output OUTPUT
+ xavier unlock - XAVIER Documentation      

xavier unlock

1. About

The xavier executable is composed of several inter-related sub commands. Please see xavier -h for all available options.

This part of the documentation describes options and concepts for xavier unlock sub command in more detail. With minimal configuration, the unlock sub command enables you to unlock a pipeline output directory.

If the pipeline fails ungracefully, it may be required to unlock the working directory before proceeding again. Snakemake will inform a user when it may be necessary to unlock a working directory with an error message stating: Error: Directory cannot be locked.

Please verify that the pipeline is not running before running this command. If the pipeline is currently running, the workflow manager will report the working directory is locked. This is the default behavior of snakemake, and it is normal. Do NOT run this command if the pipeline is still running! Please kill the master job and its child jobs prior to running this command.

Unlocking xavier pipeline output directory is fast and easy! In its most basic form, xavier unlock only has one required input.

2. Synopsis

$ xavier unlock [-h] --output OUTPUT
 

The synopsis for this command shows its parameters and their usage. Optional parameters are shown in square brackets.

A user must provide an output directory to unlock via --output argument. After running the unlock sub command, you can resume the build or run pipeline from where it left off by re-running it.

You can always use the -h option for information on a specific command.

2.1 Required Arguments

--output OUTPUT

Output directory to unlock.
type: path

Path to a previous run's output directory. This will remove a lock on the working directory. Please verify that the pipeline is not running before running this command.
Example: --output /data/$USER/WES_hg38

2.2 Options

Each of the following arguments are optional and do not need to be provided.

-h, --help

Display Help.
type: boolean

Shows command's synopsis, help message, and an example command

Example: --help

3. Example

# Step 0.) Grab an interactive node (do not run on head node)
 sinteractive --mem=8g -N 1 -n 4
 module purge
@@ -6,4 +6,4 @@
 
 # Step 1.) Unlock a pipeline output directory
 xavier unlock --output /data/$USER/xavier_hg38
-

Last update: 2024-01-30
\ No newline at end of file +
\ No newline at end of file