Skip to content

Commit

Permalink
#35 initial rdock enhancements
Browse files Browse the repository at this point in the history
  • Loading branch information
tdudgeon committed Nov 27, 2019
1 parent 41d5487 commit bde7a0f
Show file tree
Hide file tree
Showing 7 changed files with 3,366 additions and 0 deletions.
43 changes: 43 additions & 0 deletions data/nudt7/ligand.mol
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@

OpenBabel06051719483D

18 19 0 0 0 0 2 V2000
29.0700 -43.2240 73.7660 C 0 0 0 0 0
36.2650 -44.8070 74.9140 C 0 0 0 0 0
37.1260 -44.6280 73.8270 C 0 0 0 0 0
38.5050 -44.6030 73.9960 C 0 0 0 0 0
39.0530 -44.7650 75.2580 C 0 0 0 0 0
38.2200 -44.9420 76.3450 C 0 0 0 0 0
36.8400 -44.9680 76.1790 C 0 0 0 0 0
30.0630 -44.1750 73.1160 C 0 0 0 0 0
29.4310 -45.1480 72.1250 C 0 0 0 0 0
32.0050 -44.9160 74.3360 C 0 0 0 0 0
32.8370 -45.6680 73.5100 C 0 0 0 0 0
34.2080 -45.6230 73.7110 C 0 0 0 0 0
34.7860 -44.8390 74.7200 C 0 0 0 0 0
33.9230 -44.0930 75.5330 C 0 0 0 0 0
32.5490 -44.1280 75.3490 C 0 0 0 0 0
30.2050 -45.7640 71.3670 O 0 0 0 0 0
28.1910 -45.2760 72.1490 O 0 0 0 0 0
30.6380 -44.9430 74.1670 O 0 0 0 0 0
1 8 1 0 0 0
2 3 2 0 0 0
2 7 1 0 0 0
2 13 1 0 0 0
3 4 1 0 0 0
4 5 2 0 0 0
5 6 1 0 0 0
6 7 2 0 0 0
8 9 1 0 0 0
8 18 1 0 0 0
9 16 2 0 0 0
9 17 1 0 0 0
10 11 2 0 0 0
10 15 1 0 0 0
10 18 1 0 0 0
11 12 1 0 0 0
12 13 2 0 0 0
13 14 1 0 0 0
14 15 2 0 0 0
M CHG 1 17 -1
M END
Binary file added data/nudt7/ligands.data.gz
Binary file not shown.
3,137 changes: 3,137 additions & 0 deletions data/nudt7/receptor.mol2

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/nextflow/docking/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
results
1 change: 1 addition & 0 deletions src/nextflow/docking/rdock-filter.nsd.config
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Intentionally Empty
180 changes: 180 additions & 0 deletions src/nextflow/docking/rdock-filter.nsd.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
#!/usr/bin/env nextflow

/* Squonk Nextflow pipeline that runs docking using rDock, filtering the poses relative to the score from docking the
* reference ligand.
*
* To test this manually run something like this:
* nextflow -c src/nextflow/nextflow-docker.config run src/nextflow/docking/rdock-filter.nsd.nf --ligands data/nudt7/ligands.data.gz --ligand data/nudt7/ligand.mol --receptor data/nudt7/receptor.mol2 --num_dockings 5 -with-docker informaticsmatters/rdkit_pipelines
*/

params.ligand = "$baseDir/ligand.mol"
params.ligands = "$baseDir/ligands.data.gz"
params.receptor = "$baseDir/receptor.mol2"
params.chunk = 25
params.num_dockings = 25
params.top = 1
params.score = null
params.nscore = null
params.limit = 0
params.digits = 4
params.threshold = 0.0
params.field = 'SCORE.norm'

ligand = file(params.ligand)
ligands = file(params.ligands)
receptor = file(params.receptor)

process create_cavity {

container 'informaticsmatters/rdock-mini:latest'
beforeScript 'chmod g+w .'

input:
file ligand
file receptor

output:
file 'receptor.prm' into prmfile
file 'receptor.as' into asfile

"""
cat << EOF > receptor.prm
RBT_PARAMETER_FILE_V1.00
RECEPTOR_FILE $receptor
RECEPTOR_FLEX 3.0
SECTION MAPPER
SITE_MAPPER RbtLigandSiteMapper
REF_MOL $ligand
RADIUS 3.0
SMALL_SPHERE 1.0
MIN_VOLUME 100
MAX_CAVITIES 1
VOL_INCR 0.0
GRIDSTEP 0.5
END_SECTION
SECTION CAVITY
SCORING_FUNCTION RbtCavityGridSF
WEIGHT 1.0
END_SECTION
EOF
rbcavity -was -d -r receptor.prm > rbcavity.log
"""
}

/* Docks the reference ligand
*/
process dock_reference_ligand {

container 'informaticsmatters/rdock-mini:latest'
beforeScript 'chmod g+w .'

publishDir "$baseDir/results", mode: 'copy'

input:
file receptor
file 'receptor.as' from asfile
file 'receptor.prm' from prmfile
file ligand

output:
file 'best_ligand.sdf' into best_ligand

"""
rbdock -i ligand.mol -r receptor.prm -p dock.prm -n $params.num_dockings -o docked_ligand > docked_ligand_out.log
sdsort -n -s -fSCORE docked_ligand.sd | sdfilter -f'\$_COUNT <= 1' > best_ligand.sdf
"""
}

/* Splits the input into multiple files of ${params.chunk} records.
*/
process splitter {

//beforeScript 'chmod g+w .'
container 'informaticsmatters/rdkit_pipelines:latest'

input:
file ligands

output:
file 'ligands_part*.sdf' into ligands_parts mode flatten
file 'ligands_part_metrics.txt' into splitter_metrics

"""
python -m pipelines_utils_rdkit.filter -i $ligands -c $params.chunk -l $params.limit -d $params.digits -o ligands_part -of sdf --no-gzip --meta
"""
}

/* Docks each file from the ligands_parts channel sending each resulting SD file to the results channel
*/
process dock_ligands {

container 'informaticsmatters/rdock-mini:latest'
// change permissions on the work dir so that the rdock user in the container
// can write to the directory that is owned by root
beforeScript 'chmod g+w .'

input:
file part from ligands_parts
file receptor
file 'receptor.as' from asfile
file 'receptor.prm' from prmfile

output:
file 'docked_part*.sd' into docked_parts

"""
rbdock -i $part -r receptor.prm -p dock.prm -n $params.num_dockings -o ${part.name.replace('ligands', 'docked')[0..-5]} > docked_out.log
"""
}

/* Filter, combine and publish the results.
* Poses are only included if they are within ${params.threshold} of the best score obtained from docking the
* target ligand into the same receptor (output of the dock_ligand process).
*/
process combine_and_filter {

container 'informaticsmatters/rdock-mini:latest'
beforeScript 'chmod g+w .'

input:
file parts from docked_parts.collect()
file best from best_ligand

output:
file 'rdock_results.sdf' into results

"""
FSCORE=\$(sdreport -nh -t${params.field} best_ligand.sdf | cut -f 2 | awk '{\$1=\$1};1')
ASCORE=\$(awk "BEGIN {print \$FSCORE + ${params.threshold}}")
echo "Processing $parts with normalised score filter of \$ASCORE"
sdsort -n -s -f${params.field} docked_part*.sd | sdfilter -f"\\\$${params.field} < \$ASCORE" | sdfilter -f'\$_COUNT <= ${params.top}' > rdock_results.sdf
"""
}

process results {

beforeScript 'chmod g+w .'
container 'informaticsmatters/rdkit_pipelines:latest'
beforeScript 'chmod g+w .'

publishDir "$baseDir/results", mode: 'copy'

input:
file 'results.sdf' from results
file 'splitter_metrics.txt' from splitter_metrics

output:
file 'output.data.gz'
file 'output.metadata'
file 'output_metrics.txt'

"""
python -m pipelines_utils_rdkit.filter -i results.sdf -of json -o output --meta
mv output_metrics.txt old_metrics.txt
echo -n 'DockingRDock=' >> output_metrics.txt
echo \$((`grep '__InputCount__' splitter_metrics.txt | cut -d '=' -f 2` * ${params.num_dockings})) >> output_metrics.txt
grep '__InputCount__' splitter_metrics.txt >> output_metrics.txt
grep '__OutputCount__' old_metrics.txt >> output_metrics.txt
"""
}
4 changes: 4 additions & 0 deletions src/nextflow/nextflow-docker.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
docker.enabled = true
docker.mountFlags = 'z'
docker.runOptions = '-u $(id -u):$(id -g)'
process.container = 'busybox'

0 comments on commit bde7a0f

Please sign in to comment.