From 15725e4eeb5bb4012a634577936f5fc3099f73d2 Mon Sep 17 00:00:00 2001
From: Dror Kessler <dror27.kessler@gmail.com>
Date: Mon, 12 Feb 2024 12:12:02 +0200
Subject: [PATCH] code cleanup (of unused methods, etc)

---
 src/main/java/picard/flow/FlowBasedRead.java  |  31 +-
 .../java/picard/flow/FlowBasedReadUtils.java  | 287 +-----------------
 2 files changed, 21 insertions(+), 297 deletions(-)

diff --git a/src/main/java/picard/flow/FlowBasedRead.java b/src/main/java/picard/flow/FlowBasedRead.java
index 25196c4bcb..a3d0db38d8 100644
--- a/src/main/java/picard/flow/FlowBasedRead.java
+++ b/src/main/java/picard/flow/FlowBasedRead.java
@@ -12,8 +12,8 @@ public class FlowBasedRead {
     // constants
     private final double MINIMAL_CALL_PROB = 0.1;
 
-    final public static String FLOW_MATRIX_TAG_NAME = "tp";
-    final public static String FLOW_MATRIX_T0_TAG_NAME = "t0";
+    final private static String FLOW_MATRIX_TAG_NAME = "tp";
+    final private static String FLOW_MATRIX_T0_TAG_NAME = "t0";
     final public static String MAX_CLASS_READ_GROUP_TAG = "mc";
 
     /**
@@ -29,16 +29,14 @@ public class FlowBasedRead {
     private int[] key;
 
     /**
-     * The maximal length of an hmer that can be encoded (normally in the 10-12 range)
+     * The maximal length of an hmer that can be encoded (normally in the 10-20 range)
      */
     private int maxHmer;
-    private double totalMinErrorProbability;
-    private double perHmerMinErrorProbability;
+
     /**
-     * The order in which flow key in encoded (See decription for key field). Flow order may be wrapped if a longer one
-     * needed.
+     * computed minimal error probability for an hmer
      */
-    private byte[] flowOrder;
+    private double perHmerMinErrorProbability;
 
     /**
      * The probability matrix for this read. [n][m] position represents that probablity that an hmer of n length will be
@@ -119,15 +117,14 @@ private void spreadFlowLengthProbsAcrossCountsAtFlow(final int flowToSpread) {
     private void readFlowMatrix(final String _flowOrder) {
 
         // generate key (base to flow space)
-        if (fbargs.fillingValue == 0) {
-            totalMinErrorProbability = estimateFillingValue();
-        } else {
-            totalMinErrorProbability = fbargs.fillingValue;
-        }
-        perHmerMinErrorProbability = totalMinErrorProbability/getMaxHmer();
 
+        // establish min probability
+        final double totalMinErrorProbability = (fbargs.fillingValue == 0) ? estimateFillingValue() : fbargs.fillingValue;
+        perHmerMinErrorProbability = totalMinErrorProbability / getMaxHmer();
+
+        // generate flow key
         key = FlowBasedReadUtils.baseArrayToKey(samRecord.getReadBases(), _flowOrder);
-        flowOrder = FlowBasedReadUtils.getFlowToBase(_flowOrder, key.length);
+
         // initialize matrix
         flowMatrix = new double[maxHmer + 1][key.length];
         for (int i = 0; i < maxHmer + 1; i++) {
@@ -170,7 +167,7 @@ private void readFlowMatrix(final String _flowOrder) {
             }
 
             if ((run == 0) && (specialTreatmentForZeroCalls)) {
-                parseZeroQuals(t0probs, i, qualOfs);
+                parseZeroQuals(t0probs, i, qualOfs, totalMinErrorProbability);
             }
 
             double totalErrorProb = 0;
@@ -208,7 +205,7 @@ private void parseSingleHmer(final double[] probs, final byte[] tp, final int fl
     // place it on the neighboring bases and choose the **lower** error probability between the
     // neighbors (that's how T0 encoding works). The error is placed only on the 1-mer error assuming
     // that 2->0 errors are negligibly rare.
-    private void parseZeroQuals(final double[] probs, final int flowIdx, final int qualOfs) {
+    private void parseZeroQuals(final double[] probs, final int flowIdx, final int qualOfs, final double totalMinErrorProbability) {
         if ((qualOfs == 0) | (qualOfs == probs.length)) { // do not report zero error probability on the edge of the read
             return;
         }
diff --git a/src/main/java/picard/flow/FlowBasedReadUtils.java b/src/main/java/picard/flow/FlowBasedReadUtils.java
index 8dcdf6aabc..fa7f3a92f8 100644
--- a/src/main/java/picard/flow/FlowBasedReadUtils.java
+++ b/src/main/java/picard/flow/FlowBasedReadUtils.java
@@ -1,8 +1,6 @@
 package picard.flow;
 
-import com.google.common.annotations.VisibleForTesting;
 import htsjdk.samtools.*;
-import htsjdk.samtools.util.SequenceUtil;
 import picard.PicardException;
 
 import java.util.ArrayList;
@@ -63,163 +61,33 @@ static public int[] baseArrayToKey(final byte[] bases, final String flowOrder){
         return ret;
     }
 
-    /**
-     * For every flow of the key output the index of the last base that was output prior to this flow
-     * @param key given key
-     * @return array
-     */
-    static public int[] getKeyToBase(final int[] key) {
-        final int[] result = new int[key.length];
-        result[0] = -1;
-        for (int i = 1; i < result.length; i++) {
-            result[i] = result[i - 1] + key[i - 1];
-        }
-        return result;
-
-    }
-
-    /**
-     * For every flow of the key output the nucleotide that is being read for this flow
-     * @param flowOrder given flow order
-     * @param expectedLength the length of the key (key is not provided)
-     * @return array of bases
-     */
-
-    static public byte[] getFlowToBase(final String flowOrder, final int expectedLength) {
-        final byte[] result = new byte[expectedLength] ;
-        for ( int i = 0; i < result.length; i++ ) {
-            result[i] = (byte)flowOrder.charAt(i%flowOrder.length());
-        }
-        return result;
-    }
-
-    /**
-     * Prints the key as character-encoded string
-     * @param ints (key array)
-     * @return encoded string
-     */
-    static public String keyAsString(final int[] ints)
-    {
-        final StringBuilder   sb = new StringBuilder();
-
-        for ( final int i : ints )
-            sb.append((char)((i < 10) ? ('0' + i) : ('A' + i - 10)));
-
-        return sb.toString();
-    }
-
-    /**
-     * Converts a numerical array into flow space by flattening per-base elements to fill empty flows based on minimum scores.
-     * This is intended to make it easier to transform per-base read statistics that are computed on the read in base-space
-     * sanely into flow-space reads in a reasonable fashion that makes sense for parameters like indel-likelihoods.
-     *
-     * The rules are as follows:
-     *  - For every run of homopolymers, the minimum score for any given base is translated to cover the whole run
-     *  - For every 0 base flow, the score from the previous filled flow is copied into place.
-     *  - For the beginning of the array (in the case of preceding 0-flows) the given default value is used.
-     *
-     * Example:
-     *  A read with the following bases with a base-space calculated into flow space (ACTG) as:
-     *     TTTATGC -> 0030101101
-     *  With an input array like this:
-     *     byte[]{1,2,3,4,5,6,7}
-     *  We should expect the following result array (Which matches the key array in length)
-     *     byte[]{defaultQual,defaultQual,1,1,4,4,5,6,6,7}
-     *
-     * @param bases base space sequence
-     * @param keyLength size of the converted bases array in flow-space
-     * @param baseSpacedArrayToConvert Array of base-space scores to be conformed to flow space
-     * @param defaultQual default quality to use at the head of the array for non-covered flows
-     * @param flowOrder flow order
-     * @return Array of translated bases comparable to the keyLength
-     */
-    @VisibleForTesting
-    static public byte[] baseArrayToKeySpace(final byte[] bases, final int keyLength, final byte[] baseSpacedArrayToConvert, final byte defaultQual, final String flowOrder){
-
-        if (bases.length != baseSpacedArrayToConvert.length) {
-            throw new IllegalArgumentException("Read and qual arrays do not match");
-        }
-
-        final byte[] result = new byte[keyLength];
-        final byte[] flowOrderBytes = flowOrder.getBytes();
-        int loc = 0;
-        int flowNumber = 0 ;
-        byte lastQual = defaultQual;
-        final int period = flowOrderBytes.length;
-        while ( loc < bases.length ) {
-            final byte flowBase = flowOrderBytes[flowNumber%period];
-            if ((bases[loc]!=flowBase) && ( bases[loc]!='N')) {
-                result[flowNumber] = lastQual;
-            } else {
-                byte qual = Byte.MAX_VALUE;
-                while ( ( loc < bases.length) && ((bases[loc]==flowBase) || (bases[loc]== 'N')) ){
-                    qual = (byte)Math.min(baseSpacedArrayToConvert[loc], qual);
-                    loc++;
-                }
-                result[flowNumber] = qual;
-                lastQual = qual;
+    static public class ReadGroupInfo {
+        private static final String PLATFORM_ULTIMA = "ULTIMA";
+        private static final String PLATFORM_LS454 = "LS454";
 
-            }
-            flowNumber++;
-        }
-        return result;
-    }
 
-    static public class ReadGroupInfo {
         final public String  flowOrder;
         final public int     maxClass;
         final public boolean isFlowPlatform;
-        private String  reversedFlowOrder = null;
 
         public ReadGroupInfo(final SAMReadGroupRecord readGroup) {
-            if (readGroup.getPlatform()==null){
-                isFlowPlatform = false;
-            } else if (NGSPlatform.fromReadGroupPL(readGroup.getPlatform())==NGSPlatform.UNKNOWN){
-                isFlowPlatform = false;
-            } else if (NGSPlatform.fromReadGroupPL(readGroup.getPlatform()) == NGSPlatform.LS454) {
-                //old Ultima data can have PLATFORM==LS454 and not have FO tag
-                isFlowPlatform = true;
-            } else if (NGSPlatform.fromReadGroupPL(readGroup.getPlatform()) == NGSPlatform.ULTIMA){
-                if (readGroup.getFlowOrder()!=null) {
-                    isFlowPlatform = true;
-                } else {
-                    throw new RuntimeException("Malformed Ultima read group identified, aborting: " + readGroup);
-                }
-            } else {
-                isFlowPlatform = false;
-            }
+            isFlowPlatform = PLATFORM_ULTIMA.equals(readGroup.getPlatform()) || PLATFORM_LS454.equals(readGroup.getPlatform());
 
             if (isFlowPlatform) {
                 this.flowOrder = readGroup.getFlowOrder();
-                String mc = readGroup.getAttribute(FlowBasedRead.MAX_CLASS_READ_GROUP_TAG);
+                final String mc = readGroup.getAttribute(FlowBasedRead.MAX_CLASS_READ_GROUP_TAG);
                 this.maxClass = (mc == null) ? FlowBasedRead.MAX_CLASS : Integer.parseInt(mc);
-            } else { // not a flow platform
+            } else {
                 this.flowOrder = null;
                 this.maxClass = 0;
             }
         }
 
-        public synchronized String getReversedFlowOrder() {
-            if ( reversedFlowOrder == null ) {
-                reversedFlowOrder = SequenceUtil.reverseComplement(flowOrder);
-            }
-            return reversedFlowOrder;
-        }
-
-    }
-
-    public static boolean hasFlowTags(final SAMRecord rec) {
-        return rec.hasAttribute(FlowBasedRead.FLOW_MATRIX_TAG_NAME);
-
     }
 
     public static synchronized ReadGroupInfo getReadGroupInfo(final SAMFileHeader hdr, final SAMRecord read) {
 
-        if ( !hasFlowTags(read) ) {
-            throw new IllegalArgumentException("read must be flow based: " + read);
-        }
-
-        String name = read.getReadGroup().getReadGroupId();
+        final String name = read.getReadGroup().getReadGroupId();
         ReadGroupInfo info = readGroupInfo.get(name);
         if ( info == null ) {
             readGroupInfo.put(name, info = new ReadGroupInfo(hdr.getReadGroup(name)));
@@ -227,145 +95,4 @@ public static synchronized ReadGroupInfo getReadGroupInfo(final SAMFileHeader hd
         return info;
     }
 
-    /**
-    /*
-     * clips flows from the left to clip the input number of bases
-     * Needed to trim the haplotype to the read
-     * Returns number of flows to remove and the change in the left most remaining flow if necessary
-     */
-    static public int[] findLeftClipping(final int baseClipping, final int[] flow2base, final int[] key) {
-        final int [] result = new int[2];
-        if (baseClipping == 0 ){
-            return result;
-        }
-
-        int stopClip = 0;
-        for (int i = 0 ; i < flow2base.length; i++ ) {
-
-            if (flow2base[i] + key[i] >= baseClipping) {
-                stopClip = i;
-                break;
-            }
-        }
-        final int hmerClipped = baseClipping - flow2base[stopClip] - 1;
-        result[0] = stopClip;
-        result[1] = hmerClipped;
-        return result;
-    }
-
-    /*
-     * clips flows from the right to trim the input number of bases
-     * Returns number of flows to remove and the change in the right most flow.
-     */
-    static public int[] findRightClipping(final int baseClipping, final int[] rFlow2Base, final int[] rKey) {
-        final int [] result = new int[2];
-        if (baseClipping == 0 ){
-            return result;
-        }
-
-        int stopClip = 0;
-
-        for (int i = 0; i < rFlow2Base.length; i++ ) {
-            if (rFlow2Base[i] + rKey[i] >= baseClipping) {
-                stopClip = i;
-                break;
-            }
-        }
-
-        final int hmerClipped = baseClipping - rFlow2Base[stopClip] - 1;
-        result[0] = stopClip;
-        result[1] = hmerClipped;
-        return result;
-    }
-
-    /**
-     * Retrieve flow matrix modifications matrix from its string argument format. This matrix contains
-     * logic for modifying the flow matrix as it is read in. If the value of [n] is not zero,
-     * then the hmer probability for hmer length n will be copied to the [n] position
-     *
-     * For the implementation logic, see FlowBasedRead.fillFlowMatrix
-     */
-    static public int[] getFlowMatrixModsInstructions(final String flowMatrixMods, final int maxHmer) {
-
-        if ( flowMatrixMods != null ) {
-            final int[] flowMatrixModsInstructions = new int[maxHmer + 1];
-
-            final String[]    toks = flowMatrixMods.split(",");
-            for ( int i = 0 ; i < toks.length - 1 ; i += 2 ) {
-                final int hmer = Integer.parseInt(toks[i]);
-                flowMatrixModsInstructions[hmer] = Integer.parseInt(toks[i + 1]);
-            }
-            return flowMatrixModsInstructions;
-        } else {
-            return null;
-        }
-    }
-
-    /**
-     * A canonical, master list of the standard NGS platforms.  These values
-     * can be obtained (efficiently) from a SAMRecord object with the
-     * getNGSPlatform method.
-     */
-    public enum NGSPlatform {
-        // note the order of elements here determines the order of matching operations, and therefore the
-        // efficiency of getting a NGSPlatform from a string.
-        ILLUMINA(SequencerFlowClass.DISCRETE, "ILLUMINA", "SLX", "SOLEXA"),
-        SOLID(SequencerFlowClass.DISCRETE, "SOLID"),
-        LS454(SequencerFlowClass.FLOW, "454", "LS454"),
-        COMPLETE_GENOMICS(SequencerFlowClass.DISCRETE, "COMPLETE"),
-        PACBIO(SequencerFlowClass.DISCRETE, "PACBIO"),
-        ION_TORRENT(SequencerFlowClass.FLOW, "IONTORRENT"),
-        CAPILLARY(SequencerFlowClass.OTHER, "CAPILLARY"),
-        HELICOS(SequencerFlowClass.OTHER, "HELICOS"),
-        ULTIMA(SequencerFlowClass.FLOW, "ULTIMA"),
-        UNKNOWN(SequencerFlowClass.OTHER, "UNKNOWN");
-
-
-        /**
-         * Array of the prefix names in a BAM file for each of the platforms.
-         */
-        protected final String[] BAM_PL_NAMES;
-        protected final SequencerFlowClass sequencerType;
-
-        NGSPlatform(final SequencerFlowClass type, final String... BAM_PL_NAMES) {
-            if ( BAM_PL_NAMES.length == 0 ) throw new IllegalStateException("Platforms must have at least one name");
-
-            for ( int i = 0; i < BAM_PL_NAMES.length; i++ )
-                BAM_PL_NAMES[i] = BAM_PL_NAMES[i].toUpperCase();
-
-            this.BAM_PL_NAMES = BAM_PL_NAMES;
-            this.sequencerType = type;
-        }
-
-        /**
-         * Returns the NGSPlatform corresponding to the PL tag in the read group
-         * @param plFromRG -- the PL field (or equivalent) in a ReadGroup object.  Can be null => UNKNOWN
-         * @return an NGSPlatform object matching the PL field of the header, or UNKNOWN if there was no match or plFromRG is null
-         */
-        public static NGSPlatform fromReadGroupPL(final String plFromRG) {
-            if ( plFromRG == null ) return UNKNOWN;
-
-            final String pl = plFromRG.toUpperCase();
-            for ( final NGSPlatform ngsPlatform : NGSPlatform.values() ) {
-                for ( final String bamPLName : ngsPlatform.BAM_PL_NAMES ) {
-                    if ( pl.contains(bamPLName) )
-                        return ngsPlatform;
-                }
-            }
-
-            return UNKNOWN;
-        }
-
-        /**
-         * In broad terms, each sequencing platform can be classified by whether it flows nucleotides in some order
-         * such that homopolymers get sequenced in a single event (ie 454 or Ion) or it reads each position in the
-         * sequence one at a time, regardless of base composition (Illumina or Solid).  This information is primarily
-         * useful in the BQSR process
-         */
-        public enum SequencerFlowClass {
-            DISCRETE,
-            FLOW,
-            OTHER //Catch-all for unknown platforms, as well as relics that GATK doesn't handle well (Capillary, Helicos)
-        }
-    }
 }