Skip to content

Commit

Permalink
fix bug in VariantContextBuilder (#1403)
Browse files Browse the repository at this point in the history
* Fix regression in VariantContextBuilder introduced in #1344 
* Fixes #1404 
* filter validation now fails with IllegalStateException instead of of NPE
* Treat null filter array the same as null filter sets


Co-authored-by: Yossi Farjoun <[email protected]>
Co-authored-by: Louis Bergelson <[email protected]>
  • Loading branch information
Yossi Farjoun and lbergelson committed Aug 2, 2019
1 parent 1d4a316 commit 833a0e2
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ private static void validateFilters(final VariantContext variantContext) {
}

for (String filter : filters) {
if ( filter == null) {
throw new IllegalStateException("'null' is not a valid filter string.");
}
if (!VALID_FILTER.matcher(filter).matches()) {
throw new IllegalStateException("Filter '" + filter +
"' contains an illegal character. It must conform to the regex ;'" + VALID_FILTER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -354,7 +353,7 @@ private void makeFiltersModifiable() {
if (!filtersCanBeModified) {
this.filtersCanBeModified = true;
final Set<String> tempFilters = filters;
this.filters = new HashSet<>();
this.filters = new LinkedHashSet<>();
if (tempFilters != null) {
this.filters.addAll(tempFilters);
}
Expand All @@ -376,7 +375,7 @@ public VariantContextBuilder filters(final Set<String> filters) {
unfiltered();
} else {
this.filtersCanBeModified = true;
filtersAsIs(new HashSet<>(filters));
filtersAsIs(new LinkedHashSet<>(filters));
}
return this;
}
Expand All @@ -398,11 +397,16 @@ private void filtersAsIs(final Set<String> filters) {
/**
* {@link #filters}
*
* @param filters Set of strings to set as the filters for this builder
* @param filters Strings to set as the filters for this builder
* @return this builder
*/
public VariantContextBuilder filters(final String ... filters) {
filtersAsIs(new LinkedHashSet<>(Arrays.asList(filters)));
if(filters == null){
this.unfiltered();
} else {
this.filtersCanBeModified = true;
filtersAsIs(new LinkedHashSet<>(Arrays.asList(filters)));
}
return this;
}

Expand Down Expand Up @@ -435,6 +439,7 @@ public VariantContextBuilder passFilters() {
*/
public VariantContextBuilder unfiltered() {
this.filters = null;
this.filtersCanBeModified = false;
return this;
}

Expand Down
106 changes: 65 additions & 41 deletions src/main/java/htsjdk/variant/vcf/VCFEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,9 @@ public class VCFEncoder {
* allowing missing fields in the header.
*/
public VCFEncoder(final VCFHeader header, final boolean allowMissingFieldsInHeader, final boolean outputTrailingFormatFields) {
if (header == null) throw new NullPointerException("The VCF header must not be null.");
if (header == null) {
throw new NullPointerException("The VCF header must not be null.");
}
this.header = header;
this.allowMissingFieldsInHeader = allowMissingFieldsInHeader;
this.outputTrailingFormatFields = outputTrailingFormatFields;
Expand Down Expand Up @@ -128,20 +130,26 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro
vcfOutput.append(VCFConstants.FIELD_SEPARATOR);

// QUAL
if ( ! context.hasLog10PError()) vcfOutput.append(VCFConstants.MISSING_VALUE_v4);
else vcfOutput.append(formatQualValue(context.getPhredScaledQual()));
if ( !context.hasLog10PError()) {
vcfOutput.append(VCFConstants.MISSING_VALUE_v4);
} else {
vcfOutput.append(formatQualValue(context.getPhredScaledQual()));
}
vcfOutput.append(VCFConstants.FIELD_SEPARATOR)
// FILTER
.append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR);

// INFO
final Map<String, String> infoFields = new TreeMap<>();
for (final Map.Entry<String, Object> field : context.getAttributes().entrySet()) {
if (!this.header.hasInfoLine(field.getKey()))
if (!this.header.hasInfoLine(field.getKey())) {
fieldIsMissingFromHeaderError(context, field.getKey(), "INFO");
}

final String outputValue = formatVCFField(field.getValue());
if (outputValue != null) infoFields.put(field.getKey(), outputValue);
if (outputValue != null) {
infoFields.put(field.getKey(), outputValue);
}
}
writeInfoString(infoFields, vcfOutput);

Expand All @@ -152,11 +160,12 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro
vcfOutput.append(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString());
} else {
final List<String> genotypeAttributeKeys = context.calcVCFGenotypeKeys(this.header);
if ( ! genotypeAttributeKeys.isEmpty()) {
for (final String format : genotypeAttributeKeys)
if ( ! this.header.hasFormatLine(format))
if ( !genotypeAttributeKeys.isEmpty()) {
for (final String format : genotypeAttributeKeys) {
if (!this.header.hasFormatLine(format)) {
fieldIsMissingFromHeaderError(context, format, "FORMAT");

}
}
final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);

vcfOutput.append(VCFConstants.FIELD_SEPARATOR);
Expand All @@ -166,8 +175,6 @@ public void write(final Appendable vcfOutput, final VariantContext context) thro
appendGenotypeData(context, alleleStrings, genotypeAttributeKeys, vcfOutput);
}
}


}

VCFHeader getVCFHeader() {
Expand All @@ -181,52 +188,60 @@ boolean getAllowMissingFieldsInHeader() {
private String getFilterString(final VariantContext vc) {
if (vc.isFiltered()) {
for (final String filter : vc.getFilters()) {
if (!this.header.hasFilterLine(filter)) fieldIsMissingFromHeaderError(vc, filter, "FILTER");
if (!this.header.hasFilterLine(filter)) {
fieldIsMissingFromHeaderError(vc, filter, "FILTER");
}
}

return ParsingUtils.join(";", ParsingUtils.sortList(vc.getFilters()));
} else if (vc.filtersWereApplied()) return VCFConstants.PASSES_FILTERS_v4;
else return VCFConstants.UNFILTERED;
} else {
return vc.filtersWereApplied() ? VCFConstants.PASSES_FILTERS_v4 : VCFConstants.UNFILTERED;
}
}

private static String formatQualValue(final double qual) {
String s = String.format(QUAL_FORMAT_STRING, qual);
if (s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM))
if (s.endsWith(QUAL_FORMAT_EXTENSION_TO_TRIM)) {
s = s.substring(0, s.length() - QUAL_FORMAT_EXTENSION_TO_TRIM.length());
}
return s;
}

private void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) {
if (!allowMissingFieldsInHeader)
if (!allowMissingFieldsInHeader) {
throw new IllegalStateException("Key " + id + " found in VariantContext field " + field
+ " at " + vc.getContig() + ":" + vc.getStart()
+ " but this key isn't defined in the VCFHeader. We require all VCFs to have"
+ " complete VCF headers by default.");
+ " at " + vc.getContig() + ":" + vc.getStart()
+ " but this key isn't defined in the VCFHeader. We require all VCFs to have"
+ " complete VCF headers by default.");
}
}

@SuppressWarnings("rawtypes")
String formatVCFField(final Object val) {
final String result;
if ( val == null )
if (val == null) {
result = VCFConstants.MISSING_VALUE_v4;
else if ( val instanceof Double )
} else if (val instanceof Double) {
result = formatVCFDouble((Double) val);
else if ( val instanceof Boolean )
result = (Boolean)val ? "" : null; // empty string for true, null for false
else if ( val instanceof List ) {
result = formatVCFField(((List)val).toArray());
} else if ( val.getClass().isArray() ) {
} else if (val instanceof Boolean) {
result = (Boolean) val ? "" : null; // empty string for true, null for false
} else if (val instanceof List) {
result = formatVCFField(((List) val).toArray());
} else if (val.getClass().isArray()) {
final int length = Array.getLength(val);
if ( length == 0 )
if (length == 0) {
return formatVCFField(null);
final StringBuilder sb = new StringBuilder(formatVCFField(Array.get(val, 0)));
for ( int i = 1; i < length; i++) {
}
final StringBuilder sb = new StringBuilder(
formatVCFField(Array.get(val, 0)));
for (int i = 1; i < length; i++) {
sb.append(',');
sb.append(formatVCFField(Array.get(val, i)));
}
result = sb.toString();
} else
} else {
result = val.toString();
}

return result;
}
Expand All @@ -245,9 +260,9 @@ public static String formatVCFDouble(final double d) {
final String format;
if (d < 1) {
if (d < 0.01) {
if (Math.abs(d) >= 1e-20)
if (Math.abs(d) >= 1e-20) {
format = "%.3e";
else {
} else {
// return a zero format
return "0.00";
}
Expand Down Expand Up @@ -299,7 +314,9 @@ public void addGenotypeData(final VariantContext vc, final Map<Allele, String> a
vcfoutput.append(VCFConstants.FIELD_SEPARATOR);

Genotype g = vc.getGenotype(sample);
if (g == null) g = GenotypeBuilder.createMissing(sample, ploidy);
if (g == null) {
g = GenotypeBuilder.createMissing(sample, ploidy);
}

final List<String> attrs = new ArrayList<>(genotypeFormatKeys.size());
for (final String field : genotypeFormatKeys) {
Expand All @@ -323,11 +340,11 @@ public void addGenotypeData(final VariantContext vc, final Map<Allele, String> a
final IntGenotypeFieldAccessors.Accessor accessor = GENOTYPE_FIELD_ACCESSORS.getAccessor(field);
if (accessor != null) {
final int[] intValues = accessor.getValues(g);
if (intValues == null)
if (intValues == null) {
outputValue = VCFConstants.MISSING_VALUE_v4;
else if (intValues.length == 1) // fast path
} else if (intValues.length == 1) { // fast path
outputValue = Integer.toString(intValues[0]);
else {
} else {
final StringBuilder sb = new StringBuilder();
sb.append(intValues[0]);
for (int i = 1; i < intValues.length; i++) {
Expand All @@ -342,16 +359,20 @@ else if (intValues.length == 1) // fast path
}
}

if (outputValue != null)
if (outputValue != null) {
attrs.add(outputValue);
}
}
}

// strip off trailing missing values
if (!outputTrailingFormatFields) {
for (int i = attrs.size() - 1; i >= 0; i--) {
if (isMissingValue(attrs.get(i))) attrs.remove(i);
else break;
if (isMissingValue(attrs.get(i))) {
attrs.remove(i);
} else {
break;
}
}
}

Expand All @@ -375,8 +396,11 @@ private void writeInfoString(final Map<String, String> infoFields, final Appenda

boolean isFirst = true;
for (final Map.Entry<String, String> entry : infoFields.entrySet()) {
if (isFirst) isFirst = false;
else vcfoutput.append(VCFConstants.INFO_FIELD_SEPARATOR);
if (isFirst) {
isFirst = false;
} else {
vcfoutput.append(VCFConstants.INFO_FIELD_SEPARATOR);
}

vcfoutput.append(entry.getKey());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,31 @@ public void testGenotypesUnaffectedByClonedVariants(final VCBuilderScheme builde

Assert.assertNotEquals(vc2.getGenotypes(), vc1.getGenotypes(), "The two genotype lists should be different. only saw " + vc1.getGenotypes().toString());
}

@Test
public void testCanResetFilters() {
final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST");
builder.unfiltered();
builder.filter("mayIPlease?");
}

@Test(expectedExceptions = IllegalStateException.class)
public void testCantCreateNullFilter(){
final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST");
builder.filters((String)null);
builder.make();
}

@Test
public void testNullFilterArray(){
final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST");
builder.filters((String[])null);
}

@Test
public void testNullFilterSet(){
final VariantContextBuilder builder = new VariantContextBuilder("source", "contig", 1, 1, Arrays.asList(Tref, C, G)).filter("TEST");
builder.filters((Set<String>)null);
builder.make();
}
}

0 comments on commit 833a0e2

Please sign in to comment.