diff --git a/bin/date_filter.py b/bin/date_filter.py
index 39a8e03..bc5ff02 100755
--- a/bin/date_filter.py
+++ b/bin/date_filter.py
@@ -58,7 +58,7 @@ def filter_by_date(in_metadata, out_metadata, todays_date, time_window, filter_c
             continue

         if (todays_date - window) > date:
-            row[filter_column] = "sample_date older than %s days" %window
+            row[filter_column] = "sample_date older than %s days" %time_window
             if not restrict:
                 writer.writerow(row)
                 continue
diff --git a/bin/downsample.py b/bin/downsample.py
index cf66c5f..de9d264 100755
--- a/bin/downsample.py
+++ b/bin/downsample.py
@@ -119,7 +119,10 @@ def downsample(in_metadata, out_metadata, in_fasta, out_fasta, max_diff, outgrou
         open(out_metadata, 'w', newline = '') as csv_out:

         reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
-        writer = csv.DictWriter(csv_out, fieldnames = reader.fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
+        fieldnames = list(reader.fieldnames)  # copy so appending "note" does not also mutate reader.fieldnames
+        if "note" not in fieldnames:
+            fieldnames.append("note")
+        writer = csv.DictWriter(csv_out, fieldnames = fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
         writer.writeheader()

         if "nucleotide_variants" in reader.fieldnames:
@@ -134,8 +137,10 @@ def downsample(in_metadata, out_metadata, in_fasta, out_fasta, max_diff, outgrou
             very_most_frequent = get_by_frequency(count_dict, num_samples, band=[0.5,1.0])

         for row in reader:
+            row["note"] = ""
             fasta_header = row[FASTA_HEADER]
             if fasta_header not in indexed_fasta:
+                writer.writerow(row)
                 continue
             if original_num_seqs % 1000 == 0:
                 now = datetime.datetime.now()
@@ -146,13 +151,14 @@ def downsample(in_metadata, out_metadata, in_fasta, out_fasta, max_diff, outgrou
                               downsample_lineage_size,lineage_dict):
                 if fasta_header in outgroups:
                     row["why_excluded"]=""
-                writer.writerow(row)
                 if row["why_excluded"] in [None, "None", ""] and fasta_header in indexed_fasta:
                     seq_rec = indexed_fasta[fasta_header]
                     fa_out.write(">" + seq_rec.id + "\n")
                     fa_out.write(str(seq_rec.seq) + "\n")
+                    row["note"] = "included in downsample"
                 else:
                     print(row["why_excluded"], fasta_header, (fasta_header in indexed_fasta))
+                writer.writerow(row)
                 continue
             muts = row[var_column].split("|")
             if len(muts) < max_diff:
@@ -192,11 +198,12 @@ def downsample(in_metadata, out_metadata, in_fasta, out_fasta, max_diff, outgrou
                     else:
                         var_dict[mut].append(fasta_header)
                 row["why_excluded"] = ""
-                writer.writerow(row)
                 if fasta_header in indexed_fasta:
                     seq_rec = indexed_fasta[fasta_header]
                     fa_out.write(">" + seq_rec.id + "\n")
                     fa_out.write(str(seq_rec.seq) + "\n")
+                    row["note"] = "included in downsample"
+            writer.writerow(row)

         now = datetime.datetime.now()
         print("%s Downsampled from %i seqs to %i seqs" %(str(now), original_num_seqs, len(sample_dict)))
diff --git a/bin/filter_by_ambiguous_sites.py b/bin/filter_by_ambiguous_sites.py
index e0a8b65..77bf356 100755
--- a/bin/filter_by_ambiguous_sites.py
+++ b/bin/filter_by_ambiguous_sites.py
@@ -44,6 +44,8 @@ def apply_filter(in_fasta, out_fasta, sites_file):
         if keep:
             fasta_out.write('>' + ID + '\n')
             fasta_out.write(seq + '\n')
+        else:
+            print(ID)  # filtered IDs go to stdout so the caller can redirect them to a log

 def main():
     args = parse_args()
diff --git a/bin/parse_usher_log.py b/bin/parse_usher_log.py
index 0f3d015..4ca9476 100755
--- a/bin/parse_usher_log.py
+++ b/bin/parse_usher_log.py
@@ -15,6 +15,11 @@ def parse_log(in_file, out_file):
     """
     Usher STDOUT > CSV
     """
+    ignored = set()
+    with open(in_file, 'r') as in_handle:
+        for line in in_handle:
+            if "Ignoring sample" in line:
+                ignored.add(line.split("Ignoring sample ")[1].split(".")[0])
     with open(in_file, 'r') as in_handle, \
         open(out_file, 'w') as out_handle:
         out_handle.write("sequence_name,parsimony_score,num_parsimony_optimal_placements,is_unreliable_in_tree\n")
@@ -26,6 +31,8 @@ def parse_log(in_file, out_file):
                 print(fields)
             assert len(fields) > 3
             row = {"sequence_name": fields[1].split(": ")[1], "parsimony_score": fields[2].split(": ")[1], "num_parsimony_optimal_placements": fields[3].split(": ")[1]}
+            if row["sequence_name"] in ignored:
+                continue
             if int(row["num_parsimony_optimal_placements"]) > 1:
                 row["is_unreliable_in_tree"] = "Y"
             else:
diff --git a/modules/extract_protected_sequences.nf b/modules/extract_protected_sequences.nf
index 3c22f9b..6e8a303 100644
--- a/modules/extract_protected_sequences.nf
+++ b/modules/extract_protected_sequences.nf
@@ -40,6 +40,51 @@ process filter_on_sample_date_for_recent {
     """
 }

+process annotate_metadata {
+    /**
+    * Appends to the note column info about sequences excluded because they are not recent
+    * @input metadata, fasta
+    * @output metadata
+    */
+
+    input:
+    path metadata
+    path fasta
+
+    output:
+    path "${metadata.baseName}.annotated.csv"
+
+    script:
+    """
+    #!/usr/bin/env python3
+    import csv
+
+    seqs = set()
+    with open("${fasta}", 'r', newline = '') as fasta_in:
+        for line in fasta_in:
+            if line.startswith(">"):
+                seqs.add(line.rstrip()[1:])
+
+    print("Fasta of recent had %d sequences" %len(seqs))
+
+    with open("${metadata}", 'r', newline = '') as csv_in, \
+        open("${metadata.baseName}.annotated.csv", 'w', newline = '') as csv_out:
+        reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
+        new_fieldnames = list(reader.fieldnames)
+        if "note" not in new_fieldnames:
+            new_fieldnames.append("note")
+        writer = csv.DictWriter(csv_out, fieldnames = new_fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
+        writer.writeheader()
+        for row in reader:
+            note = []
+            if row["sequence_name"] not in seqs:
+                note.append("sample not recent")
+            if row.get("note"):
+                note.append(row["note"])
+            row["note"] = "|".join(note)
+            writer.writerow(row)
+    """
+}

 process fetch_recent {
     /**
@@ -146,8 +191,10 @@ workflow extract_protected_sequences {
         fetch_outgroups(fasta)
         fetch_outgroups.out.concat( recent_ch, designations_ch ).set { protected_ch }
         protected_ch.collectFile(name: "force.fa").set { output_ch }
+        annotate_metadata(metadata, output_ch)
     emit:
-        output_ch
+        fasta = output_ch
+        metadata = annotate_metadata.out
 }
diff --git a/modules/preprocess.nf b/modules/preprocess.nf
index ae6a026..05b28ae 100644
--- a/modules/preprocess.nf
+++ b/modules/preprocess.nf
@@ -188,6 +188,73 @@ process prune_tree_with_metadata {
     """
 }

+process get_tree_tips {
+    /**
+    * Gets the list of tree tip names
+    * @input tree
+    */
+
+    input:
+    path tree
+
+    output:
+    path "${tree.baseName}.tips.txt"
+
+    script:
+    """
+    gotree stats tips \
+        -i ${tree} | cut -f4 > "${tree.baseName}.tips.txt"
+    """
+}
+
+
+process annotate_metadata {
+    /**
+    * Adds a note column flagging sequences which are tips of the input tree
+    * @input metadata, tips
+    * @output metadata
+    */
+
+    input:
+    path metadata
+    path tips
+
+    output:
+    path "${metadata.baseName}.annotated.csv"
+
+    script:
+    """
+    #!/usr/bin/env python3
+    import csv
+
+    tips = set()
+    with open("${tips}", 'r', newline = '') as tips_in:
+        for line in tips_in:
+            tip = line.rstrip().replace("'","")
+            tips.add(tip)
+
+    print("Tree contains %d tips" %len(tips))
+
+    with open("${metadata}", 'r', newline = '') as csv_in, \
+        open("${metadata.baseName}.annotated.csv", 'w', newline = '') as csv_out:
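+        # annotate each row with whether its sequence_name survived pruning as a tree tip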
+        reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
+        new_fieldnames = list(reader.fieldnames)
+        if "note" not in new_fieldnames:
+            new_fieldnames.append("note")
+        writer = csv.DictWriter(csv_out, fieldnames = new_fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
+        writer.writeheader()
+        for row in reader:
+            note = []
+            if row["sequence_name"] in tips:
+                note.append("tip in input tree")
+            if row.get("note"):
+                note.append(row["note"])
+            row["note"] = "|".join(note)
+            writer.writerow(row)
+    """
+}
+

 process announce_summary {

     /**
@@ -300,10 +366,12 @@ workflow clean_fasta_and_metadata_and_tree {
         clean_metadata(metadata, clean_fasta_headers_with_tree.out.map)
         get_keep_tips(clean_metadata.out)
         prune_tree_with_metadata(clean_fasta_headers_with_tree.out.tree, get_keep_tips.out)
+        get_tree_tips(prune_tree_with_metadata.out)
+        annotate_metadata(clean_metadata.out, get_tree_tips.out)
         announce_metadata_pruned_tree(tree, metadata, prune_tree_with_metadata.out)
     emit:
         fasta = clean_fasta_headers_with_tree.out.fasta
-        metadata = clean_metadata.out
+        metadata = annotate_metadata.out
         tree = prune_tree_with_metadata.out
 }
diff --git a/modules/subsample_for_tree.nf b/modules/subsample_for_tree.nf
index 7980dd8..4a0afde 100644
--- a/modules/subsample_for_tree.nf
+++ b/modules/subsample_for_tree.nf
@@ -79,7 +79,9 @@ process filter_on_ambiguous_sites {
     path fasta

     output:
-    path "${fasta.baseName}.site_filtered.fa"
+    path "${fasta.baseName}.site_filtered.fa", emit: fasta
+    path "ids_with_ambiguous_sites.log", emit: ambiguous_site_ids
+

     script:
     if ( params.downsample )
@@ -87,7 +89,7 @@ process filter_on_ambiguous_sites {
         """
         $project_dir/../bin/filter_by_ambiguous_sites.py \
             --in-alignment ${fasta} \
             --out-alignment "${fasta.baseName}.site_filtered.fa" \
-            --sites ${ambiguous_sites}
+            --sites ${ambiguous_sites} > "ids_with_ambiguous_sites.log"
         """
     else
         """
@@ -129,6 +131,65 @@ process downsample {
     """
 }

+process annotate_metadata {
+    /**
+    * Adds a note column with info about sequences hashed, filtered and downsampled
+    * @input metadata, hashmap, ambiguous_site_log
+    * @output metadata
+    */
+
+    input:
+    path metadata
+    path hashmap
+    path ambiguous_site_log
+
+    output:
+    path "${metadata.baseName}.annotated.csv"
+
+    script:
+    """
+    #!/usr/bin/env python3
+    import csv
+
+    tips = set()
+    hashmap = {}
+    with open("${hashmap}", 'r', newline = '') as hashmap_in:
+        for line in hashmap_in:
+            tip, redundant = line.rstrip().split(",")
+            tips.add(tip)
+            for id in redundant.split("|"):
+                hashmap[id] = tip
+    print("hashmap contains %d tips and %d redundants" %(len(tips), len(hashmap)))
+
+    ambig = set()
+    with open("${ambiguous_site_log}", 'r', newline = '') as ambiguous_site_log_in:
+        for line in ambiguous_site_log_in:
+            ambig.add(line.rstrip())
+    print("%d had ambiguous bases" %len(ambig))
+
+    with open("${metadata}", 'r', newline = '') as csv_in, \
+        open("${metadata.baseName}.annotated.csv", 'w', newline = '') as csv_out:
+        reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
+        new_fieldnames = list(reader.fieldnames)
+        if "note" not in new_fieldnames:
+            new_fieldnames.append("note")
+        writer = csv.DictWriter(csv_out, fieldnames = new_fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
+        writer.writeheader()
+        for row in reader:
+            note = []
+            if row["sequence_name"] in hashmap:
+                note.append("hashed to tip %s" %hashmap[row["sequence_name"]])
+            if row["sequence_name"] in ambig:
+                note.append("filtered due to ambiguous base")
+            if row["date_filter"]:
+                if "downsample" not in row.get("note", ""):
+                    note.append("date filtered")
+            if row.get("note"):
+                note.append(row["note"])
+            row["note"] = "|".join(note)
+            writer.writerow(row)
+    """
+}

 process announce_summary {

     /**
@@ -188,11 +249,12 @@ workflow subsample_for_tree {
         hash_non_unique_seqs(fasta, metadata)
         filter_on_ambiguous_sites(hash_non_unique_seqs.out.fasta)
         filter_on_sample_date(metadata)
-        downsample(filter_on_ambiguous_sites.out, filter_on_sample_date.out)
-        announce_summary(fasta, hash_non_unique_seqs.out.fasta, filter_on_ambiguous_sites.out, filter_on_sample_date.out, downsample.out.fasta)
+        downsample(filter_on_ambiguous_sites.out.fasta, filter_on_sample_date.out)
+        annotate_metadata(downsample.out.metadata, hash_non_unique_seqs.out.hashmap, filter_on_ambiguous_sites.out.ambiguous_site_ids)
+        announce_summary(fasta, hash_non_unique_seqs.out.fasta, filter_on_ambiguous_sites.out.fasta, filter_on_sample_date.out, downsample.out.fasta)
     emit:
         fasta = downsample.out.fasta // subset of unique
-        metadata = downsample.out.metadata
+        metadata = annotate_metadata.out
         hashmap = hash_non_unique_seqs.out.hashmap
 }
diff --git a/modules/usher_expand_tree.nf b/modules/usher_expand_tree.nf
index 2d5c54f..01ea7fe 100644
--- a/modules/usher_expand_tree.nf
+++ b/modules/usher_expand_tree.nf
@@ -207,6 +207,54 @@ process usher_start_tree {
     """
 }

+
+process optimize_start_tree {
+    /**
+    * Runs matOptimize to improve the tree
+    * @input protobuf
+    */
+
+    publishDir "${publish_dev}/trees", pattern: "*.pb", mode: 'copy', saveAs: { "cog_global.${params.date}.pb" }, overwrite: true
+
+    input:
+    path protobuf
+
+    output:
+    path "${protobuf.baseName}.optimized.pb"
+
+    script:
+    """
+    matOptimize -i ${protobuf} \
+        -o "${protobuf.baseName}.optimized.pb" \
+        -r 100 \
+        -T ${params.max_cpus} \
+        -s 259200
+    """
+}
+
+process protobuf_to_tree {
+    /**
+    * Runs matUtils extract to convert the protobuf to a newick tree
+    * @input protobuf
+    */
+
+    publishDir "${publish_dev}/trees", pattern: "*.tree", mode: 'copy', saveAs: { "cog_global.${params.date}.tree" }, overwrite: true
+
+    input:
+    path protobuf
+
+    output:
+    path "${protobuf.baseName}.tree"
+
+    script:
+    """
+    matUtils extract \
+        -i ${protobuf} \
+        -t "${protobuf.baseName}.tree"
+    """
+}
+
+
 process usher_update_tree {
     /**
      * Makes usher mutation annotated tree
@@ -361,6 +409,53 @@ process add_usher_metadata {
     """
 }

+process annotate_metadata {
+    /**
+    * Appends to the note column info about sequences passed to usher for placement
+    * @input metadata, fasta
+    * @output metadata
+    */
+
+    input:
+    path metadata
+    path fasta
+
+    output:
+    path "${metadata.baseName}.annotated.csv"
+
+    script:
+    """
+    #!/usr/bin/env python3
+    import csv
+
+    seqs = set()
+    with open("${fasta}", 'r', newline = '') as fasta_in:
+        for line in fasta_in:
+            if line.startswith(">"):
+                seqs.add(line.rstrip()[1:])
+
+    print("Fasta to add had %d sequences" %len(seqs))
+
+    with open("${metadata}", 'r', newline = '') as csv_in, \
+        open("${metadata.baseName}.annotated.csv", 'w', newline = '') as csv_out:
+        reader = csv.DictReader(csv_in, delimiter=",", quotechar='\"', dialect = "unix")
+        new_fieldnames = list(reader.fieldnames)
+        if "note" not in new_fieldnames:
+            new_fieldnames.append("note")
+        writer = csv.DictWriter(csv_out, fieldnames = new_fieldnames, delimiter=",", quotechar='\"', quoting=csv.QUOTE_MINIMAL, dialect = "unix")
+        writer.writeheader()
+        for row in reader:
+            note = []
+            if row["sequence_name"] in seqs:
+                note.append("tried to add with usher")
+            if row.get("note"):
+                note.append(row["note"])
+            row["note"] = "|".join(note)
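+            # note now holds this step's annotation plus any note carried over from earlier steps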
"|".join(note) + writer.writerow(row) + """ +} + process root_tree { /** * Roots tree with WH04 @@ -488,7 +582,7 @@ workflow prepare_fasta { add_reference_to_fasta(mask_fasta.out, reference) fasta_to_vcf(add_reference_to_fasta.out) emit: - fasta = fasta_to_vcf.out + vcf = fasta_to_vcf.out } @@ -503,8 +597,10 @@ workflow iteratively_update_tree { prepare_fasta(fasta_chunks) prepare_fasta.out.collect().set{ vcf_list } usher_update_tree(vcf_list, protobuf) - add_usher_metadata(usher_update_tree.out.usher_log.last(),metadata) + annotate_metadata(metadata, fasta) + add_usher_metadata(usher_update_tree.out.usher_log.last(),annotate_metadata.out) prune_tree_of_long_branches(usher_update_tree.out.protobuf.last()) + final_tree = prune_tree_of_long_branches.out.tree final_protobuf = prune_tree_of_long_branches.out.protobuf emit: @@ -523,7 +619,8 @@ workflow iteratively_force_update_tree { prepare_fasta(fasta_chunks) prepare_fasta.out.collect().set{ vcf_list } usher_force_update_tree(vcf_list, protobuf) - add_usher_metadata(usher_force_update_tree.out.usher_log.last(),metadata) + annotate_metadata(metadata, fasta) + add_usher_metadata(usher_force_update_tree.out.usher_log.last(),annotate_metadata.out) prune_tree_of_long_branches(usher_force_update_tree.out.protobuf.last()) final_tree = prune_tree_of_long_branches.out.tree final_protobuf = prune_tree_of_long_branches.out.protobuf @@ -554,12 +651,14 @@ workflow usher_expand_tree { out_tree = usher_start_tree.out.tree out_metadata = metadata } - root_tree(out_tree) + optimize_start_tree(out_pb) + protobuf_to_tree(optimize_start_tree.out) + root_tree(protobuf_to_tree.out) rescale_branch_lengths(root_tree.out) announce_tree_complete(rescale_branch_lengths.out, "initial") emit: tree = rescale_branch_lengths.out - protobuf = out_pb + protobuf = optimize_start_tree.out metadata = out_metadata } diff --git a/workflows/process_tree_building.nf b/workflows/process_tree_building.nf index 6b35271..ed676ed 100644 --- a/workflows/process_tree_building.nf +++ b/workflows/process_tree_building.nf @@ -39,42 +39,50 @@ workflow { ch_clean_metadata = clean_fasta_and_metadata.out.metadata } - ch_protected = extract_protected_sequences(ch_clean_fasta, ch_clean_metadata) + extract_protected_sequences(ch_clean_fasta, ch_clean_metadata) + ch_protected = extract_protected_sequences.out.fasta + ch_protected_metadata = extract_protected_sequences.out.metadata if ( ! params.protobuf ) { if ( ! params.newick_tree ) { - subsample_for_tree(ch_clean_fasta, ch_clean_metadata) + subsample_for_tree(ch_clean_fasta, ch_protected_metadata) build_split_grafted_tree(subsample_for_tree.out.fasta, subsample_for_tree.out.metadata, subsample_for_tree.out.hashmap) - if ( params.skip_usher ){ - ch_full_tree = build_split_grafted_tree.out.tree - ch_full_metadata = ch_clean_metadata - } else { - ch_clean_tree = build_split_grafted_tree.out.tree - } + ch_clean_tree = build_split_grafted_tree.out.tree + ch_fasttree_metadata = subsample_for_tree.out.metadata + } else { + ch_fasttree_metadata = ch_protected_metadata } + if ( ! 
params.skip_usher ){ - usher_expand_tree(ch_clean_fasta, ch_clean_tree, ch_clean_metadata) + usher_expand_tree(ch_clean_fasta, ch_clean_tree, ch_fasttree_metadata) ch_protobuf = usher_expand_tree.out.protobuf ch_expanded_tree = usher_expand_tree.out.tree ch_expanded_metadata = usher_expand_tree.out.metadata + } else { + ch_expanded_tree = ch_clean_tree + ch_expanded_metadata = ch_fasttree_metadata } } else if ( params.newick_tree && params.update_protobuf ) { ch_protobuf_raw = Channel.fromPath(params.protobuf) - soft_update_usher_tree(ch_clean_fasta, ch_clean_tree, ch_protobuf_raw, ch_clean_metadata) + soft_update_usher_tree(ch_clean_fasta, ch_clean_tree, ch_protobuf_raw, ch_protected_metadata) ch_protobuf = soft_update_usher_tree.out.protobuf ch_expanded_tree = soft_update_usher_tree.out.tree ch_expanded_metadata = soft_update_usher_tree.out.metadata + } else if ( params.newick_tree ) { ch_protobuf = Channel.fromPath(params.protobuf) ch_expanded_tree = ch_clean_tree - ch_expanded_metadata = ch_clean_metadata + ch_expanded_metadata = ch_protected_metadata } if (! params.skip_usher ) { hard_update_usher_tree(ch_protected, ch_expanded_tree, ch_protobuf, ch_expanded_metadata) ch_full_tree = hard_update_usher_tree.out.tree ch_full_metadata = hard_update_usher_tree.out.metadata + } else if ( params.protobuf ){ + ch_full_tree = ch_expanded_tree + ch_full_metadata = ch_expanded_metadata } post_process_tree(ch_full_tree, ch_full_metadata)