Let measure-complexity output the worst performing verification tas…

…ks by resource count (#5631) ### Description - Let `measure-complexity` output the worst performing verification tasks by resource count - Add an option `--top-x` to `measure-complexity` to configure how many of the worst tasks are shown - Change the default of `--iterations` from 10 to 1 ### How has this been tested? - Add a CLI test <small>By submitting this pull request, I confirm that my contribution is made under the terms of the [MIT license](https://github.com/dafny-lang/dafny/blob/master/LICENSE.txt).</small>
dafny-lang · Jul 24, 2024 · e397b4f · e397b4f
1 parent 281922d
commit e397b4f
Show file tree

Hide file tree

Showing 4 changed files with 103 additions and 4 deletions.
diff --git a/Source/DafnyDriver/Commands/MeasureComplexityCommand.cs b/Source/DafnyDriver/Commands/MeasureComplexityCommand.cs
@@ -3,16 +3,20 @@
 using System.CommandLine;
 using System.Linq;
 using System.Reactive.Subjects;
+using System.Threading;
 using System.Threading.Tasks;
 using DafnyCore;
 using DafnyDriver.Commands;
+using Microsoft.Boogie;
+using VC;
 
 namespace Microsoft.Dafny;
 
 static class MeasureComplexityCommand {
   public static IEnumerable<Option> Options => new Option[] {
     Iterations,
     RandomSeed,
+    TopX,
     VerifyCommand.FilterSymbol,
     VerifyCommand.FilterPosition,
   }.Concat(DafnyCommands.VerificationOptions).
@@ -24,12 +28,16 @@ static MeasureComplexityCommand() {
 
     DooFile.RegisterNoChecksNeeded(Iterations, false);
     DooFile.RegisterNoChecksNeeded(RandomSeed, false);
+    DooFile.RegisterNoChecksNeeded(TopX, false);
   }
 
+  private static readonly Option<uint> TopX = new("--worst-amount", () => 10U,
+    $"Configures the amount of worst performing verification tasks that are reported.");
+
   private static readonly Option<uint> RandomSeed = new("--random-seed", () => 0U,
     $"Turn on randomization of the input that Dafny passes to the SMT solver and turn on randomization in the SMT solver itself. Certain Dafny proofs are complex in the sense that changes to the proof that preserve its meaning may cause its verification result to change. This option simulates meaning-preserving changes to the proofs without requiring the user to actually make those changes. The proof changes are renaming variables and reordering declarations in the SMT input passed to the solver, and setting solver options that have similar effects.");
 
-  private static readonly Option<uint> Iterations = new("--iterations", () => 10U,
+  private static readonly Option<uint> Iterations = new("--iterations", () => 1U,
     $"Attempt to verify each proof n times with n random seeds, each seed derived from the previous one. {RandomSeed.Name} can be used to specify the first seed, which will otherwise be 0.") {
     ArgumentHelpName = "n"
   };
@@ -62,7 +70,7 @@ private static async Task<int> Execute(DafnyOptions options) {
       // For error diagnostics, we should group duplicates and say how often they occur.
       // Performance data of individual verification tasks (VCs) should be grouped by VcNum (the assertion batch).
       VerifyCommand.ReportVerificationDiagnostics(compilation, verificationResults);
-      var summaryReported = VerifyCommand.ReportVerificationSummary(compilation, verificationResults);
+      var summaryReported = ReportResourceSummary(compilation, verificationResults);
       var proofDependenciesReported = VerifyCommand.ReportProofDependencies(compilation, resolution, verificationResults);
       var verificationResultsLogged = VerifyCommand.LogVerificationResults(compilation, resolution, verificationResults);
 
@@ -75,13 +83,47 @@ private static async Task<int> Execute(DafnyOptions options) {
     return await compilation.GetAndReportExitCode();
   }
 
+  public static async Task ReportResourceSummary(
+    CliCompilation cliCompilation,
+    IObservable<CanVerifyResult> verificationResults) {
+
+    PriorityQueue<VerificationTaskResult, int> worstPerformers = new();
+
+    var totalResources = 0;
+    var worstAmount = cliCompilation.Options.Get(TopX);
+    verificationResults.Subscribe(result => {
+      foreach (var taskResult in result.Results) {
+        var runResult = taskResult.Result;
+        totalResources += runResult.ResourceCount;
+        worstPerformers.Enqueue(taskResult, runResult.ResourceCount);
+        if (worstPerformers.Count > worstAmount) {
+          worstPerformers.Dequeue();
+        }
+      }
+    });
+    await verificationResults.WaitForComplete();
+    var output = cliCompilation.Options.OutputWriter;
+    var decreasingWorst = new Stack<VerificationTaskResult>();
+    while (worstPerformers.Count > 0) {
+      decreasingWorst.Push(worstPerformers.Dequeue());
+    }
+
+    await output.WriteLineAsync($"The total consumed resources are {totalResources}");
+    await output.WriteLineAsync($"The most demanding {worstAmount} verification tasks consumed these resources:");
+    foreach (var performer in decreasingWorst) {
+      var location = ((IToken)performer.Task.Token).TokenToString(cliCompilation.Options);
+      await output.WriteLineAsync($"{location}: {performer.Result.ResourceCount}");
+    }
+  }
+
   private static async Task RunVerificationIterations(DafnyOptions options, CliCompilation compilation,
     IObserver<CanVerifyResult> verificationResultsObserver) {
     int iterationSeed = (int)options.Get(RandomSeed);
     var random = new Random(iterationSeed);
-    foreach (var iteration in Enumerable.Range(0, (int)options.Get(Iterations))) {
+    var iterations = (int)options.Get(Iterations);
+    foreach (var iteration in Enumerable.Range(0, iterations)) {
       await options.OutputWriter.WriteLineAsync(
-        $"Starting verification of iteration {iteration} with seed {iterationSeed}");
+        $"Starting verification of iteration {iteration + 1}/{iterations} with seed {iterationSeed}");
       try {
         await foreach (var result in compilation.VerifyAllLazily(iterationSeed)) {
           verificationResultsObserver.OnNext(result);

diff --git a/Source/IntegrationTests/TestFiles/LitTests/LitTest/cli/measure-complexity.dfy b/Source/IntegrationTests/TestFiles/LitTests/LitTest/cli/measure-complexity.dfy
@@ -0,0 +1,20 @@
+// RUN: %exits-with 4 %baredafny measure-complexity --use-basename-for-filename --isolate-assertions --worst-amount 100 "%s" > %t.raw
+// RUN: %sed 's#\): \d+#): <redacted>#g' %t.raw > %t.raw2
+// RUN: %sed 's#are \d+#are <redacted>#g' %t.raw2 > %t
+// RUN: %diff "%s.expect" "%t"
+method Foo() {
+  assert Ack(0,0) == 10;
+  assert Ack(1,3) == 10;
+  assert Ack(3,3) == 10;
+  assert Ack(3,4) == 10;
+}
+
+function Ack(m: nat, n: nat): nat
+{
+  if m == 0 then
+    n + 1
+  else if n == 0 then
+    Ack(m - 1, 1)
+  else
+    Ack(m - 1, Ack(m, n - 1))
+}
diff --git a/Source/IntegrationTests/TestFiles/LitTests/LitTest/cli/measure-complexity.dfy.expect b/Source/IntegrationTests/TestFiles/LitTests/LitTest/cli/measure-complexity.dfy.expect
@@ -0,0 +1,32 @@
+Starting verification of iteration 1/1 with seed 0
+measure-complexity.dfy(6,18): Error: assertion might not hold
+The total consumed resources are <redacted>
+The most demanding 100 verification tasks consumed these resources:
+measure-complexity.dfy(9,18): <redacted>
+measure-complexity.dfy(8,18): <redacted>
+measure-complexity.dfy(9,15): <redacted>
+measure-complexity.dfy(7,18): <redacted>
+measure-complexity.dfy(19,4): <redacted>
+measure-complexity.dfy(6,18): <redacted>
+measure-complexity.dfy(7,15): <redacted>
+measure-complexity.dfy(19,10): <redacted>
+measure-complexity.dfy(7,13): <redacted>
+measure-complexity.dfy(17,4): <redacted>
+measure-complexity.dfy(17,15): <redacted>
+measure-complexity.dfy(19,15): <redacted>
+measure-complexity.dfy(19,24): <redacted>
+measure-complexity.dfy(17,10): <redacted>
+measure-complexity.dfy(15,6): <redacted>
+measure-complexity.dfy(19,4): <redacted>
+measure-complexity.dfy(19,4): <redacted>
+measure-complexity.dfy(9,13): <redacted>
+measure-complexity.dfy(17,4): <redacted>
+measure-complexity.dfy(6,13): <redacted>
+measure-complexity.dfy(8,15): <redacted>
+measure-complexity.dfy(17,4): <redacted>
+measure-complexity.dfy(8,13): <redacted>
+measure-complexity.dfy(19,15): <redacted>
+measure-complexity.dfy(6,15): <redacted>
+measure-complexity.dfy(19,15): <redacted>
+measure-complexity.dfy(12,9): <redacted>
+measure-complexity.dfy(5,7): <redacted>
diff --git a/docs/dev/news/5631.feat b/docs/dev/news/5631.feat
@@ -0,0 +1,5 @@
+Let the command `measure-complexity` output which verification tasks performed the worst in terms of resource count. Output looks like:
+...
+Verification task on line 8 in file measure-complexity.dfy consumed 9984 resources
+Verification task on line 7 in file measure-complexity.dfy consumed 9065 resources
+...