Skip to content

Commit

Permalink
Let measure-complexity output the worst performing verification tas…
Browse files Browse the repository at this point in the history
…ks by resource count (#5631)

### Description
- Let `measure-complexity` output the worst performing verification
tasks by resource count
- Add an option `--top-x` to `measure-complexity` to configure how many
of the worst tasks are shown
- Change the default of `--iterations` from 10 to 1

### How has this been tested?
- Add a CLI test

<small>By submitting this pull request, I confirm that my contribution
is made under the terms of the [MIT
license](https://github.com/dafny-lang/dafny/blob/master/LICENSE.txt).</small>
  • Loading branch information
keyboardDrummer authored Jul 24, 2024
1 parent 281922d commit e397b4f
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 4 deletions.
50 changes: 46 additions & 4 deletions Source/DafnyDriver/Commands/MeasureComplexityCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
using System.CommandLine;
using System.Linq;
using System.Reactive.Subjects;
using System.Threading;
using System.Threading.Tasks;
using DafnyCore;
using DafnyDriver.Commands;
using Microsoft.Boogie;
using VC;

namespace Microsoft.Dafny;

static class MeasureComplexityCommand {
public static IEnumerable<Option> Options => new Option[] {
Iterations,
RandomSeed,
TopX,
VerifyCommand.FilterSymbol,
VerifyCommand.FilterPosition,
}.Concat(DafnyCommands.VerificationOptions).
Expand All @@ -24,12 +28,16 @@ static MeasureComplexityCommand() {

DooFile.RegisterNoChecksNeeded(Iterations, false);
DooFile.RegisterNoChecksNeeded(RandomSeed, false);
DooFile.RegisterNoChecksNeeded(TopX, false);
}

private static readonly Option<uint> TopX = new("--worst-amount", () => 10U,
$"Configures the amount of worst performing verification tasks that are reported.");

private static readonly Option<uint> RandomSeed = new("--random-seed", () => 0U,
$"Turn on randomization of the input that Dafny passes to the SMT solver and turn on randomization in the SMT solver itself. Certain Dafny proofs are complex in the sense that changes to the proof that preserve its meaning may cause its verification result to change. This option simulates meaning-preserving changes to the proofs without requiring the user to actually make those changes. The proof changes are renaming variables and reordering declarations in the SMT input passed to the solver, and setting solver options that have similar effects.");

private static readonly Option<uint> Iterations = new("--iterations", () => 10U,
private static readonly Option<uint> Iterations = new("--iterations", () => 1U,
$"Attempt to verify each proof n times with n random seeds, each seed derived from the previous one. {RandomSeed.Name} can be used to specify the first seed, which will otherwise be 0.") {
ArgumentHelpName = "n"
};
Expand Down Expand Up @@ -62,7 +70,7 @@ private static async Task<int> Execute(DafnyOptions options) {
// For error diagnostics, we should group duplicates and say how often they occur.
// Performance data of individual verification tasks (VCs) should be grouped by VcNum (the assertion batch).
VerifyCommand.ReportVerificationDiagnostics(compilation, verificationResults);
var summaryReported = VerifyCommand.ReportVerificationSummary(compilation, verificationResults);
var summaryReported = ReportResourceSummary(compilation, verificationResults);
var proofDependenciesReported = VerifyCommand.ReportProofDependencies(compilation, resolution, verificationResults);
var verificationResultsLogged = VerifyCommand.LogVerificationResults(compilation, resolution, verificationResults);

Expand All @@ -75,13 +83,47 @@ private static async Task<int> Execute(DafnyOptions options) {
return await compilation.GetAndReportExitCode();
}

public static async Task ReportResourceSummary(
CliCompilation cliCompilation,
IObservable<CanVerifyResult> verificationResults) {

PriorityQueue<VerificationTaskResult, int> worstPerformers = new();

var totalResources = 0;
var worstAmount = cliCompilation.Options.Get(TopX);
verificationResults.Subscribe(result => {
foreach (var taskResult in result.Results) {
var runResult = taskResult.Result;
totalResources += runResult.ResourceCount;
worstPerformers.Enqueue(taskResult, runResult.ResourceCount);
if (worstPerformers.Count > worstAmount) {
worstPerformers.Dequeue();
}
}
});
await verificationResults.WaitForComplete();
var output = cliCompilation.Options.OutputWriter;
var decreasingWorst = new Stack<VerificationTaskResult>();
while (worstPerformers.Count > 0) {
decreasingWorst.Push(worstPerformers.Dequeue());
}

await output.WriteLineAsync($"The total consumed resources are {totalResources}");
await output.WriteLineAsync($"The most demanding {worstAmount} verification tasks consumed these resources:");
foreach (var performer in decreasingWorst) {
var location = ((IToken)performer.Task.Token).TokenToString(cliCompilation.Options);
await output.WriteLineAsync($"{location}: {performer.Result.ResourceCount}");
}
}

private static async Task RunVerificationIterations(DafnyOptions options, CliCompilation compilation,
IObserver<CanVerifyResult> verificationResultsObserver) {
int iterationSeed = (int)options.Get(RandomSeed);
var random = new Random(iterationSeed);
foreach (var iteration in Enumerable.Range(0, (int)options.Get(Iterations))) {
var iterations = (int)options.Get(Iterations);
foreach (var iteration in Enumerable.Range(0, iterations)) {
await options.OutputWriter.WriteLineAsync(
$"Starting verification of iteration {iteration} with seed {iterationSeed}");
$"Starting verification of iteration {iteration + 1}/{iterations} with seed {iterationSeed}");
try {
await foreach (var result in compilation.VerifyAllLazily(iterationSeed)) {
verificationResultsObserver.OnNext(result);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// RUN: %exits-with 4 %baredafny measure-complexity --use-basename-for-filename --isolate-assertions --worst-amount 100 "%s" > %t.raw
// RUN: %sed 's#\): \d+#): <redacted>#g' %t.raw > %t.raw2
// RUN: %sed 's#are \d+#are <redacted>#g' %t.raw2 > %t
// RUN: %diff "%s.expect" "%t"
method Foo() {
assert Ack(0,0) == 10;
assert Ack(1,3) == 10;
assert Ack(3,3) == 10;
assert Ack(3,4) == 10;
}

function Ack(m: nat, n: nat): nat
{
if m == 0 then
n + 1
else if n == 0 then
Ack(m - 1, 1)
else
Ack(m - 1, Ack(m, n - 1))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Starting verification of iteration 1/1 with seed 0
measure-complexity.dfy(6,18): Error: assertion might not hold
The total consumed resources are <redacted>
The most demanding 100 verification tasks consumed these resources:
measure-complexity.dfy(9,18): <redacted>
measure-complexity.dfy(8,18): <redacted>
measure-complexity.dfy(9,15): <redacted>
measure-complexity.dfy(7,18): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(6,18): <redacted>
measure-complexity.dfy(7,15): <redacted>
measure-complexity.dfy(19,10): <redacted>
measure-complexity.dfy(7,13): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(17,15): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(19,24): <redacted>
measure-complexity.dfy(17,10): <redacted>
measure-complexity.dfy(15,6): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(19,4): <redacted>
measure-complexity.dfy(9,13): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(6,13): <redacted>
measure-complexity.dfy(8,15): <redacted>
measure-complexity.dfy(17,4): <redacted>
measure-complexity.dfy(8,13): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(6,15): <redacted>
measure-complexity.dfy(19,15): <redacted>
measure-complexity.dfy(12,9): <redacted>
measure-complexity.dfy(5,7): <redacted>
5 changes: 5 additions & 0 deletions docs/dev/news/5631.feat
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Let the command `measure-complexity` output which verification tasks performed the worst in terms of resource count. Output looks like:
...
Verification task on line 8 in file measure-complexity.dfy consumed 9984 resources
Verification task on line 7 in file measure-complexity.dfy consumed 9065 resources
...

0 comments on commit e397b4f

Please sign in to comment.